From 43cc8a1aea20da58c979bda6a0563220ff91c680 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Fri, 10 Nov 2023 10:15:02 +0100 Subject: [PATCH 01/22] evaluating new reference model Signed-off-by: Peter Staar --- deepsearch_glm/nlp_train_reference.py | 322 ++++++------------ pyproject.toml | 2 +- src/andromeda/nlp/ent/reference.h | 14 +- .../base_crf_model/structures/crf_model.cpp | 24 +- 4 files changed, 130 insertions(+), 232 deletions(-) diff --git a/deepsearch_glm/nlp_train_reference.py b/deepsearch_glm/nlp_train_reference.py index 3a5897df..2630bc6c 100644 --- a/deepsearch_glm/nlp_train_reference.py +++ b/deepsearch_glm/nlp_train_reference.py @@ -6,6 +6,7 @@ import time import json import glob +import tqdm import argparse import random @@ -17,17 +18,13 @@ import pandas as pd import matplotlib.pyplot as plt -#import fasttext import textColor as tc -#import deepsearch as ds -#from tabulate import tabulate - -#import andromeda_nlp +from tabulate import tabulate from deepsearch_glm.andromeda_nlp import nlp_model from deepsearch_glm.utils.ds_utils import convert_pdffiles -from deepsearch_glm.nlp_utils import create_nlp_dir +from deepsearch_glm.nlp_utils import create_nlp_dir, init_nlp_model def parse_arguments(): @@ -40,208 +37,96 @@ def parse_arguments(): 1. end-to-end example on pdf documents: - poetry run python ./deepsearch_glm/nlp_train_reference.py -m all --pdf './data/documents/articles/*.pdf' - + poetry run python ./deepsearch_glm/nlp_train_semantic.py -m all --input-dir ' --output-dir ' """, formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument('-m', '--mode', required=False, default="all", - help="parse: [convert,extract,annotate,classify,pure-classify,crf,pure-crf,all]") + parser.add_argument('-m', '--mode', required=True, default="all", + help="mode for training semantic model", + choices=["extract","annotate","train","all"]) - parser.add_argument('--pdf', required=True, + parser.add_argument('--input-dir', required=False, type=str, default=None, - help="filename(s) of pdf document") - - parser.add_argument('--json', required=False, - type=str, default=None, - help="filename(s) of json document") + help="input directory with documents") parser.add_argument('--output-dir', required=False, - type=str, default=create_nlp_dir(), - help="output root directory for trained models") - - """ - parser.add_argument('', '--source-directory', required=False, default="./data/documents/articles", - help="directory with pdfs") - parser.add_argument('-t', '--target-directory', required=False, default="./data/models/", - help="directory for target files") - """ + type=str, default="./reference-models", + help="output directory for trained models") args = parser.parse_args() - pdf = args.pdf - json = args.json + idir = args.input_dir + + if args.output_dir==None: + odir = create_nlp_dir() + + elif not os.path.exists(args.output_dir): + os.mkdir(args.output_dir) + odir = args.output_dir - if pdf==None and json==None: - exit(-1) - - if pdf!=None: - pdf_files=sorted(glob.glob(pdf)) else: - pdf_files=[] + odir = args.output_dir - if json!=None: - json_files=sorted(glob.glob(json)) - else: - json_files=[] - - if not os.path.exists(args.output_dir): - os.mkdir(args.output_dir) - - return args.mode, pdf_files, json_files, args.output_dir + return args.mode, args.input_dir, odir def shorten_text(text): ntext = text.replace("\n", "") return ntext.strip() - -def extract_references(filenames, sfile, rfile): - print(f"extract references for filenames: ", len(filenames)) - - config = { - "mode" : "apply", - "order" : True, - "models": "numval,link" - } - - #model = andromeda_nlp.nlp_model() - model = nlp_model() - model.initialise(config) - - MINLEN = 5 +def extract_references(filenames, ofile): - fws = open(sfile, "w") - fwr = open(rfile, "w") - - for filename in filenames: + nlp_model = init_nlp_model("semantic") - if filename.endswith("references.json"): - continue - - print(f"reading {filename}") + fw = open(ofile, "w") + + total=0 + for filename in tqdm.tqdm(filenames): + #print(f"reading {filename}") try: with open(filename, "r") as fr: - data = json.load(fr) + idoc = json.load(fr) except: continue - - with open(filename, "w") as fw: - fw.write(json.dumps(data, indent=2)) - is_ref=False - cnt_ref=0 + odoc = nlp_model.apply_on_doc(idoc) - for item in data["main-text"]: - - if "text" in item: - text = item["text"].strip() - else: - continue + for item in odoc["texts"]: - if "type" in item: - label = item["type"] - else: + if "properties" not in item: continue - - label = (label.split("-"))[0] - - content = text.lower().strip().replace(" ", "") - if content.endswith("references"): - is_ref = True - elif is_ref and label=="subtitle": - is_ref = False - - if is_ref and (not content.endswith("references")) and len(text)>=MINLEN: - label = "reference" - elif re.match("^(\d+|\[\d+\])(.*)\((19|20)\d{2}\)\.?$", text): - label = "reference" - elif re.match("^(\[\d+\])(\s+[A-Z]\.)+.*", text): - label = "reference" - elif re.match("^(\[\])(.*)\((19|20)\d{2}\)\.?$", text): - label = "reference" - elif re.match("^(Table|Figure)(\s+\d+(\.\d+)?)(.*)", text): - label = "caption" - elif len(text.strip())0: - print(tc.green(f"{filename}: {cnt_ref}")) - else: - print(tc.yellow(f"{filename}: {cnt_ref}")) + training_sample = False - fws.close() - fwr.close() + item = {"training-sample": training_sample, "text": item["text"]} + fw.write(json.dumps(item)+"\n") - print(f"semantic-classification dumped in {sfile}") - print(f"references dumped in {rfile}") + fw.close() + + print("#-items: ", total) -def parse_with_anystyle_api(tlines): +def parse_with_anystyle_api(refs): time.sleep(1) tmpfile = "tmp.json" payload = { "input": [] } - for tline in tlines: - payload["input"].append(tline[1]) + for ref in refs: + payload["input"].append(ref["text"]) anystyle_token = '9fEhg+39p0J60Bs+WTTwTMcqqTFAUYoyjLlp8nEys4wnfgACn0IoqravX8Exsx/+2q1p4sU7636DR22xUeneLg==' anystyle_session = '9GFKMlFoJwbMV6W1Z37YFsG9nbXLqmGicXVzL4r5mn4SqTLcf0revMMFvAjfxcjqR8YBnj2M0fgTWBW12kK1KMFcOgZvZnwQv5lZZ3PQgPP9sait9WgoDR72BHqRpbPe0c1B6%2BNFtYE7aqpugLsTupqBuj%2B%2Fef0tbyd84wC61GkVA9Vtz2nSNC90hDliCre%2BZ2gQUc6runu6yt1M4xa0F8kM4Cxt2pN92XB8hRusqGNfsaCsw5JKdU%2FcDFtdh%2BYDSEBz6DjQFfJq81%2FTI%2F4ulku7mlv73vOC7ew%3D--o%2B2gjgNJqgCjYf4V--3mSN%2FKmNt68WTJsxBh9Bww%3D%3D' @@ -281,9 +166,12 @@ def parse_with_anystyle_api(tlines): return [] -def update_references(refs, tlines): +def update_references(refs): - results = parse_with_anystyle_api(tlines) + results = parse_with_anystyle_api(refs) + + if len(results)!=len(refs): + return for j,item in enumerate(results): @@ -292,7 +180,8 @@ def update_references(refs, tlines): parts.append(row[1]) text = " ".join(parts) - if text!=tlines[j][1]: + if text!=refs[j]["text"]: + print("WARNING: mismatch text") continue beg=0 @@ -305,11 +194,9 @@ def update_references(refs, tlines): beg += charlen beg += 1 - ind = tlines[j][0] - - refs[ind]["word-tokens"]["headers"].append("true-label") + refs[j]["word-tokens"]["headers"].append("true-label") - for ri,row_i in enumerate(refs[ind]["word-tokens"]["data"]): + for ri,row_i in enumerate(refs[j]["word-tokens"]["data"]): label="__undef__" for rj,row_j in enumerate(item): @@ -317,23 +204,28 @@ def update_references(refs, tlines): label = row_j[0] break - refs[ind]["word-tokens"]["data"][ri].append(label) + refs[j]["word-tokens"]["data"][ri].append(label) """ - print(tabulate(refs[ind]["word-tokens"]["data"], - headers=refs[ind]["word-tokens"]["headers"])) + print("\n\n", tabulate(refs[j]["word-tokens"]["data"], + headers=refs[j]["word-tokens"]["headers"])) """ - refs[ind]["annotated"]=True - - tlines=[] + refs[j]["annotated"]=True def annotate(rfile, ofile): + nlp_model = init_nlp_model("semantic", filters=["properties", "word-tokens"]) + + num_lines = sum(1 for _ in open(rfile)) + refs=[] fr = open(rfile, "r") + fw = open(ofile, "w") + cnt = 0 + while True: line = fr.readline().strip() @@ -342,37 +234,30 @@ def annotate(rfile, ofile): try: item = json.loads(line) - refs.append(item) + ref = nlp_model.apply_on_text(item["text"]) + + ref["training-sample"] = item["training-sample"] + + refs.append(ref) + cnt += 1 except: continue - fr.close() + if len(refs)>=16: - print("#-refs: ", len(refs)) - - tlines=[] - for ind,ref in enumerate(refs): + print(f"\rreferennce-annotation: {cnt}/{num_lines}", end="") + update_references(refs) - print(f"\rreferennce-annotation: {ind}/{len(refs)}", end="") - - refs[ind]["annotated"]=False - tlines.append([ind, ref["text"]]) - - if len(tlines)>0 and len(tlines)%16==0: - update_references(refs, tlines) - tlines=[] + for ref in refs: + if "annotated" in ref and ref["annotated"]: + fw.write(json.dumps(ref)+"\n") - print(" --> done") - - if len(tlines)>0: - update_references(refs, tlines) - - fw = open(ofile, "w") + refs=[] - for ref in refs: - if "annotated" in ref and ref["annotated"]: - fw.write(json.dumps(ref)+"\n") + + print(" --> done") + fr.close() fw.close() print(f"writing annotation to {ofile}") @@ -566,31 +451,36 @@ def train_fst(train_file, model_file, metrics_file): model.train(config) -if __name__ == '__main__': +def create_reference_model(mode, idir, odir): - mode, pdf_files, json_files, tdir = parse_arguments() + json_files = glob.glob(os.path.join(idir, "*.json")) + print("#-docs: ", len(json_files)) - if len(pdf_files)>0: - new_json_files = convert_pdffiles(pdf_files, force=False) - - for _ in new_json_files: - json_files.append(_) + sfile = os.path.join(odir, "nlp-references.data.jsonl") + afile = os.path.join(odir, "nlp-references.annot.jsonl") - json_files = sorted(list(set(json_files))) + crf_model_file = os.path.join(odir, "crf_reference") + crf_metrics_file = crf_model_file+".metrics.txt" - sfile = os.path.join(tdir, "nlp-train-semantic-classification.annot.jsonl") + """ + rfile = os.path.join(tdir, "nlp-train-references-crf.jsonl") - afile = os.path.join(tdir, "nlp-train-references-crf.annot.jsonl") + - crf_model_file = os.path.join(tdir, "crf_reference") - fst_model_file = os.path.join(tdir, "fst_sematic") + fst_model_file = os.path.join(tdir, "fst_sematic") + """ + if mode=="extract" or mode=="all": - extract_references(json_files, sfile, rfile) + extract_references(json_files, sfile) if mode=="annotate" or mode=="all": - annotate(rfile, afile) + annotate(sfile, afile) + + if mode=="train" or mode=="all": + train_crf(afile, crf_model_file, crf_metrics_file) + """ if "classify" in mode or mode=="all": if mode=="classify" or mode==all: @@ -604,4 +494,10 @@ def train_fst(train_file, model_file, metrics_file): prepare_for_crf(afile) train_crf(afile, crf_model_file, crf_model_file+".metrics.txt") - + """ + +if __name__ == '__main__': + + mode, idir, odir = parse_arguments() + + create_reference_model(mode, idir, odir) diff --git a/pyproject.toml b/pyproject.toml index 944bcac6..037eca96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "deepsearch-glm" -version = "0.6.1" +version = "0.6.3" description = "Graph Language Models" authors = ["Peter Staar "] license = "MIT" diff --git a/src/andromeda/nlp/ent/reference.h b/src/andromeda/nlp/ent/reference.h index ae4fdae9..79540fa9 100644 --- a/src/andromeda/nlp/ent/reference.h +++ b/src/andromeda/nlp/ent/reference.h @@ -97,8 +97,6 @@ namespace andromeda bool nlp_model::apply(subject& subj) { - //LOG_S(WARNING) << "reference parsing started ..."; - if(not satisfies_dependencies(subj)) { return false; @@ -107,11 +105,8 @@ namespace andromeda bool is_ref=false; for(auto& cls:subj.properties) { - //LOG_S(INFO) << cls.type << " -> " << (cls.type==to_key(SEMANTIC)); - //LOG_S(INFO) << cls.name << " -> " << (cls.name=="reference"); - - if(cls.get_type()==to_key(SEMANTIC) and - cls.get_name()=="reference") + if((cls.get_type()==to_key(SEMANTIC)) and + (cls.get_name()=="reference")) { is_ref = true; } @@ -120,7 +115,6 @@ namespace andromeda // text in subject is not a reference and we do not apply the reference parser if(not is_ref) { - //LOG_S(WARNING) << "is not a reference ..."; return true; } @@ -226,10 +220,10 @@ namespace andromeda std::set labels = { "citation-number", "author", "title", - "publisher", "editor", + //"publisher", "editor", "journal", "container-title", "location", "date", - "volume", "pages", + //"volume", "pages", "url", "doi"}; for(const auto& label:labels) diff --git a/src/andromeda/tooling/models/base_crf_model/structures/crf_model.cpp b/src/andromeda/tooling/models/base_crf_model/structures/crf_model.cpp index 28c37789..c08e0ee9 100644 --- a/src/andromeda/tooling/models/base_crf_model/structures/crf_model.cpp +++ b/src/andromeda/tooling/models/base_crf_model/structures/crf_model.cpp @@ -875,7 +875,7 @@ namespace andromeda_crf if (seq.vs.size() >= MAX_LEN) { LOG_S(ERROR) << "error: sequence is too long."; return; - //exit(1); + } if (seq.vs.size() == 0) { @@ -905,14 +905,22 @@ namespace andromeda_crf assert(s.label >= 0 && s.label < MAX_LABEL_TYPES); - for (std::vector::const_iterator j = i->features.begin(); j != i->features.end(); j++) { - if (contain_space(*j)) { - LOG_S(ERROR) << "error: the name of a feature must not contain any space."; - exit(1); - } - s.positive_features.push_back(_featurename_bag.Put(*j)); - } + for (std::vector::const_iterator j = i->features.begin(); j != i->features.end(); j++) + { + if(contain_space(*j)) + { + LOG_S(ERROR) << "error: the name of a feature (" << (*j) << ") must not contain any space."; + + std::string feat = *j; + feat = andromeda::utils::replace(feat, " ", "_"); + s.positive_features.push_back(_featurename_bag.Put(feat)); + } + else + { + s.positive_features.push_back(_featurename_bag.Put(*j)); + } + } s1.vs.push_back(s); } From 8972c76e13fe0d035ab3a4a08e2c651f5cd11bd8 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Sun, 12 Nov 2023 06:31:21 +0100 Subject: [PATCH 02/22] updating the trainers Signed-off-by: Peter Staar --- deepsearch_glm/nlp_train_semantic.py | 299 +++++++++++++++------------ src/andromeda/nlp/ent/link.h | 6 + 2 files changed, 171 insertions(+), 134 deletions(-) diff --git a/deepsearch_glm/nlp_train_semantic.py b/deepsearch_glm/nlp_train_semantic.py index 69ca3fdb..9d8959a0 100644 --- a/deepsearch_glm/nlp_train_semantic.py +++ b/deepsearch_glm/nlp_train_semantic.py @@ -43,7 +43,7 @@ def parse_arguments(): choices=["retrieve","prepare","process","train","eval","refine","all"]) parser.add_argument('--input-dir', required=False, - type=str, default=None, + type=str, default="./semantic-models/documents", help="input directory with documents") parser.add_argument('--output-dir', required=False, @@ -72,13 +72,28 @@ def retrieve_data_pubmed(sdir): os.mkdir(sdir) index="pubmed" - query="*" + query="description.publication_date:[2022-01-01 TO 2022-03-01]" odir = ds_index_query(index, query, tdir, sources=["_name", "file-info", "references", "description"], force=True, limit=1000) return odir +def retrieve_data_arxiv(sdir): + + tdir = os.path.join(sdir, "arxiv") + + if not os.path.exists(sdir): + os.mkdir(sdir) + + index="arxiv" + query="description.publication_date:[2022-01-01 TO 2022-03-01]" + + odir = ds_index_query(index, query, tdir, sources=["_name", "file-info", "description", "main-text"], + force=True, limit=50000) + + return odir + def retrieve_data(sdir, index): tdir = os.path.join(sdir, index) @@ -89,7 +104,7 @@ def retrieve_data(sdir, index): query="*" odir = ds_index_query(index, query, tdir, sources=["_name", "file-info", "description", "main-text"], - force=True, limit=1000) + force=True, limit=50000) return odir @@ -101,50 +116,153 @@ def get_data(): for item in doc["references"]: data.append({"label":"reference", "text":item["text"], "document-hash":dhash}) - if "description" in doc: - desc = doc["description"] + """ + +def prepare_data_from_legacy_documents(doc): + + if "file-info" in doc: + dhash = doc["file-info"]["document-hash"] + else: + dhash = -1 + + N = len(doc["main-text"]) + + title_ind=len(doc["main-text"]) + + abs_beg=len(doc["main-text"]) + intro_beg=len(doc["main-text"]) + + ref_beg=len(doc["main-text"]) + ref_end=len(doc["main-text"]) + + data=[] + for i,item in enumerate(doc["main-text"]): + + if "text" not in item: + continue + + label = item["type"].lower() + text = item["text"].lower().strip() + + if "title" == label and title_ind==N: + title_ind=i + + if ("title" in label) and ("abstract" in text) and abs_beg==N: + abs_beg=i - if "title" in desc: - #data.append({"label":"title", "text":desc["title"], "document-hash":dhash}) - data.append({"label":"text", "text":desc["title"], "document-hash":dhash}) + if (text.startswith("abstract")) and abs_beg==N: + abs_beg=i - if "abstract" in desc: - for item in desc["abstract"]: - data.append({"label":"text", "text":item, "document-hash":dhash}) - - affiliations=[] - if "affiliations" in desc: - for item in desc["affiliations"]: - affiliations.append(item["name"]) - #data.append({"label":"affiliation", "text":item["name"], "document-hash":dhash}) - data.append({"label":"meta-data", "text":item["name"], "document-hash":dhash}) - - authors=[] - if "authors" in desc: - for item in desc["authors"]: - authors.append(item["name"]) - #data.append({"label":"person_name", "text":item["name"], "document-hash":dhash}) - data.append({"label":"meta-data", "text":item["name"], "document-hash":dhash}) - - if len(authors)>1: - data.append({"label":"meta-data", "text": ", ".join(authors), "document-hash":dhash}) - data.append({"label":"meta-data", "text": "; ".join(authors), "document-hash":dhash}) - - if len(affiliations)>1: - data.append({"label":"meta-data", "text": ", ".join(affiliations), "document-hash":dhash}) - data.append({"label":"meta-data", "text": "; ".join(affiliations), "document-hash":dhash}) + if ("title" in label) and ("introduction" in text) and intro_beg==N: + intro_beg=i + + if ("title" in label) and ("references" in text) and ref_beg==N: + ref_beg=i + + #(("title" in label) or ("caption" in label)) and ("reference" not in text): + if (ref_end==N and ref_begref_beg and + (("title" in label)) and ("reference" not in text)): + ref_end=i + + if title_ind==N or abs_beg==N or ref_beg==N: + return data + + print(dhash) + for i,item in enumerate(doc["main-text"]): + + if "text" not in item: + continue + + type_ = item["type"] + label = item["type"] + text = item["text"] + + skip = ((len(text)<=1) or (len(text.split(" "))==1)) and ("title" not in label) and (len(text)<=5) + if skip: + #print(f"skipping: {text}") + continue + + if title_ind=1 and len(affiliations)>=1: + if random.random()<0.9: + training_sample = True + else: + training_sample = False + + data.append({"document-hash":dhash, "label":label, "text":item["text"], "training-sample": training_sample}) - for _ in authors: - for __ in affiliations: - data.append({"label":"meta-data", "text": " ".join([_, __]), "document-hash":dhash}) - """ + return data + +def prepare_data_from_description(doc): + + if "file-info" in doc: + dhash = doc["file-info"]["document-hash"] + else: + dhash = -1 + data=[] + + if "references" in doc: + for item in doc["references"]: + data.append({"label":"reference", "text":item["text"], "document-hash":dhash}) -def prepare_data(json_files, data_file): + if "description" in doc: + + desc = doc["description"] + if "title" in desc: + data.append({"label":"text", "text":desc["title"], "document-hash":dhash}) + + if "abstract" in desc: + for item in desc["abstract"]: + data.append({"label":"text", "text":item, "document-hash":dhash}) + + affiliations=[] + if "affiliations" in desc: + for item in desc["affiliations"]: + affiliations.append(item["name"]) + data.append({"label":"meta-data", "text":item["name"], "document-hash":dhash}) + + authors=[] + if "authors" in desc: + for item in desc["authors"]: + authors.append(item["name"]) + data.append({"label":"meta-data", "text":item["name"], "document-hash":dhash}) + + if len(authors)>1: + data.append({"label":"meta-data", "text": ", ".join(authors), "document-hash":dhash}) + + if len(affiliations)>1: + data.append({"label":"meta-data", "text": ", ".join(affiliations), "document-hash":dhash}) + + if len(authors)>=1 and len(affiliations)>=1: + for _ in authors: + for __ in affiliations: + data.append({"label":"meta-data", "text": " ".join([_, __]), "document-hash":dhash}) + + return data + +def prepare_data(json_files, data_file): + num_lines=0 fw = open(data_file, "w") @@ -159,92 +277,12 @@ def prepare_data(json_files, data_file): except: continue - if "file-info" in doc: - dhash = doc["file-info"]["document-hash"] - else: - dhash = -1 - if "main-text" in doc: - - N = len(doc["main-text"]) - - title_ind=len(doc["main-text"]) - - abs_beg=len(doc["main-text"]) - intro_beg=len(doc["main-text"]) - - ref_beg=len(doc["main-text"]) - ref_end=len(doc["main-text"]) + data = prepare_data_from_legacy_documents(doc) + #continue + else: + data = prepare_data_from_description(doc) - for i,item in enumerate(doc["main-text"]): - - if "text" not in item: - continue - - label = item["type"].lower() - text = item["text"].lower().strip() - - if "title" == label and title_ind==N: - title_ind=i - - if ("title" in label) and ("abstract" in text) and abs_beg==N: - abs_beg=i - - if ("title" in label) and ("introduction" in text) and intro_beg==N: - intro_beg=i - - if ("title" in label) and ("reference" in text) and ref_beg==N: - ref_beg=i - - if ref_end==N and ref_begref_beg and (("title" in label) or ("caption" in label)) and ("reference" not in text): - ref_end=i - - if title_ind==N or abs_beg==N or ref_beg==N: - continue - - for i,item in enumerate(doc["main-text"]): - - if "text" not in item: - continue - - type_ = item["type"] - label = item["type"] - text = item["text"] - - skip = ((len(text)<=1) or (len(text.split(" "))==1)) and ("title" not in label) and (len(text)<=5) - if skip: - #print(f"skipping: {text}") - continue - - if title_indget_key(), "email", R"((?P(arXiv:(\d+).(\d+)(v\d*)? \[.+\] (\d+) [A-Za-z]+ \d+)))"); + exprs.push_back(expr); + } + return true; } From 13dc22e67f6187e01a0d7ea55a32cc8d83765d0e Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Tue, 14 Nov 2023 06:56:33 +0100 Subject: [PATCH 03/22] refactoring the properties in a document Signed-off-by: Peter Staar --- deepsearch_glm/nlp_apply_on_docs.py | 14 ++- src/andromeda/enums/structs.h | 5 - .../glm/model_cli/create/model_creator.h | 81 ++++++------- src/andromeda/nlp/cls/language.h | 88 +++++++++++++- src/andromeda/nlp/cls/semantic.h | 14 ++- src/andromeda/nlp/ent/expression.h | 44 ++++--- src/andromeda/nlp/ent/pos_pattern.h | 38 +----- src/andromeda/nlp/ent/quote.h | 4 +- src/andromeda/nlp/ent/reference.h | 4 +- src/andromeda/nlp/ent/sentence.h | 14 +-- src/andromeda/nlp/pos/lapos.h | 16 +-- src/andromeda/nlp/rel/abbreviation.h | 35 +++--- src/andromeda/tooling/models/base.h | 4 +- .../fasttext_supervised_model.h | 15 +-- .../tooling/structs/elements/text_element.h | 3 + .../tooling/structs/items/cls/base.h | 111 +++++++++++++----- .../tooling/structs/items/ent/instance.h | 36 +++++- .../tooling/structs/items/ent/tabulate.h | 14 ++- .../tooling/structs/items/rel/base.h | 8 +- .../tooling/structs/subjects/document.h | 26 ++-- src/andromeda/tooling/structs/subjects/text.h | 45 +++---- .../tooling/structs/tokens/word_token.h | 12 +- 22 files changed, 395 insertions(+), 236 deletions(-) diff --git a/deepsearch_glm/nlp_apply_on_docs.py b/deepsearch_glm/nlp_apply_on_docs.py index 10cd3609..99d2739d 100644 --- a/deepsearch_glm/nlp_apply_on_docs.py +++ b/deepsearch_glm/nlp_apply_on_docs.py @@ -7,8 +7,9 @@ import pandas as pd -from utils.ds_utils import convert_pdffiles, to_legacy_document_format +from tabulate import tabulate +from utils.ds_utils import convert_pdffiles, to_legacy_document_format from deepsearch_glm.andromeda_nlp import nlp_model def parse_arguments(): @@ -100,6 +101,14 @@ def init_nlp_model(models:str, filters:list[str]=[]): return model +def show_texts(doc_j): + + data=[] + for item in doc_j["texts"]: + data.append([item["hash"], item["text-hash"], item["text"][0:48]]) + + print(tabulate(data, headers=["hash", "text-hash", "text"])) + def show_doc(doc_j): """ @@ -125,6 +134,9 @@ def show_doc(doc_j): print(json.dumps(doc_j["tables"][0], indent=2)) """ + if "texts" in doc_j: + show_texts(doc_j) + if "properties" in doc_j: props = pd.DataFrame(doc_j["properties"]["data"], columns=doc_j["properties"]["headers"]) diff --git a/src/andromeda/enums/structs.h b/src/andromeda/enums/structs.h index 208b6d19..855c9752 100644 --- a/src/andromeda/enums/structs.h +++ b/src/andromeda/enums/structs.h @@ -6,18 +6,14 @@ namespace andromeda { enum subject_name { UNDEF, - //TEXT, PROMPT, - //PARAGRAPH, TABLE, FIGURE, TEXT, TABLE, FIGURE, DOCUMENT}; const static std::vector SUBJECT_NAMES = { UNDEF, - //TEXT, PROMPT, - //PARAGRAPH, TABLE, FIGURE, TEXT, TABLE, FIGURE, DOCUMENT }; @@ -30,7 +26,6 @@ namespace andromeda case PROMPT: return "PROMPT"; - //case PARAGRAPH: return "PARAGRAPH"; case TEXT: return "TEXT"; case TABLE: return "TABLE"; case FIGURE: return "FIGURE"; diff --git a/src/andromeda/glm/model_cli/create/model_creator.h b/src/andromeda/glm/model_cli/create/model_creator.h index b7a55cd0..ffc6428b 100644 --- a/src/andromeda/glm/model_cli/create/model_creator.h +++ b/src/andromeda/glm/model_cli/create/model_creator.h @@ -393,16 +393,16 @@ namespace andromeda for(auto itr=subj.insts_beg({i,j}); itr!=subj.insts_end({i,j}); itr++) { - assert(i==(itr->coor)[0]); - assert(j==(itr->coor)[1]); + assert(i==itr->get_coor(0)); + assert(j==itr->get_coor(1)); const base_instance& inst = *itr; //LOG_S(INFO) << "inst: " << inst.to_json().dump(); - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); - if(inst.model_type==andromeda::TERM and - inst.model_subtype=="single-term") + if(inst.is_model(TERM) and + inst.is_subtype("single-term")) { std::vector term_hashes={}; for(std::size_t i=rng[0]; i sent_rngs={}; for(auto& inst:instances) { - if(inst.model_type==andromeda::SENTENCE) + if(inst.is_model(SENTENCE)) { - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); sent_rngs.insert(rng); } } @@ -725,12 +726,12 @@ namespace andromeda { for(auto& inst:instances) { - if(inst.model_type==andromeda::TERM) + if(inst.is_model(TERM)) { nodes.get(beg_term_hash).incr_word_cnt();// += 1; nodes.get(end_term_hash).incr_word_cnt();// += 1; - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); edges.insert(edge_names::to_label, tok_hashes.at(rng[0] ), beg_term_hash, false); edges.insert(edge_names::to_label, tok_hashes.at(rng[1]-1), end_term_hash, false); @@ -741,12 +742,12 @@ namespace andromeda edges.insert(edge_names::tax_up, end_term_hash, tok_hashes.at(rng[1]-1), false); } - if(inst.model_type==andromeda::SENTENCE) + if(inst.is_model(SENTENCE)) { nodes.get(beg_sent_hash).incr_word_cnt();// += 1; nodes.get(end_sent_hash).incr_word_cnt();// += 1; - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); edges.insert(edge_names::to_label, tok_hashes.at(rng[0] ), beg_sent_hash, false); edges.insert(edge_names::to_label, tok_hashes.at(rng[1]-1), end_sent_hash, false); @@ -779,15 +780,15 @@ namespace andromeda { for(auto& inst:instances) { - if(inst.model_type==andromeda::EXPRESSION and - (inst.model_subtype=="name-concatenation" or - inst.model_subtype=="word-concatenation" or - inst.model_subtype=="latex-concatenation") and - inst.name.find("-")!=std::string::npos and - inst.name.find(" ")==std::string::npos and - (inst.wtok_range[1]-inst.wtok_range[0])==1) + if(inst.is_model(EXPRESSION) and + (inst.is_subtype("name-concatenation") or + inst.is_subtype("word-concatenation") or + inst.is_subtype("latex-concatenation")) and + inst.get_name().find("-")!=std::string::npos and + inst.get_name().find(" ")==std::string::npos and + (inst.get_wtok_range(1)-inst.get_wtok_range(0))==1) { - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); hash_type hash = tok_hashes.at(rng[0]); auto& node = nodes.get(hash); @@ -808,7 +809,7 @@ namespace andromeda base_node path(node_names::CONT, cont_hashes); nodes.insert(path, false); - rng_to_hash.emplace(inst.wtok_range, path.get_hash()); + rng_to_hash.emplace(inst.get_wtok_range(), path.get_hash()); for(std::size_t i=0; i hashes={}; for(std::size_t i=rng[0]; i term_hashes={}; for(std::size_t i=rng[0]; i verb_hashes={}; std::vector pos={}; @@ -994,7 +995,7 @@ namespace andromeda base_node path(node_names::VERB, verb_hashes); nodes.insert(path, false); - rng_to_verb.emplace(inst.wtok_range, path.get_hash()); + rng_to_verb.emplace(inst.get_wtok_range(), path.get_hash()); for(std::size_t i=0; i path_hashes={}; - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); for(index_type l=rng[0]; l::apply(subject& subj) { if(not satisfies_dependencies(subj)) @@ -141,10 +142,12 @@ namespace andromeda { this->apply(*para); - base_property prop("null", "null", 0.0); + base_property prop(para->get_hash(), TEXT, para->get_sref(), + "null", "null", 0.0); + if(get(*para, prop)) { - std::string key = prop.get_name(); + std::string key = prop.get_label(); std::size_t dst = para->dst; if(lang_mapping.count(key)==1) @@ -160,19 +163,20 @@ namespace andromeda } } - base_property prop(this->get_key(), "null", 0.0); + base_property prop(subj.get_hash(), DOCUMENT, "#", + this->get_key(), "null", 0.0); for(auto itr=lang_mapping.begin(); itr!=lang_mapping.end(); itr++) { double confidence = std::round(1000*(itr->second)/(0.0+total))/1000.0; if(itr==lang_mapping.begin()) { - prop.set_name(itr->first); + prop.set_label(itr->first); prop.set_conf(confidence); } else if(prop.get_conf()first); + prop.set_label(itr->first); prop.set_conf(confidence); } else @@ -183,6 +187,80 @@ namespace andromeda return update_applied_models(subj); } + */ + + bool nlp_model::apply(subject& subj) + { + if(not satisfies_dependencies(subj)) + { + return false; + } + + std::string text="", label="null"; + double conf=0.0; + + std::map lang_mapping; + std::size_t total=0; + + for(uint64_t ind=0; indget_len(); + total += para->get_len(); + } + else + { + lang_mapping[label] = para->get_len(); + total += para->get_len(); + } + } + + para->properties.emplace_back(para->get_hash(), TEXT, "#/texts/"+std::to_string(ind), + get_name(), label, conf); + para->applied_models.insert(get_key()); + + subj.properties.emplace_back(para->get_hash(), TEXT, "#/texts/"+std::to_string(ind), + get_name(), label, conf); + subj.applied_models.insert(get_key()); + } + + base_property prop(subj.get_hash(), DOCUMENT, "#", + get_name(), "null", 0.0); + for(auto itr=lang_mapping.begin(); itr!=lang_mapping.end(); itr++) + { + double confidence = std::round(1000*(itr->second)/(0.0+total))/1000.0; + + if(itr==lang_mapping.begin()) + { + prop.set_label(itr->first); + prop.set_conf(confidence); + } + else if(prop.get_conf()first); + prop.set_conf(confidence); + } + else + {} + } + subj.properties.push_back(prop); + + return update_applied_models(subj); + } } diff --git a/src/andromeda/nlp/cls/semantic.h b/src/andromeda/nlp/cls/semantic.h index cc49b758..84653474 100644 --- a/src/andromeda/nlp/cls/semantic.h +++ b/src/andromeda/nlp/cls/semantic.h @@ -290,7 +290,8 @@ namespace andromeda if(known_headers.count(text)) { - subj.properties.emplace_back(get_key(), "meta-data", 1.0); + subj.properties.emplace_back(subj.get_hash(), TEXT, "#", + get_name(), "meta-data", 1.0); return true; } else @@ -361,10 +362,15 @@ namespace andromeda label = "text"; } - std::string key = get_key(); + //std::string key = get_key(); - para->properties.emplace_back(key, label, conf); - para->applied_models.insert(key); + para->properties.emplace_back(para->get_hash(), TEXT, "#/texts/"+std::to_string(ind), + get_name(), label, conf); + para->applied_models.insert(get_key()); + + subj.properties.emplace_back(para->get_hash(), TEXT, "#/texts/"+std::to_string(ind), + get_name(), label, conf); + subj.applied_models.insert(get_key()); } return update_applied_models(subj); diff --git a/src/andromeda/nlp/ent/expression.h b/src/andromeda/nlp/ent/expression.h index c6a8f3da..0f2a3c21 100644 --- a/src/andromeda/nlp/ent/expression.h +++ b/src/andromeda/nlp/ent/expression.h @@ -359,13 +359,13 @@ namespace andromeda for(auto& ent:subj.instances) { - if(ent.model_type==EXPRESSION and ent.model_subtype=="common" and ent.wtoken_len()==1) + if(ent.is_model(EXPRESSION) and ent.is_subtype("common") and ent.wtoken_len()==1) { - subj.word_tokens.at(ent.wtok_range[0]).set_word(ent.name); + subj.word_tokens.at(ent.get_wtok_range(0)).set_word(ent.get_name()); } - else if(ent.model_type==EXPRESSION and ent.model_subtype=="apostrophe" and ent.wtoken_len()==1) + else if(ent.is_model(EXPRESSION) and ent.is_subtype("apostrophe") and ent.wtoken_len()==1) { - subj.word_tokens.at(ent.wtok_range[0]).set_word(ent.name); + subj.word_tokens.at(ent.get_wtok_range(0)).set_word(ent.get_name()); } else {} @@ -660,14 +660,12 @@ namespace andromeda for(auto& ent:subj.instances) { - if(ent.model_type==CITE) + if(ent.is_model(CITE)) { - utils::mask(text, ent.char_range); + utils::mask(text, ent.get_char_range()); } } - //std::size_t max_id = subj.get_max_ent_hash(); - // find all latex expressions bool found_new = true; while(found_new) @@ -716,9 +714,9 @@ namespace andromeda std::set forbidden_inds={}; for(auto& ent:subj.instances) { - if(ent.model_type==CITE) + if(ent.is_model(CITE)) { - for(std::size_t ind=ent.wtok_range[0]; ind words; - std::string text = ent.orig; + std::string text = ent.get_orig(); { while(true) @@ -938,14 +936,14 @@ namespace andromeda { for(auto itr_j=insts.begin(); itr_j!=insts.end(); itr_j++) { - auto cr_i = itr_i->char_range; - auto cr_j = itr_j->char_range; + auto cr_i = itr_i->get_char_range(); + auto cr_j = itr_j->get_char_range(); if(itr_i!=itr_j and - itr_i->model_type==EXPRESSION and - itr_j->model_type==EXPRESSION and + itr_i->is_model(EXPRESSION) and + itr_j->is_model(EXPRESSION) and cr_i==cr_j and - (itr_i->model_subtype)=="wtoken-concatenation") + itr_i->is_subtype("wtoken-concatenation")) { //LOG_S(INFO) << "removing: " << itr_i->orig << "; " << itr_i->name; @@ -953,8 +951,8 @@ namespace andromeda erasing=true; } if(itr_i!=itr_j and - itr_i->model_type==EXPRESSION and - itr_j->model_type==EXPRESSION and + itr_i->is_model(EXPRESSION) and + itr_j->is_model(EXPRESSION) and ((cr_j[0]<=cr_i[0] and cr_i[1]model_type==EXPRESSION and - (itr_j->model_type==NUMVAL or itr_j->model_type==NAME) and + else if(itr_i->is_model(EXPRESSION) and + (itr_j->is_model(NUMVAL) or itr_j->is_model(NAME)) and cr_i==cr_j) { //LOG_S(INFO) << "removing: " << itr_i->orig << "; " << itr_i->name; diff --git a/src/andromeda/nlp/ent/pos_pattern.h b/src/andromeda/nlp/ent/pos_pattern.h index 50c07e9d..44b47164 100644 --- a/src/andromeda/nlp/ent/pos_pattern.h +++ b/src/andromeda/nlp/ent/pos_pattern.h @@ -67,45 +67,19 @@ namespace andromeda { for(auto& ent_i:subj.instances) { - if((ent_i.model_type==PARENTHESIS and ent_i.model_subtype=="reference") or - (ent_i.model_type==LINK)) + if((ent_i.is_model(PARENTHESIS) and ent_i.is_subtype("reference")) or + (ent_i.is_model(LINK))) { - ranges_01.push_back(ent_i.char_range); + ranges_01.push_back(ent_i.get_char_range()); } - else if(ent_i.model_type==NAME or - ent_i.model_type==NUMVAL) + else if(ent_i.is_model(NAME) or + ent_i.is_model(NUMVAL)) { - ranges_02.push_back(ent_i.char_range); + ranges_02.push_back(ent_i.get_char_range()); } } } - /* - void base_pos_pattern::get_chunks(subject& subj, - std::vector& exprs, - std::vector& chunks) - { - chunks.clear(); - - std::stringstream ss; - for(std::size_t l=0; l& exprs, std::vector& chunks) diff --git a/src/andromeda/nlp/ent/quote.h b/src/andromeda/nlp/ent/quote.h index 15b44ff4..9a54bfcb 100644 --- a/src/andromeda/nlp/ent/quote.h +++ b/src/andromeda/nlp/ent/quote.h @@ -87,9 +87,9 @@ namespace andromeda std::string text = subj.text; for(auto& inst:subj.instances) { - if(dependencies.count(inst.model_type)==1) + if(dependencies.count(inst.get_model())==1) { - utils::mask(text, inst.char_range); + utils::mask(text, inst.get_char_range()); } } diff --git a/src/andromeda/nlp/ent/reference.h b/src/andromeda/nlp/ent/reference.h index 79540fa9..03950ab4 100644 --- a/src/andromeda/nlp/ent/reference.h +++ b/src/andromeda/nlp/ent/reference.h @@ -106,7 +106,7 @@ namespace andromeda for(auto& cls:subj.properties) { if((cls.get_type()==to_key(SEMANTIC)) and - (cls.get_name()=="reference")) + (cls.is_label("reference"))) { is_ref = true; } @@ -273,7 +273,7 @@ namespace andromeda auto itr=subj.instances.begin(); while(itr!=subj.instances.end()) { - if(itr->model_type!=REFERENCE) + if(not (itr->is_model(REFERENCE))) { itr = subj.instances.erase(itr); } diff --git a/src/andromeda/nlp/ent/sentence.h b/src/andromeda/nlp/ent/sentence.h index 3d721ab0..be861cd3 100644 --- a/src/andromeda/nlp/ent/sentence.h +++ b/src/andromeda/nlp/ent/sentence.h @@ -67,15 +67,15 @@ namespace andromeda for(auto& ent:subj.instances) { - if(dependencies.count(ent.model_type)==1) + if(dependencies.count(ent.get_model())==1) { - if(ent.model_type==NAME or - ent.model_type==EXPRESSION or - ent.model_type==QUOTE) + if(ent.is_model(NAME) or + ent.is_model(EXPRESSION) or + ent.is_model(QUOTE)) { - for(std::size_t i=ent.char_range[0]; i& subj); - void pre_process(std::vector& wtokens, - range_type& rng, + void pre_process(const std::vector& wtokens, + const range_type rng, std::vector& pos_tokens, std::map& ptid_to_wtid); @@ -134,9 +134,9 @@ namespace andromeda for(auto& prop:subj.properties) { if(prop.get_type()==to_key(LANGUAGE) and - pos_models.count(prop.get_name())==1) + pos_models.count(prop.get_label())==1) { - lang = prop.get_name(); + lang = prop.get_label(); dyn_dependency=true; } } @@ -177,12 +177,12 @@ namespace andromeda // iterate over the sentences ... for(auto& inst:instances) { - if(inst.model_type!=SENTENCE) + if(inst.is_model(SENTENCE)) { continue; } - pre_process(wtokens, inst.wtok_range, pos_tokens, ptid_to_wtid); + pre_process(wtokens, inst.get_wtok_range(), pos_tokens, ptid_to_wtid); pos_model->predict(pos_tokens); @@ -235,8 +235,8 @@ namespace andromeda } - void nlp_model::pre_process(std::vector& wtokens, - range_type& rng, + void nlp_model::pre_process(const std::vector& wtokens, + const range_type rng, std::vector& pos_tokens, std::map& ptid_to_wtid) { diff --git a/src/andromeda/nlp/rel/abbreviation.h b/src/andromeda/nlp/rel/abbreviation.h index 5a082843..378243f9 100644 --- a/src/andromeda/nlp/rel/abbreviation.h +++ b/src/andromeda/nlp/rel/abbreviation.h @@ -81,22 +81,25 @@ namespace andromeda for(auto& ent_j:subj.instances) { - auto& crng = ent_j.char_range; + auto crng = ent_j.get_char_range(); - auto& ctok_rng = ent_j.ctok_range; - auto& wtok_rng = ent_j.wtok_range; + auto ctok_rng = ent_j.get_ctok_range(); + auto wtok_rng = ent_j.get_wtok_range(); - if(ent_j.model_type==TERM and + auto name = ent_j.get_name(); + auto orig = ent_j.get_orig(); + + if(ent_j.is_model(TERM) and 0apply(*text_ptr); diff --git a/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h b/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h index 686009da..297667c2 100644 --- a/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h +++ b/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h @@ -790,10 +790,10 @@ namespace andromeda if(preprocess(subj, text) and classify(text, label, conf)) { - std::string key = get_key(); + //std::string key = get_key(); - subj.properties.emplace_back(key, label, conf); - subj.applied_models.insert(key); + subj.properties.emplace_back(subj.get_hash(), TEXT, "#", get_name(), label, conf); + subj.applied_models.insert(get_key()); } return update_applied_models(subj); @@ -813,13 +813,10 @@ namespace andromeda if(preprocess(subj, text) and classify(text, label, conf)) { - std::string key = get_key(); + //std::string key = get_key(); - subj.properties.emplace_back(key, label, conf); - subj.applied_models.insert(key); - - //LOG_S(INFO) << "text: " << text; - //LOG_S(INFO) << key << " (" << label << "): " << conf; + subj.properties.emplace_back(subj.get_hash(), TABLE, "#", get_name(), label, conf); + subj.applied_models.insert(get_key()); } return update_applied_models(subj); diff --git a/src/andromeda/tooling/structs/elements/text_element.h b/src/andromeda/tooling/structs/elements/text_element.h index f4d8f748..b04f22f2 100644 --- a/src/andromeda/tooling/structs/elements/text_element.h +++ b/src/andromeda/tooling/structs/elements/text_element.h @@ -24,6 +24,9 @@ namespace andromeda text_element(); bool is_valid(); + + std::size_t get_len() const { return len; } // number-of-chars + std::size_t get_dst() const { return dst; } // number-of-utf8-tokens void clear(); diff --git a/src/andromeda/tooling/structs/items/cls/base.h b/src/andromeda/tooling/structs/items/cls/base.h index 813b7453..c0a7874f 100644 --- a/src/andromeda/tooling/structs/items/cls/base.h +++ b/src/andromeda/tooling/structs/items/cls/base.h @@ -10,23 +10,41 @@ namespace andromeda public: const static inline std::string UNDEF = "__undef__"; - const static inline std::vector HEADERS = { "type", "label", "confidence"}; + + const static inline std::vector HEADERS + = { "type", + "subj_hash", "subj_name", "subj_path", + "label", "confidence"}; public: base_property(); - base_property(std::string type, - std::string name, - val_type conf); + base_property(hash_type subj_hash, // hash of the subject from which the entity comes + subject_name subj_name, + std::string subj_path, + //std::string type, + model_name model, + std::string label, + val_type conf); + + hash_type get_subj_hash() const { return subj_hash; } + subject_name get_subj_name() const { return subj_name; } + std::string get_subj_path() const { return subj_path; } - std::string get_type() { return this->type; } - std::string get_name() { return this->name; } + bool is_type(const std::string name) const { return (name==to_string(model)); } + bool is_label(const std::string label) const { return (label==this->label); } - float get_conf() { return this->conf; } + bool is_model(const model_name name) const { return (name==model); } + + model_name get_model() const { return this->model; } + std::string get_type() const { return to_string(this->model); } + + std::string get_label() const { return this->label; } + float get_conf() const { return this->conf; } - void set_name(const std::string& name) { this->name = name; } - void set_conf(const float& conf) { this->conf = conf; } + void set_label(const std::string label) { this->label=label; } + void set_conf(const float conf) { this->conf = conf; } std::vector to_row(); @@ -39,28 +57,46 @@ namespace andromeda private: - std::string type; - std::string name; - val_type conf; + hash_type subj_hash; // hash of the subject from which the entity comes + subject_name subj_name; + std::string subj_path; + + model_name model; + std::string label; + val_type conf; }; - + base_property::base_property(): - type(UNDEF), - name(UNDEF), + subj_hash(-1), + subj_name(TEXT), + subj_path("#"), + + model(NULL_MODEL), + label("UNDEF"), conf(0.0) {} + + base_property::base_property(hash_type subj_hash, + subject_name subj_name, + std::string subj_path, + model_name model, + std::string label, + val_type conf): + subj_hash(subj_hash), + subj_name(subj_name), + subj_path(subj_path), - base_property::base_property(std::string type, - std::string name, - val_type conf): - type(type), - name(name), - conf(conf) + model(model), + label(label), + conf(conf) {} - + std::vector base_property::to_row() { - std::vector row = { type, name, std::to_string(conf) }; + //std::vector row = { type, name, std::to_string(conf) }; + std::vector row = { to_string(model), + std::to_string(subj_hash), to_string(subj_name), subj_path, + label, std::to_string(utils::round_conf(conf)) }; assert(row.size()==HEADERS.size()); return row; @@ -70,8 +106,13 @@ namespace andromeda { nlohmann::json result = nlohmann::json::object(); { - result["type"] = type; - result["name"] = name; + result["type"] = to_string(model); + + result["subj_hash"] = subj_hash; + result["subj_name"] = to_string(subj_name); + result["subj_path"] = subj_path; + + result["label"] = label; result["confidence"] = utils::round_conf(conf); } @@ -80,7 +121,10 @@ namespace andromeda nlohmann::json base_property::to_json_row() { - nlohmann::json row = nlohmann::json::array({ type, name, utils::round_conf(conf)}); + nlohmann::json row = nlohmann::json::array({ + to_string(model), + subj_hash, to_string(subj_name), subj_path, + label, utils::round_conf(conf)}); assert(row.size()==HEADERS.size()); return row; @@ -90,9 +134,14 @@ namespace andromeda { if(row.size()>=HEADERS.size()) { - type = row[0].get(); - name = row[1].get(); - conf = row[2].get(); + model = to_modelname(row[0].get()); + + subj_hash = row[1].get(); + subj_name = to_subject_name(row[2].get()); + subj_path = row[3].get(); + + label = row[4].get(); + conf = row[5].get(); return true; } @@ -102,13 +151,13 @@ namespace andromeda bool operator<(const base_property& lhs, const base_property& rhs) { - if(lhs.type==rhs.type) + if(lhs.model==rhs.model) { return lhs.conf>rhs.conf; } else { - return (lhs.typerhs.char_range[1]; } return lhs.char_range[0]rhs.get_char_range(1); + } + return lhs.get_char_range(0) header = base_instance::short_text_headers(); diff --git a/src/andromeda/tooling/structs/items/rel/base.h b/src/andromeda/tooling/structs/items/rel/base.h index c2ad16ac..b08ce182 100644 --- a/src/andromeda/tooling/structs/items/rel/base.h +++ b/src/andromeda/tooling/structs/items/rel/base.h @@ -127,14 +127,14 @@ namespace andromeda flvr(to_flvr(name)), conf(conf), - hash_i(inst_i.ehash), + hash_i(inst_i.get_ehash()), //ihash_i(inst_i.ihash), - hash_j(inst_j.ehash), + hash_j(inst_j.get_ehash()), //ihash_j(inst_j.ihash), - name_i(inst_i.name), - name_j(inst_j.name) + name_i(inst_i.get_name()), + name_j(inst_j.get_name()) {} nlohmann::json base_relation::to_json_row() diff --git a/src/andromeda/tooling/structs/subjects/document.h b/src/andromeda/tooling/structs/subjects/document.h index 0197261f..7a442ab5 100644 --- a/src/andromeda/tooling/structs/subjects/document.h +++ b/src/andromeda/tooling/structs/subjects/document.h @@ -174,9 +174,9 @@ namespace andromeda auto& desc = result.at("description"); for(auto& prop:properties) { - if(prop.get_type()=="language") + if(prop.is_type("language")) { - std::vector langs = {prop.get_name()}; + std::vector langs = { prop.get_label() }; desc["languages"] = langs; } } @@ -453,15 +453,19 @@ namespace andromeda bool subject::finalise_properties() { - std::map property_total; - std::map, val_type> property_label_mapping; + /* + std::map property_total; + std::map, val_type> property_label_mapping; for(auto& text:texts) { for(auto& prop:text->properties) { - std::string mdl = prop.get_type(); - std::string lbl = prop.get_name(); + properties.push_back(prop); + + //std::string mdl = prop.get_type(); + model_name mdl = prop.get_model(); + std::string lbl = prop.get_label(); val_type conf = prop.get_conf(); val_type dst = text->dst; @@ -475,7 +479,7 @@ namespace andromeda property_total[mdl] = dst; } - std::pair key={mdl,lbl}; + std::pair key={mdl,lbl}; if(property_label_mapping.count(key)==1) { property_label_mapping[key] += dst*conf; @@ -490,10 +494,11 @@ namespace andromeda properties.clear(); for(auto itr=property_label_mapping.begin(); itr!=property_label_mapping.end(); itr++) { - std::string mdl = (itr->first).first; + model_name mdl = (itr->first).first; itr->second /= (property_total.at(mdl)); - base_property prop((itr->first).first, (itr->first).second, itr->second); + base_property prop(this->get_hash(), TEXT, "#/texts", + (itr->first).first, (itr->first).second, itr->second); properties.push_back(prop); } @@ -521,7 +526,8 @@ namespace andromeda itr++; } } - + */ + return true; } diff --git a/src/andromeda/tooling/structs/subjects/text.h b/src/andromeda/tooling/structs/subjects/text.h index 3eb4009d..eaf0fb48 100644 --- a/src/andromeda/tooling/structs/subjects/text.h +++ b/src/andromeda/tooling/structs/subjects/text.h @@ -223,13 +223,14 @@ namespace andromeda return std::upper_bound(instances.begin(), instances.end(), fake); } - bool subject::get_property_label(const std::string name, std::string& label) + bool subject::get_property_label(const std::string model_name, std::string& label) { for(auto& prop:properties) { - if(name==prop.get_type()) + //if(name==prop.get_type()) + if(prop.is_type(model_name)) { - label = prop.get_name(); + label = prop.get_label(); return true; } } @@ -248,25 +249,25 @@ namespace andromeda for(auto& inst:instances) { - inst.ctok_range = text_element::get_char_token_range(inst.char_range); - inst.wtok_range = text_element::get_word_token_range(inst.char_range); + inst.set_ctok_range(text_element::get_char_token_range(inst.get_char_range())); + inst.set_wtok_range(text_element::get_word_token_range(inst.get_char_range())); inst.verify_wtok_range_match(word_tokens); } } - void subject::contract_wtokens_from_instances(model_name name) + void subject::contract_wtokens_from_instances(model_name model) { std::vector candidates={}; for(auto& inst:instances) { - if(inst.model_type==name and - inst.wtok_range[0] get_tags() const { return tags; } // tags void set_word(std::string word); @@ -93,7 +93,7 @@ namespace andromeda void set_tag(std::string tag); void set_known(bool known); - bool has_tag(std::string tag); + bool has_tag(std::string tag) const; bool is_known(); word_token get_word_token(); @@ -252,7 +252,7 @@ namespace andromeda this->tags.insert(tag); } - bool word_token::has_tag(std::string tag) + bool word_token::has_tag(std::string tag) const { return ((this->tags.count(tag))>0); } From 24212309937b2e8c7505c914267387ae98a6bd63 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Wed, 15 Nov 2023 06:38:25 +0100 Subject: [PATCH 04/22] updated the tests Signed-off-by: Peter Staar --- src/andromeda/nlp/ent/pos_pattern/term.h | 1 + src/andromeda/nlp/pos/lapos.h | 66 ++++++++++++++++++- .../tooling/structs/items/cls/base.h | 10 +-- .../tooling/structs/items/ent/instance.h | 3 +- tests/data/docs/1806.02284.nlp.json | 2 +- tests/data/texts/references.nlp.jsonl | 4 +- tests/data/texts/semantics.nlp.jsonl | 14 ++-- tests/data/texts/terms.nlp.jsonl | 4 +- tests/data/texts/test_02A_text_01.jsonl | 2 +- tests/data/texts/test_02B_text_01.jsonl | 2 +- tests/test_nlp.py | 16 +++-- 11 files changed, 96 insertions(+), 28 deletions(-) diff --git a/src/andromeda/nlp/ent/pos_pattern/term.h b/src/andromeda/nlp/ent/pos_pattern/term.h index 47a8e495..4e252a39 100644 --- a/src/andromeda/nlp/ent/pos_pattern/term.h +++ b/src/andromeda/nlp/ent/pos_pattern/term.h @@ -124,6 +124,7 @@ namespace andromeda { if(not satisfies_dependencies(subj, text_dependencies)) { + //LOG_S(WARNING) << "skipping term ..."; return false; } diff --git a/src/andromeda/nlp/pos/lapos.h b/src/andromeda/nlp/pos/lapos.h index 0c4a85a5..67bf46e6 100644 --- a/src/andromeda/nlp/pos/lapos.h +++ b/src/andromeda/nlp/pos/lapos.h @@ -126,7 +126,9 @@ namespace andromeda template bool nlp_model::check_dependency(const std::set& deps, subject_type& subj, std::string& lang) - { + { + //LOG_S(INFO) << __FUNCTION__; + bool static_dependency = satisfies_dependencies(subj, deps); bool dyn_dependency=false; @@ -155,6 +157,7 @@ namespace andromeda std::string lang="null"; if(not check_dependency(text_dependencies, subj, lang)) { + //LOG_S(WARNING) << "skipping POS ..."; return false; } @@ -174,11 +177,17 @@ namespace andromeda auto& wtokens = subj.word_tokens; auto& instances = subj.instances; + /* // iterate over the sentences ... for(auto& inst:instances) { - if(inst.is_model(SENTENCE)) + //LOG_S(INFO) << "inst: " << to_key(inst.get_model()) + //<< "\t" << SENTENCE << "\t" << inst.get_model() + //<< "\t" << inst.is_model(SENTENCE); + + if(not inst.is_model(SENTENCE)) { + //LOG_S(WARNING) << " --> skipping inst ..."; continue; } @@ -188,6 +197,59 @@ namespace andromeda post_process(wtokens, pos_tokens, ptid_to_wtid); } + */ + + std::vector sent_ranges={}; + for(auto& inst:instances) + { + if(inst.is_model(SENTENCE)) + { + sent_ranges.push_back(inst.get_wtok_range()); + + //LOG_S(INFO) << "sentence: " + //<< sent_ranges.back().at(0) << ", " + //<< sent_ranges.back().at(1); + } + } + + std::vector ranges={}; + for(auto& rng:sent_ranges) + { + if(ranges.size()==0 and rng.at(0)==0) + { + ranges.push_back(rng); + } + else if(ranges.size()==0 and rng.at(0)>0) + { + ranges.push_back({0, rng.at(0)}); + ranges.push_back(rng); + } + else if(ranges.back().at(1)==rng.at(0)) + { + ranges.push_back(rng); + } + else if(ranges.back().at(1)0 and ranges.back().at(1)predict(pos_tokens); + + post_process(wtokens, pos_tokens, ptid_to_wtid); + } } bool nlp_model::apply(subject& subj) diff --git a/src/andromeda/tooling/structs/items/cls/base.h b/src/andromeda/tooling/structs/items/cls/base.h index c0a7874f..a905c4f3 100644 --- a/src/andromeda/tooling/structs/items/cls/base.h +++ b/src/andromeda/tooling/structs/items/cls/base.h @@ -32,13 +32,13 @@ namespace andromeda subject_name get_subj_name() const { return subj_name; } std::string get_subj_path() const { return subj_path; } - bool is_type(const std::string name) const { return (name==to_string(model)); } + bool is_type(const std::string name) const { return (name==to_key(model)); } bool is_label(const std::string label) const { return (label==this->label); } bool is_model(const model_name name) const { return (name==model); } model_name get_model() const { return this->model; } - std::string get_type() const { return to_string(this->model); } + std::string get_type() const { return to_key(this->model); } std::string get_label() const { return this->label; } float get_conf() const { return this->conf; } @@ -94,7 +94,7 @@ namespace andromeda std::vector base_property::to_row() { //std::vector row = { type, name, std::to_string(conf) }; - std::vector row = { to_string(model), + std::vector row = { to_key(model), std::to_string(subj_hash), to_string(subj_name), subj_path, label, std::to_string(utils::round_conf(conf)) }; assert(row.size()==HEADERS.size()); @@ -106,7 +106,7 @@ namespace andromeda { nlohmann::json result = nlohmann::json::object(); { - result["type"] = to_string(model); + result["type"] = to_key(model); result["subj_hash"] = subj_hash; result["subj_name"] = to_string(subj_name); @@ -122,7 +122,7 @@ namespace andromeda nlohmann::json base_property::to_json_row() { nlohmann::json row = nlohmann::json::array({ - to_string(model), + to_key(model), subj_hash, to_string(subj_name), subj_path, label, utils::round_conf(conf)}); assert(row.size()==HEADERS.size()); diff --git a/src/andromeda/tooling/structs/items/ent/instance.h b/src/andromeda/tooling/structs/items/ent/instance.h index 340e0fe7..86a36e31 100644 --- a/src/andromeda/tooling/structs/items/ent/instance.h +++ b/src/andromeda/tooling/structs/items/ent/instance.h @@ -127,7 +127,8 @@ namespace andromeda model_name get_model() const { return model_type; } - std::string get_type() const { return to_string(model_type); } + //std::string get_type() const { return to_string(model_type); } + std::string get_type() const { return to_key(model_type); } std::string get_subtype() const { return model_subtype; } bool is_in(subject_name sn) const { return (sn==subj_name);} diff --git a/tests/data/docs/1806.02284.nlp.json b/tests/data/docs/1806.02284.nlp.json index be9a5517..1c65374d 100644 --- a/tests/data/docs/1806.02284.nlp.json +++ b/tests/data/docs/1806.02284.nlp.json @@ -1 +1 @@ -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "body": [{"$ref": "#/texts/0"}, {"$ref": "#/texts/1"}, {"$ref": "#/texts/2"}, {"$ref": "#/texts/3"}, {"$ref": "#/texts/4"}, {"$ref": "#/texts/5"}, {"$ref": "#/texts/6"}, {"$ref": "#/texts/7"}, {"$ref": "#/texts/8"}, {"$ref": "#/texts/9"}, {"$ref": "#/texts/10"}, {"$ref": "#/texts/11"}, {"$ref": "#/texts/12"}, {"$ref": "#/texts/13"}, {"$ref": "#/figures/0"}, {"$ref": "#/texts/14"}, {"$ref": "#/texts/15"}, {"$ref": "#/texts/16"}, {"$ref": "#/texts/17"}, {"$ref": "#/texts/18"}, {"$ref": "#/texts/19"}, {"$ref": "#/texts/20"}, {"$ref": "#/texts/21"}, {"$ref": "#/texts/22"}, {"$ref": "#/figures/1"}, {"$ref": "#/figures/2"}, {"$ref": "#/texts/23"}, {"$ref": "#/texts/24"}, {"$ref": "#/texts/25"}, {"$ref": "#/texts/26"}, {"$ref": "#/texts/27"}, {"$ref": "#/texts/28"}, {"$ref": "#/texts/29"}, {"$ref": "#/texts/30"}, {"$ref": "#/texts/31"}, {"$ref": "#/texts/32"}, {"$ref": "#/texts/33"}, {"$ref": "#/texts/34"}, {"$ref": "#/texts/35"}, {"$ref": "#/texts/36"}, {"$ref": "#/texts/37"}, {"$ref": "#/figures/3"}, {"$ref": "#/texts/38"}, {"$ref": "#/texts/39"}, {"$ref": "#/texts/40"}, {"$ref": "#/texts/41"}, {"$ref": "#/figures/4"}, {"$ref": "#/texts/42"}, {"$ref": "#/texts/43"}, {"$ref": "#/texts/44"}, {"$ref": "#/texts/45"}, {"$ref": "#/texts/46"}, {"$ref": "#/texts/47"}, {"$ref": "#/texts/48"}, {"$ref": "#/tables/0/captions/0"}, {"$ref": "#/tables/0"}, {"$ref": "#/texts/49"}, {"$ref": "#/texts/50"}, {"$ref": "#/texts/51"}, {"$ref": "#/texts/52"}, {"$ref": "#/texts/53"}, {"$ref": "#/tables/1"}, {"$ref": "#/texts/54"}, {"$ref": "#/texts/55"}, {"$ref": "#/texts/56"}, {"$ref": "#/texts/57"}, {"$ref": "#/tables/1/captions/0"}, {"$ref": "#/tables/2"}, {"$ref": "#/texts/58"}, {"$ref": "#/texts/59"}, {"$ref": "#/texts/60"}, {"$ref": "#/texts/61"}, {"$ref": "#/texts/62"}, {"$ref": "#/texts/63"}, {"$ref": "#/texts/64"}, {"$ref": "#/texts/65"}, {"$ref": "#/texts/66"}, {"$ref": "#/texts/67"}, {"$ref": "#/texts/68"}, {"$ref": "#/texts/69"}, {"$ref": "#/texts/70"}, {"$ref": "#/texts/71"}, {"$ref": "#/texts/72"}, {"$ref": "#/figures/5"}, {"$ref": "#/figures/5/captions/0"}, {"$ref": "#/texts/73"}, {"$ref": "#/texts/74"}, {"$ref": "#/texts/75"}, {"$ref": "#/texts/76"}, {"$ref": "#/figures/6"}, {"$ref": "#/texts/77"}, {"$ref": "#/texts/78"}, {"$ref": "#/texts/79"}, {"$ref": "#/texts/80"}, {"$ref": "#/figures/7"}, {"$ref": "#/texts/81"}, {"$ref": "#/texts/82"}, {"$ref": "#/texts/83"}, {"$ref": "#/texts/84"}, {"$ref": "#/texts/85"}, {"$ref": "#/texts/86"}, {"$ref": "#/texts/87"}, {"$ref": "#/texts/88"}, {"$ref": "#/texts/89"}, {"$ref": "#/texts/90"}, {"$ref": "#/texts/91"}, {"$ref": "#/texts/92"}, {"$ref": "#/texts/93"}, {"$ref": "#/texts/94"}, {"$ref": "#/texts/95"}, {"$ref": "#/texts/96"}, {"$ref": "#/texts/97"}, {"$ref": "#/texts/98"}, {"$ref": "#/texts/99"}, {"$ref": "#/texts/100"}, {"$ref": "#/texts/101"}, {"$ref": "#/texts/102"}, {"$ref": "#/texts/103"}, {"$ref": "#/texts/104"}, {"$ref": "#/texts/105"}, {"$ref": "#/texts/106"}, {"$ref": "#/texts/107"}, {"$ref": "#/texts/108"}, {"$ref": "#/texts/109"}, {"$ref": "#/texts/110"}], "description": {"languages": ["en"], "logs": [{"agent": "CCS", "comment": "CCS v0.0.0-dev parsing of documents", "date": "2023-05-06T03:50:43.616725+00:00", "type": "parsing"}], "title": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale."}, "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#", "figures": [{"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/14", "hash": 16535999405521191333, "orig": "Figure 1: A diagram of the conversion pipeline in the Corpus Conversion Service platform. It consists of 5 components: (1) Parsing of the document and its contained bitmap images, (2) Annotating the text of the parsed documents with layout semantic labels, (3) Training models based on the ground-truth acquired by the annotations, (4) Applying machine learned models on the parsed documents to determine the layout semantic label of each cell and finally (5) Assembling the document into a structured data format (e. g. JSON). The main conversion pipeline is depicted in blue and allows you to process and convert documents at scale into a structured data format. The green and orange sections can be used optionally, in order to process scanned documents (green) or train new models based on human annotation (orange).", "prov": [{"$ref": "#/page-elements/21"}], "text": "Figure 1: A diagram of the conversion pipeline in the Corpus Conversion Service platform. It consists of 5 components: (1) Parsing of the document and its contained bitmap images, (2) Annotating the text of the parsed documents with layout semantic labels, (3) Training models based on the ground-truth acquired by the annotations, (4) Applying machine learned models on the parsed documents to determine the layout semantic label of each cell and finally (5) Assembling the document into a structured data format (e. g. JSON). The main conversion pipeline is depicted in blue and allows you to process and convert documents at scale into a structured data format. The green and orange sections can be used optionally, in order to process scanned documents (green) or train new models based on human annotation (orange).", "text-hash": 9615465947839001361, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/0", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/20"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/29", "hash": 9115121388992506886, "orig": "Figure 3: The labelled cells annotated on the title page of a poster abstract about the CCS [11]. Here, the title, authors, affiliation, subtitle, main-text, caption and picture labels are represented respectively as red, green, purple, dark-red, yellow, orange and ivory.", "prov": [{"$ref": "#/page-elements/43"}], "text": "Figure 3: The labelled cells annotated on the title page of a poster abstract about the CCS [11]. Here, the title, authors, affiliation, subtitle, main-text, caption and picture labels are represented respectively as red, green, purple, dark-red, yellow, orange and ivory.", "text-hash": 17324714532994059892, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/1", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/36"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/24", "hash": 14775249782836392461, "orig": "Figure 2: The cells obtained for the title page of a poster abstract about the CCS [11] after the parsing stage. During the parsing, we extract all bounding boxes of the text (or cells) in such a way that they all have: (1) a maximum width, (2) are only single line and (3) split into multiple cells in case of listidentifiers, multi-columns or crossing vertical lines (such as in tables).", "prov": [{"$ref": "#/page-elements/38"}], "text": "Figure 2: The cells obtained for the title page of a poster abstract about the CCS [11] after the parsing stage. During the parsing, we extract all bounding boxes of the text (or cells) in such a way that they all have: (1) a maximum width, (2) are only single line and (3) split into multiple cells in case of listidentifiers, multi-columns or crossing vertical lines (such as in tables).", "text-hash": 6754994759646241897, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/2", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/37"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/41", "hash": 7479698582664857938, "orig": "Figure 4: The annotation rate of pages for two different collections (Physical Review B and Elsevier papers) as a function of the number of annotated pages. As one can observe, the mean annotation rate is increasing after each training (depicted by a vertical dashed red line). After the first training, the human annotator is presented a pre-annotated page, using the predictions from the latest model. As the predictions become better with increasing size of the ground-truth, less corrections need to be made and hence more pages can be annotated in similar time intervals.", "prov": [{"$ref": "#/page-elements/59"}], "text": "Figure 4: The annotation rate of pages for two different collections (Physical Review B and Elsevier papers) as a function of the number of annotated pages. As one can observe, the mean annotation rate is increasing after each training (depicted by a vertical dashed red line). After the first training, the human annotator is presented a pre-annotated page, using the predictions from the latest model. As the predictions become better with increasing size of the ground-truth, less corrections need to be made and hence more pages can be annotated in similar time intervals.", "text-hash": 504280783932681152, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/3", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/58"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/46", "hash": 17801697261174341699, "orig": "Figure 5: A typical image of a parsed PDF page that is fed to the default models. In red, we show the detection of the tables combined with the confidence of the model. The results displayed here originate from the YOLOv2 model.", "prov": [{"$ref": "#/page-elements/65"}], "text": "Figure 5: A typical image of a parsed PDF page that is fed to the default models. In red, we show the detection of the tables combined with the confidence of the model. The results displayed here originate from the YOLOv2 model.", "text-hash": 8628591081653072559, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/4", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/64"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/80", "hash": 3206590615695639432, "orig": "Figure 6: Diagram of the architecture of our platform. The architecture is composed from 4 layers: an interface layer with REST-API and frontend, an orchestration layer with a message broker and results backend, a compute layer consisting out of a variable number of asynchronous workers and finally a storage layer providing a NoSQL database and an object store. The NoSQL database stores the queryable meta-data of each file that is stored in the object store.", "prov": [{"$ref": "#/page-elements/105"}], "text": "Figure 6: Diagram of the architecture of our platform. The architecture is composed from 4 layers: an interface layer with REST-API and frontend, an orchestration layer with a message broker and results backend, a compute layer consisting out of a variable number of asynchronous workers and finally a storage layer providing a NoSQL database and an object store. The NoSQL database stores the queryable meta-data of each file that is stored in the object store.", "text-hash": 4488590919374042342, "type": "paragraph"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/5", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/104"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/85", "hash": 6667504298804810757, "orig": "Figure 7: Evolution of number of users and number of PDF pages on the platform. The jumps in the number of pages originates from big ingestions of documents performed by some users. This proves that the CCS platform is also able to accomodate these short burst of extreme activity.", "prov": [{"$ref": "#/page-elements/115"}], "text": "Figure 7: Evolution of number of users and number of PDF pages on the platform. The jumps in the number of pages originates from big ingestions of documents performed by some users. This proves that the CCS platform is also able to accomodate these short burst of extreme activity.", "text-hash": 14863303056159196785, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/6", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/114"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/90", "hash": 16175086861512378818, "orig": "Figure 8: Speedup in the pipeline components as a function of the number of worker nodes (each with four cores, running four local worker processes).", "prov": [{"$ref": "#/page-elements/122"}], "text": "Figure 8: Speedup in the pipeline components as a function of the number of worker nodes (each with four cores, running four local worker processes).", "text-hash": 9976536719025941296, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/7", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/121"}], "type": "figure"}], "footnotes": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/0", "hash": 13109829297289816265, "orig": "Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for components of this work owned by others than the author(s) must be honored. Abstracting with credit is permitted. To copy otherwise, or republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee. Request permissions from permissions@acm.org.", "prov": [{"$ref": "#/page-elements/11"}], "text": "Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for components of this work owned by others than the author(s) must be honored. Abstracting with credit is permitted. To copy otherwise, or republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee. Request permissions from permissions@acm.org.", "text-hash": 13032800243621120549, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/1", "hash": 6056950725387475159, "orig": "KDD \u201918, August 19-23, 2018, London, United Kingdom", "prov": [{"$ref": "#/page-elements/12"}], "text": "KDD \u201918, August 19-23, 2018, London, United Kingdom", "text-hash": 15473297532078357059, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/2", "hash": 82667377498161992, "orig": "\u00a9 2018 Copyright held by the owner/author(s). Publication rights licensed to ACM. ACM ISBN 978-1-4503-5552-0/18/08...$15.00", "prov": [{"$ref": "#/page-elements/13"}], "text": "\u00a9 2018 Copyright held by the owner/author(s). Publication rights licensed to ACM. ACM ISBN 978-1-4503-5552-0/18/08...$15.00", "text-hash": 3001373187661149606, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/3", "hash": 4157740687705909538, "orig": "https://doi.org/10.1145/3219819.3219834", "prov": [{"$ref": "#/page-elements/14"}], "text": "https://doi.org/10.1145/3219819.3219834", "text-hash": 3547103316902677392, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/4", "hash": 11592315251976452419, "orig": "$^{1}$This number originates from a keynote talk by Phil Ydens, Adobe\u2019s VP Engineering for Document Cloud. A video of the presentation can be found here: https://youtu.be/ 5Axw6OGPYHw", "prov": [{"$ref": "#/page-elements/18"}], "text": "$^{1}$This number originates from a keynote talk by Phil Ydens, Adobe\u2019s VP Engineering for Document Cloud. A video of the presentation can be found here: https://youtu.be/ 5Axw6OGPYHw", "text-hash": 14549584251446631343, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/5", "hash": 14606262418347792388, "orig": "$^{2}$This is clearly the case on the popular arXiv scientific online repository: https://arxiv. org/help/stats/2012_by_area/index", "prov": [{"$ref": "#/page-elements/19"}], "text": "$^{2}$This is clearly the case on the popular arXiv scientific online repository: https://arxiv. org/help/stats/2012_by_area/index", "text-hash": 7221931865252575858, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/6", "hash": 7599391434737032939, "orig": "$^{3}$https://www.xpdfreader.com", "prov": [{"$ref": "#/page-elements/26"}], "text": "$^{3}$https://www.xpdfreader.com", "text-hash": 104933780092600391, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/7", "hash": 9645151231942484724, "orig": "$^{4}$http://tabula.technology/", "prov": [{"$ref": "#/page-elements/27"}], "text": "$^{4}$http://tabula.technology/", "text-hash": 11894228156061308002, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/8", "hash": 4601317523235901886, "orig": "$^{5}$https://www.abbyy.com/", "prov": [{"$ref": "#/page-elements/28"}], "text": "$^{5}$https://www.abbyy.com/", "text-hash": 3391629868238619420, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/9", "hash": 1678429643964197526, "orig": "$^{6}$https://www.nuance.com/", "prov": [{"$ref": "#/page-elements/29"}], "text": "$^{6}$https://www.nuance.com/", "text-hash": 1693441792396921860, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/10", "hash": 9599864648545137978, "orig": "$^{7}$https://www.ibm.com/us-en/marketplace/data-capture-and-imaging", "prov": [{"$ref": "#/page-elements/30"}], "text": "$^{7}$https://www.ibm.com/us-en/marketplace/data-capture-and-imaging", "text-hash": 11939931591922575256, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/11", "hash": 11599600757439696813, "orig": "$^{8}$a line of text might be printed character-by-character, word-by-word or the entire text snippet.", "prov": [{"$ref": "#/page-elements/49"}], "text": "$^{8}$a line of text might be printed character-by-character, word-by-word or the entire text snippet.", "text-hash": 14551310605717713161, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/12", "hash": 8672351490975826115, "orig": "$^{9}$http://qpdf.sourceforge.net/", "prov": [{"$ref": "#/page-elements/50"}], "text": "$^{9}$http://qpdf.sourceforge.net/", "text-hash": 17478669388996915759, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/13", "hash": 13163501967272675186, "orig": "$^{10}$It is important to notice that there is no restriction on the number of labels nor the semantic meaning of these labels. The only limitation one has is that the set of semantic labels needs to be consistent across the dataset, but this is evidently true for any type of ML algorithm.", "prov": [{"$ref": "#/page-elements/57"}], "text": "$^{10}$It is important to notice that there is no restriction on the number of labels nor the semantic meaning of these labels. The only limitation one has is that the set of semantic labels needs to be consistent across the dataset, but this is evidently true for any type of ML algorithm.", "text-hash": 13266614683838167520, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/14", "hash": 16307739621375260129, "orig": "$^{11}$All the data is coming from the bulk data download https://arxiv.org/help/bulk_data_s3", "prov": [{"$ref": "#/page-elements/73"}], "text": "$^{11}$All the data is coming from the bulk data download https://arxiv.org/help/bulk_data_s3", "text-hash": 10131428201408538445, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/15", "hash": 16584453941359713372, "orig": "$^{12}$https://journals.aps.org/prb", "prov": [{"$ref": "#/page-elements/95"}], "text": "$^{12}$https://journals.aps.org/prb", "text-hash": 9846388834475228858, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/16", "hash": 7152618592130781617, "orig": "$^{13}$https://www.openapis.org/", "prov": [{"$ref": "#/page-elements/110"}], "text": "$^{13}$https://www.openapis.org/", "text-hash": 831347610428179229, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/17", "hash": 6593099618554401757, "orig": "$^{14}$https://www.rabbitmq.com/", "prov": [{"$ref": "#/page-elements/111"}], "text": "$^{14}$https://www.rabbitmq.com/", "text-hash": 15235037228412732729, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/18", "hash": 7200807455610600839, "orig": "$^{15}$https://www.redis.io/", "prov": [{"$ref": "#/page-elements/112"}], "text": "$^{15}$https://www.redis.io/", "text-hash": 782710111840296691, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/19", "hash": 1602196689966359724, "orig": "$^{16}$http://www.celeryproject.org/", "prov": [{"$ref": "#/page-elements/113"}], "text": "$^{16}$http://www.celeryproject.org/", "text-hash": 1778492971410642442, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/20", "hash": 4503261997707320357, "orig": "$^{17}$https://www.mongodb.com/", "prov": [{"$ref": "#/page-elements/120"}], "text": "$^{17}$https://www.mongodb.com/", "text-hash": 3489272016069066385, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/21", "hash": 2838531283607966593, "orig": "$^{18}$https://kubernetes.io/", "prov": [{"$ref": "#/page-elements/131"}], "text": "$^{18}$https://kubernetes.io/", "text-hash": 5145030134774826221, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/22", "hash": 3398848297472714606, "orig": "$^{19}$ibm.biz/privatecloud", "prov": [{"$ref": "#/page-elements/132"}], "text": "$^{19}$ibm.biz/privatecloud", "text-hash": 4585077909629360588, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/23", "hash": 6724984968154270143, "orig": "$^{20}$We don\u2019t show the number of documents, since the number of pages in a document can range from 1 to well above 1000. Consequently, the number of pages is a more robust metric to measure the scaling with regard to the corpus size.", "prov": [{"$ref": "#/page-elements/139"}], "text": "$^{20}$We don\u2019t show the number of documents, since the number of pages in a document can range from 1 to well above 1000. Consequently, the number of pages is a more robust metric to measure the scaling with regard to the corpus size.", "text-hash": 14814952417700014875, "type": "footnote"}], "hash": 18446744073709551615, "instances": {"data": [["numval", "year", 7377574370756688828, "TEXT", "#/texts/0", 1.0, 389609625548777054, 1345153950666588077, 18446744073709551615, 18446744073709551615, 34, 38, 34, 38, 4, 5, true, "2018", "2018"], ["numval", "ival", 7377574370756688828, "TEXT", "#/texts/0", 1.0, 15441160910541481790, 218889966910406464, 18446744073709551615, 18446744073709551615, 27, 29, 27, 29, 2, 3, true, "24", "24"], ["parenthesis", "square brackets", 7377574370756688828, "TEXT", "#/texts/0", 1.0, 8106340136782143757, 305332543809292699, 18446744073709551615, 18446744073709551615, 19, 26, 19, 26, 1, 2, true, "[cs.DL]", "[cs.DL]"], ["expression", "wtoken-concatenation", 7377574370756688828, "TEXT", "#/texts/0", 1.0, 5564484558542728887, 6260400721402515593, 18446744073709551615, 18446744073709551615, 0, 18, 0, 18, 0, 1, true, "arXiv:1806.02284v1", "arXiv:1806.02284v1"], ["expression", "wtoken-concatenation", 7377574370756688828, "TEXT", "#/texts/0", 1.0, 8106340136782143757, 305332543809292699, 18446744073709551615, 18446744073709551615, 19, 26, 19, 26, 1, 2, true, "[cs.DL]", "[cs.DL]"], ["sentence", "", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 11303007895399162817, 11350976242507888924, 18446744073709551615, 18446744073709551615, 0, 84, 0, 84, 0, 14, true, "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale."], ["term", "single-term", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 12638008641667971393, 2808934749433980912, 18446744073709551615, 18446744073709551615, 0, 25, 0, 25, 0, 3, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 3953336115302703444, 3908089371773344302, 18446744073709551615, 18446744073709551615, 29, 54, 29, 54, 5, 8, true, "Machine Learning Platform", "Machine Learning Platform"], ["term", "single-term", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 2543543638813814383, 14974042820297549065, 18446744073709551615, 18446744073709551615, 58, 74, 58, 74, 9, 11, true, "Ingest Documents", "Ingest Documents"], ["term", "single-term", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 329104162321612062, 9665794625919571011, 18446744073709551615, 18446744073709551615, 78, 83, 78, 83, 12, 13, true, "Scale", "Scale"], ["conn", "single-conn", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 15441160910541487054, 1862666054904793840, 18446744073709551615, 18446744073709551615, 75, 77, 75, 77, 11, 12, true, "at", "at"], ["conn", "single-conn", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 15441160910541485865, 1862717525379277583, 18446744073709551615, 18446744073709551615, 55, 57, 55, 57, 8, 9, true, "to", "to"], ["link", "email", 18258237174351515285, "TEXT", "#/texts/3", 1.0, 7883794643982446593, 9473083479424942219, 18446744073709551615, 18446744073709551615, 0, 30, 0, 30, 0, 11, true, "taa,dol,cau,bek@zurich.ibm.com", "taa,dol,cau,bek@zurich.ibm.com"], ["geoloc", "country", 11056873211244709904, "TEXT", "#/texts/5", 1.0, 2664439525053388608, 16906723856094244091, 18446744073709551615, 18446744073709551615, 13, 24, 13, 24, 2, 3, true, "Switzerland", "Switzerland"], ["numval", "ival", 3624246356859711021, "TEXT", "#/texts/7", 1.0, 17767354399704235161, 12573472761345255474, 18446744073709551615, 18446744073709551615, 0, 1, 0, 1, 0, 1, true, "1", "1"], ["numval", "ival", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415896436703, 12968333296314215347, 18446744073709551615, 18446744073709551615, 1491, 1494, 1491, 1494, 249, 250, true, "250", "250"], ["parenthesis", "round brackets", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8624098978506921550, 8067551676911300261, 18446744073709551615, 18446744073709551615, 309, 347, 309, 347, 51, 60, true, "(e.g. the PDF format or bitmap images)", "(e.g. the PDF format or bitmap images)"], ["parenthesis", "round brackets", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 4552190965366435023, 5994729969442454976, 18446744073709551615, 18446744073709551615, 388, 409, 388, 409, 68, 73, true, "(e.g. complex tables)", "(e.g. complex tables)"], ["parenthesis", "round brackets", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104053210116957, 3393895258272698836, 18446744073709551615, 18446744073709551615, 628, 633, 628, 633, 109, 112, true, "(CCS)", "(CCS)"], ["parenthesis", "round brackets", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8912272716224106832, 12227152516026650269, 18446744073709551615, 18446744073709551615, 708, 735, 708, 735, 124, 131, true, "(i.e. collect ground-truth)", "(i.e. collect ground-truth)"], ["expression", "common", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486545, 11606670743807693522, 18446744073709551615, 18446744073709551615, 709, 713, 709, 713, 125, 126, true, "ie", "i.e."], ["expression", "common", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541487324, 11606670863251774055, 18446744073709551615, 18446744073709551615, 310, 314, 310, 314, 52, 53, true, "eg", "e.g."], ["expression", "common", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541487324, 11606670863251791461, 18446744073709551615, 18446744073709551615, 389, 393, 389, 393, 69, 70, true, "eg", "e.g."], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15169931585135175826, 17270979630715224833, 18446744073709551615, 18446744073709551615, 525, 536, 525, 536, 93, 94, true, "cloud-based", "cloud-based"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6307689511527468252, 12199545311202481186, 18446744073709551615, 18446744073709551615, 743, 759, 743, 759, 133, 134, true, "machine-learning", "machine-learning"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3932662928795581219, 3325076288347729928, 18446744073709551615, 18446744073709551615, 828, 844, 828, 844, 144, 145, true, "bitmap-documents", "bitmap-documents"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3753411203337468488, 16756051673090395246, 18446744073709551615, 18446744073709551615, 1102, 1114, 1102, 1114, 187, 188, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6307689511527468252, 12199545311202523423, 18446744073709551615, 18446744073709551615, 1133, 1149, 1133, 1149, 191, 192, true, "machine-learning", "machine-learning"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3753411203337468488, 16756051673090420119, 18446744073709551615, 18446744073709551615, 1244, 1256, 1244, 1256, 210, 211, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 10391722136816057200, 4465071482523967093, 18446744073709551615, 18446744073709551615, 1512, 1533, 1512, 1533, 253, 254, true, "knowledge-engineering", "knowledge-engineering"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11355983594424639335, 375612941360355674, 18446744073709551615, 18446744073709551615, 1298, 1314, 1298, 1314, 219, 220, true, "precision/recall", "precision/recall"], ["expression", "wtoken-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415896195376, 12963254028349616217, 18446744073709551615, 18446744073709551615, 1339, 1342, 1339, 1342, 225, 226, true, "99%", "99%"], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8311273775079009361, 18234444390399509646, 18446744073709551615, 18446744073709551615, 0, 122, 0, 122, 0, 20, true, "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size.", "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8652887973149281574, 1544181945594032747, 18446744073709551615, 18446744073709551615, 123, 258, 123, 258, 20, 43, true, "Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable.", "Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5682935857557389413, 3518340224243798686, 18446744073709551615, 18446744073709551615, 259, 487, 259, 487, 43, 84, true, "Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging.", "Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 18403546089192870947, 3375274648488008071, 18446744073709551615, 18446744073709551615, 488, 575, 488, 575, 84, 101, true, "In this paper, we present a modular, cloud-based platform to ingest documents at scale.", "In this paper, we present a modular, cloud-based platform to ingest documents at scale."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15870780009666831983, 2120332988466055117, 18446744073709551615, 18446744073709551615, 576, 891, 576, 891, 101, 152, true, "This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format.", "This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 10285604264132694933, 1782145150804012891, 18446744073709551615, 18446744073709551615, 892, 1045, 892, 1045, 152, 177, true, "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents.", "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 696858082777940132, 6587401266180559184, 18446744073709551615, 18446744073709551615, 1046, 1196, 1046, 1196, 177, 201, true, "Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude.", "Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11949985654620491247, 6433012828858116708, 18446744073709551615, 18446744073709551615, 1197, 1398, 1197, 1398, 201, 235, true, "This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output.", "This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11602122462230219692, 9062878903616548976, 18446744073709551615, 18446744073709551615, 1399, 1554, 1399, 1554, 235, 257, true, "The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements."], ["term", "enum-term-mark-1", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 9845754748010686003, 13443808248487347009, 18446744073709551615, 18446744073709551615, 433, 464, 433, 464, 77, 81, true, "qualitative and quantitive data", "qualitative and quantitive data"], ["term", "enum-term-mark-2", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14506873166110432521, 11857803489572599054, 18446744073709551615, 18446744073709551615, 323, 339, 323, 339, 55, 58, true, "format or bitmap", "format or bitmap"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16807436920751143074, 14986987871760575963, 18446744073709551615, 18446744073709551615, 9, 25, 9, 25, 2, 5, true, "past few decades", "past few decades"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 7863808487922385366, 2936430672705644663, 18446744073709551615, 18446744073709551615, 41, 60, 41, 60, 9, 11, true, "scientific articles", "scientific articles"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 7143078508811650826, 1305762834470469664, 18446744073709551615, 18446744073709551615, 65, 85, 65, 85, 12, 14, true, "technical literature", "technical literature"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2831583870146744553, 1311385802074388264, 18446744073709551615, 18446744073709551615, 148, 158, 148, 158, 25, 27, true, "great need", "great need"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 1602384110795404989, 1921537330407092158, 18446744073709551615, 18446744073709551615, 319, 329, 319, 329, 54, 56, true, "PDF format", "PDF format"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 7850715239909526655, 8028877058422980465, 18446744073709551615, 18446744073709551615, 333, 346, 333, 346, 57, 59, true, "bitmap images", "bitmap images"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 1806804053579249155, 8335167387144157878, 18446744073709551615, 18446744073709551615, 389, 408, 389, 408, 69, 72, true, "eg complex tables", "e.g. complex tables"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 13450540556572295481, 4139295332657747437, 18446744073709551615, 18446744073709551615, 449, 464, 449, 464, 79, 81, true, "quantitive data", "quantitive data"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12206009578906402256, 12092500979427102718, 18446744073709551615, 18446744073709551615, 525, 545, 525, 545, 93, 95, true, "cloud-based platform", "cloud-based platform"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12638008641667971393, 6722150771778728224, 18446744073709551615, 18446744073709551615, 602, 627, 602, 627, 106, 109, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3735444463619010795, 10473776487201094119, 18446744073709551615, 18446744073709551615, 709, 728, 709, 728, 125, 128, true, "ie collect ground", "i.e. collect ground"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3416039644310333922, 4934158934704280837, 18446744073709551615, 18446744073709551615, 737, 785, 737, 785, 132, 136, true, "train machine-learning classification algorithms", "train machine-learning classification algorithms"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2954625771153872709, 4652514773317300232, 18446744073709551615, 18446744073709551615, 850, 890, 850, 890, 147, 151, true, "structured content representation format", "structured content representation format"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 7838671148811051201, 3585713728473930092, 18446744073709551615, 18446744073709551615, 952, 990, 952, 990, 165, 168, true, "asynchronous microservice architecture", "asynchronous microservice architecture"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11942859038914222878, 6623027391573465220, 18446744073709551615, 18446744073709551615, 1016, 1031, 1016, 1031, 172, 174, true, "massive amounts", "massive amounts"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5415884051047601374, 4355778428986290778, 18446744073709551615, 18446744073709551615, 1133, 1160, 1133, 1160, 191, 193, true, "machine-learning algorithms", "machine-learning algorithms"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11805639520798919476, 8476511316725219115, 18446744073709551615, 18446744073709551615, 1227, 1240, 1227, 1240, 207, 209, true, "large amounts", "large amounts"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5928632445065269445, 14217942914367810037, 18446744073709551615, 18446744073709551615, 1265, 1276, 1265, 1276, 213, 215, true, "little time", "little time"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 10100743957883477761, 17954790962075745659, 18446744073709551615, 18446744073709551615, 1293, 1322, 1293, 1322, 218, 221, true, "good precision/recall metrics", "good precision/recall metrics"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14630472445500347050, 6260595242788033664, 18446744073709551615, 18446744073709551615, 1380, 1397, 1380, 1397, 232, 234, true, "structured output", "structured output"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 10465443055056631368, 58866334284871721, 18446744073709551615, 18446744073709551615, 1403, 1415, 1403, 1415, 236, 238, true, "CCS platform", "CCS platform"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 168078114375663109, 12852846298920524296, 18446744073709551615, 18446744073709551615, 1441, 1468, 1441, 1468, 242, 245, true, "IBM internal infrastructure", "IBM internal infrastructure"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8462871836886525200, 10493121872431814801, 18446744073709551615, 18446744073709551615, 1495, 1507, 1495, 1507, 250, 252, true, "active users", "active users"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12360325703059227080, 15341633962216548312, 18446744073709551615, 18446744073709551615, 1512, 1553, 1512, 1553, 253, 256, true, "knowledge-engineering project engagements", "knowledge-engineering project engagements"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206569333693762, 10666930667336151813, 18446744073709551615, 18446744073709551615, 31, 37, 31, 37, 7, 8, true, "amount", "amount"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625741058932, 1609635956783744714, 18446744073709551615, 18446744073709551615, 117, 121, 117, 121, 18, 19, true, "size", "size"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106478573663085763, 2644249028750571186, 18446744073709551615, 18446744073709551615, 163, 170, 163, 170, 28, 29, true, "systems", "systems"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6167933651658664291, 11942237281037682166, 18446744073709551615, 18446744073709551615, 193, 202, 193, 202, 33, 34, true, "documents", "documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161785194305, 772802872201272523, 18446744073709551615, 18446744073709551615, 206, 211, 206, 211, 35, 36, true, "scale", "scale"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6184122545182835014, 10915241214874887145, 18446744073709551615, 18446744073709551615, 235, 244, 235, 244, 40, 41, true, "knowledge", "knowledge"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206548538896813, 17191059727726770924, 18446744073709551615, 18446744073709551615, 283, 289, 283, 289, 47, 48, true, "format", "format"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6167933651658664291, 11942237281037615868, 18446744073709551615, 18446744073709551615, 299, 308, 299, 308, 50, 51, true, "documents", "documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15493249494625550468, 17136530455551824273, 18446744073709551615, 18446744073709551615, 363, 375, 363, 375, 64, 65, true, "presentation", "presentation"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625696431489, 1272382058296184235, 18446744073709551615, 18446744073709551615, 383, 387, 383, 387, 67, 68, true, "data", "data"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5303544497514782120, 263131364412872028, 18446744073709551615, 18446744073709551615, 419, 429, 419, 429, 75, 76, true, "extraction", "extraction"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161668023890, 773695676617294129, 18446744073709551615, 18446744073709551615, 496, 501, 496, 501, 86, 87, true, "paper", "paper"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6167933651658664291, 11942237281037632251, 18446744073709551615, 18446744073709551615, 556, 565, 556, 565, 97, 98, true, "documents", "documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161785194305, 772802872201252868, 18446744073709551615, 18446744073709551615, 569, 574, 569, 574, 99, 100, true, "scale", "scale"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14814125365076808131, 9647025272576644413, 18446744073709551615, 18446744073709551615, 581, 589, 581, 589, 102, 103, true, "platform", "platform"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415896221596, 12963251184768892790, 18446744073709551615, 18446744073709551615, 629, 632, 629, 632, 110, 111, true, "CCS", "CCS"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14814125852840540191, 2945478222614419396, 18446744073709551615, 18446744073709551615, 648, 656, 648, 656, 115, 116, true, "pipeline", "pipeline"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104159157820437, 995383834556884589, 18446744073709551615, 18446744073709551615, 670, 675, 670, 675, 118, 119, true, "users", "users"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6167933651658664291, 11942237281037582534, 18446744073709551615, 18446744073709551615, 698, 707, 698, 707, 123, 124, true, "documents", "documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104159241711235, 991946153785165058, 18446744073709551615, 18446744073709551615, 729, 734, 729, 734, 129, 130, true, "truth", "truth"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625631434316, 1612217538956723265, 18446744073709551615, 18446744073709551615, 813, 817, 813, 817, 140, 141, true, "type", "type"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415896289890, 12968333890042400352, 18446744073709551615, 18446744073709551615, 821, 824, 821, 824, 142, 143, true, "PDF", "PDF"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3932662928795581219, 3325076288347729928, 18446744073709551615, 18446744073709551615, 828, 844, 828, 844, 144, 145, true, "bitmap-documents", "bitmap-documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106464525640940249, 12084772193525026048, 18446744073709551615, 18446744073709551615, 922, 929, 922, 929, 159, 160, true, "modules", "modules"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6167933651658664291, 11942237281037800116, 18446744073709551615, 18446744073709551615, 1035, 1044, 1035, 1044, 175, 176, true, "documents", "documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2873671966753113989, 3590833722970570505, 18446744073709551615, 18446744073709551615, 1081, 1091, 1081, 1091, 184, 185, true, "capability", "capability"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161571401725, 741255023938407211, 18446744073709551615, 18446744073709551615, 1177, 1182, 1177, 1182, 197, 198, true, "order", "order"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6179392101937111178, 13132284913272968426, 18446744073709551615, 18446744073709551615, 1186, 1195, 1186, 1195, 199, 200, true, "magnitude", "magnitude"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3753411203337468488, 16756051673090420119, 18446744073709551615, 18446744073709551615, 1244, 1256, 1244, 1256, 210, 211, true, "ground-truth", "ground-truth"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161634702433, 739201814026917115, 18446744073709551615, 18446744073709551615, 1330, 1335, 1330, 1335, 223, 224, true, "range", "range"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206521526353544, 16720692448055193361, 18446744073709551615, 18446744073709551615, 1348, 1354, 1348, 1354, 227, 228, true, "regard", "regard"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2703018679320364082, 15916371892854536925, 18446744073709551615, 18446744073709551615, 1366, 1376, 1366, 1376, 230, 231, true, "conversion", "conversion"], ["verb", "compound-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11956062550033090038, 9437126490011979695, 18446744073709551615, 18446744073709551615, 86, 113, 86, 113, 14, 17, true, "has increased exponentially", "has increased exponentially"], ["verb", "compound-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5690225847229166303, 18320034715902341983, 18446744073709551615, 18446744073709551615, 1115, 1129, 1115, 1129, 188, 190, true, "is accelerated", "is accelerated"], ["verb", "compound-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 9791407429604398000, 14740221032007164243, 18446744073709551615, 18446744073709551615, 1281, 1292, 1281, 1292, 216, 218, true, "obtain very", "obtain very"], ["verb", "compound-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2604368229451749231, 5954729608874990660, 18446744073709551615, 18446744073709551615, 1416, 1437, 1416, 1437, 238, 241, true, "is currently deployed", "is currently deployed"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486535, 11606670739881444005, 18446744073709551615, 18446744073709551615, 143, 145, 143, 145, 23, 24, true, "is", "is"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2873440693780286732, 16242747501520400497, 18446744073709551615, 18446744073709551615, 176, 186, 176, 186, 30, 32, true, "can ingest", "can ingest"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625618412480, 1610868918855298631, 18446744073709551615, 18446744073709551615, 216, 220, 216, 220, 37, 38, true, "make", "make"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5947879769709188533, 15628690943209790850, 18446744073709551615, 18446744073709551615, 225, 234, 225, 234, 39, 40, true, "contained", "contained"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625618412480, 1610868918855286250, 18446744073709551615, 18446744073709551615, 410, 414, 410, 414, 73, 74, true, "make", "make"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106476016677076976, 2082360003734177772, 18446744073709551615, 18446744073709551615, 506, 513, 506, 513, 89, 90, true, "present", "present"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206560503286032, 18414709282119286416, 18446744073709551615, 18446744073709551615, 549, 555, 549, 555, 96, 97, true, "ingest", "ingest"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206563350835754, 16668546032725707234, 18446744073709551615, 18446744073709551615, 591, 597, 591, 597, 104, 105, true, "called", "called"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5584174880054122043, 1259340301497714443, 18446744073709551615, 18446744073709551615, 635, 645, 635, 645, 113, 114, true, "implements", "implements"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206569317834029, 10666754365487817153, 18446744073709551615, 18446744073709551615, 663, 669, 663, 669, 117, 118, true, "allows", "allows"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161667983915, 773700989878712775, 18446744073709551615, 18446744073709551615, 679, 684, 679, 684, 120, 121, true, "parse", "parse"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14650452911780017077, 11510513167121376409, 18446744073709551615, 18446744073709551615, 689, 697, 689, 697, 122, 123, true, "annotate", "annotate"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106398484416229602, 5707746526356454429, 18446744073709551615, 18446744073709551615, 801, 808, 801, 808, 138, 139, true, "convert", "convert"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3534225588934870450, 17328851096576172964, 18446744073709551615, 18446744073709551615, 895, 904, 895, 904, 153, 155, true, "will show", "will show"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486535, 11606670739883745478, 18446744073709551615, 18446744073709551615, 930, 932, 930, 932, 160, 161, true, "is", "is"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206485955868973, 16260582896355405879, 18446744073709551615, 18446744073709551615, 1009, 1015, 1009, 1015, 171, 172, true, "handle", "handle"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3534225588934870450, 17328851096575956236, 18446744073709551615, 18446744073709551615, 1062, 1071, 1062, 1071, 180, 182, true, "will show", "will show"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206562264646932, 18168705856416964271, 18446744073709551615, 18446744073709551615, 1095, 1101, 1095, 1101, 186, 187, true, "gather", "gather"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206569317834029, 10666754365454877920, 18446744073709551615, 18446744073709551615, 1202, 1208, 1202, 1208, 202, 203, true, "allows", "allows"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106398484416916345, 5707744688882101082, 18446744073709551615, 18446744073709551615, 1358, 1365, 1358, 1365, 229, 230, true, "content", "content"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106478708506631920, 17126853238947237410, 18446744073709551615, 18446744073709551615, 1473, 1480, 1473, 1480, 246, 247, true, "serving", "serving"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14650945419058940869, 11656646489767977845, 18446744073709551615, 18446744073709551615, 0, 8, 0, 8, 0, 2, true, "Over the", "Over the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821546960, 18446744073709551615, 18446744073709551615, 38, 40, 38, 40, 8, 9, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486538, 11606670739901094601, 18446744073709551615, 18446744073709551615, 114, 116, 114, 116, 17, 18, true, "in", "in"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415895625940, 12963192413398852201, 18446744073709551615, 18446744073709551615, 159, 162, 159, 162, 27, 28, true, "for", "for"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541487054, 11606670851925858322, 18446744073709551615, 18446744073709551615, 203, 205, 203, 205, 34, 35, true, "at", "at"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14814148868025447689, 10464458716096298180, 18446744073709551615, 18446744073709551615, 290, 298, 290, 298, 48, 50, true, "of these", "of these"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206564601699726, 16611998392190665699, 18446744073709551615, 18446744073709551615, 310, 318, 310, 318, 52, 54, true, "eg the", "e.g. the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206568455155979, 10578923885508625435, 18446744073709551615, 18446744073709551615, 356, 362, 356, 362, 62, 64, true, "as the", "as the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206565712212855, 18288882301375407275, 18446744073709551615, 18446744073709551615, 376, 382, 376, 382, 65, 67, true, "of the", "of the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821473010, 18446744073709551615, 18446744073709551615, 430, 432, 430, 432, 76, 77, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106396862006371970, 13002336324491202712, 18446744073709551615, 18446744073709551615, 488, 495, 488, 495, 84, 86, true, "In this", "In this"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541487054, 11606670851925882070, 18446744073709551615, 18446744073709551615, 566, 568, 566, 568, 98, 99, true, "at", "at"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821399597, 18446744073709551615, 18446744073709551615, 818, 820, 818, 820, 141, 142, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3504047303033029818, 12858913108667382047, 18446744073709551615, 18446744073709551615, 905, 914, 905, 914, 155, 157, true, "that each", "that each"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206565712212855, 18288882301375701872, 18446744073709551615, 18446744073709551615, 915, 921, 915, 921, 157, 159, true, "of the", "of the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821377067, 18446744073709551615, 18446744073709551615, 1032, 1034, 1032, 1034, 174, 175, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625631229034, 1612226062922593249, 18446744073709551615, 18446744073709551615, 1072, 1076, 1072, 1076, 182, 183, true, "that", "that"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486989, 11606670853486674912, 18446744073709551615, 18446744073709551615, 1130, 1132, 1130, 1132, 190, 191, true, "by", "by"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486989, 11606670853486803944, 18446744073709551615, 18446744073709551615, 1161, 1163, 1161, 1163, 193, 194, true, "by", "by"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541487054, 11606670851925780672, 18446744073709551615, 18446744073709551615, 1164, 1166, 1164, 1166, 194, 195, true, "at", "at"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821349359, 18446744073709551615, 18446744073709551615, 1183, 1185, 1183, 1185, 198, 199, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821388621, 18446744073709551615, 18446744073709551615, 1241, 1243, 1241, 1243, 209, 210, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486538, 11606670739900210613, 18446744073709551615, 18446744073709551615, 1257, 1259, 1257, 1259, 211, 212, true, "in", "in"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206560518651853, 18414993880775571288, 18446744073709551615, 18446744073709551615, 1323, 1329, 1323, 1329, 221, 223, true, "in the", "in the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821292551, 18446744073709551615, 18446744073709551615, 1336, 1338, 1336, 1338, 224, 225, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625618037948, 1610651885976451134, 18446744073709551615, 18446744073709551615, 1343, 1347, 1343, 1347, 226, 227, true, "with", "with"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485678, 11606670855875426468, 18446744073709551615, 18446744073709551615, 1438, 1440, 1438, 1440, 241, 242, true, "on", "on"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625631229040, 1612226037052379844, 18446744073709551615, 18446744073709551615, 1486, 1490, 1486, 1490, 248, 249, true, "than", "than"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415895625940, 12963192413398671002, 18446744073709551615, 18446744073709551615, 1508, 1511, 1508, 1511, 252, 253, true, "for", "for"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485865, 11606670830397324540, 18446744073709551615, 18446744073709551615, 546, 548, 546, 548, 95, 96, true, "to", "to"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485865, 11606670830397301532, 18446744073709551615, 18446744073709551615, 676, 678, 676, 678, 119, 120, true, "to", "to"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625631408052, 1612210503630929212, 18446744073709551615, 18446744073709551615, 845, 849, 845, 849, 145, 147, true, "to a", "to a"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104159243175056, 993032465640498236, 18446744073709551615, 18446744073709551615, 946, 951, 946, 951, 163, 165, true, "to an", "to an"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485865, 11606670830397529924, 18446744073709551615, 18446744073709551615, 1092, 1094, 1092, 1094, 185, 186, true, "to", "to"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106351192274276906, 17899388016831785682, 18446744073709551615, 18446744073709551615, 1212, 1219, 1212, 1219, 204, 206, true, "to both", "to both"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485865, 11606670830397545800, 18446744073709551615, 18446744073709551615, 1355, 1357, 1355, 1357, 228, 229, true, "to", "to"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485865, 11606670830397544434, 18446744073709551615, 18446744073709551615, 1377, 1379, 1377, 1379, 231, 232, true, "to", "to"], ["numval", "year", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 389609625548777054, 918164764798402581, 18446744073709551615, 18446744073709551615, 62, 66, 62, 66, 14, 15, true, "2018", "2018"], ["numval", "year", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 389609625548777054, 918164764798382455, 18446744073709551615, 18446744073709551615, 263, 267, 263, 267, 52, 53, true, "2018", "2018"], ["numval", "fval", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 11541938200508964503, 6621613840590615166, 18446744073709551615, 18446744073709551615, 351, 366, 351, 366, 75, 76, true, "3219819.3219834", "3219819.3219834"], ["numval", "irng", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 329104147759644091, 11978218711906185056, 18446744073709551615, 18446744073709551615, 256, 261, 256, 261, 50, 51, true, "19-23", "19-23"], ["numval", "ival", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 15441160910541481862, 12820302901235644324, 18446744073709551615, 18446744073709551615, 162, 164, 162, 164, 34, 35, true, "18", "18"], ["numval", "ival", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 17767354399704235153, 5919416028440889582, 18446744073709551615, 18446744073709551615, 317, 318, 317, 318, 68, 69, true, "9", "9"], ["numval", "ival", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 389609625536247226, 914428205219130181, 18446744073709551615, 18446744073709551615, 346, 350, 346, 350, 73, 74, true, "1145", "1145"], ["link", "url", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 3534146179424153776, 16664784081959773586, 18446744073709551615, 18446744073709551615, 326, 344, 326, 344, 71, 72, true, "https://doi.org/10", "https://doi.org/10"], ["expression", "wtoken-concatenation", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 389609625548781308, 918163733627828877, 18446744073709551615, 18446744073709551615, 170, 174, 170, 174, 37, 38, true, "24th", "24th"], ["expression", "wtoken-concatenation", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 3534146179424153776, 16664784081959773586, 18446744073709551615, 18446744073709551615, 326, 344, 326, 344, 71, 72, true, "https://doi.org/10", "https://doi.org/10"], ["sentence", "", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 6693772321601013182, 4815660570213750530, 18446744073709551615, 18446744073709551615, 0, 61, 0, 61, 0, 14, true, "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas.", "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas."], ["sentence", "", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 11303007895399162817, 1663341249273745902, 18446744073709551615, 18446744073709551615, 68, 152, 68, 152, 16, 30, true, "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale."], ["sentence", "", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 7000258542330205625, 12590258289379456668, 18446744073709551615, 18446744073709551615, 154, 292, 154, 292, 31, 59, true, "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom.", "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom."], ["sentence", "", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 17980062243523090453, 3043178868879598133, 18446744073709551615, 18446744073709551615, 293, 325, 293, 325, 59, 71, true, "ACM, New York, NY, USA, 9 pages.", "ACM, New York, NY, USA, 9 pages."], ["sentence", "", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 3268516227836428987, 8296109392654892130, 18446744073709551615, 18446744073709551615, 326, 345, 326, 345, 71, 73, true, "https://doi.org/10.", "https://doi.org/10."], ["term", "enum-term-mark-4", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 795735363451947563, 16676628183188309306, 18446744073709551615, 18446744073709551615, 214, 247, 214, 247, 43, 48, true, "Knowledge Discovery & Data Mining", "Knowledge Discovery & Data Mining"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 4686361850733567621, 14659076240775980364, 18446744073709551615, 18446744073709551615, 0, 15, 0, 15, 0, 4, true, "Peter W J Staar", "Peter W J Staar"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 1571808557594152175, 2521268111811279239, 18446744073709551615, 18446744073709551615, 17, 30, 17, 30, 5, 7, true, "Michele Dolfi", "Michele Dolfi"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 9737597816447750448, 18360796446007226291, 18446744073709551615, 18446744073709551615, 32, 46, 32, 46, 8, 10, true, "Christoph Auer", "Christoph Auer"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 10999349626623612055, 7141385911209629847, 18446744073709551615, 18446744073709551615, 48, 60, 48, 60, 11, 13, true, "Costas Bekas", "Costas Bekas"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 12638008641667971393, 8431069953230460203, 18446744073709551615, 18446744073709551615, 68, 93, 68, 93, 16, 19, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 3953336115302703444, 2106931920663782483, 18446744073709551615, 18446744073709551615, 97, 122, 97, 122, 21, 24, true, "Machine Learning Platform", "Machine Learning Platform"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 2543543638813814383, 10045085706299781635, 18446744073709551615, 18446744073709551615, 126, 142, 126, 142, 25, 27, true, "Ingest Documents", "Ingest Documents"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 3830746689439412878, 10628297214120553798, 18446744073709551615, 18446744073709551615, 170, 210, 170, 210, 37, 42, true, "24th ACM SIGKDD International Conference", "24th ACM SIGKDD International Conference"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 3800822513940686388, 12726957854536290921, 18446744073709551615, 18446744073709551615, 214, 233, 214, 233, 43, 45, true, "Knowledge Discovery", "Knowledge Discovery"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 9639847902089872401, 15642530745605263941, 18446744073709551615, 18446744073709551615, 236, 247, 236, 247, 46, 48, true, "Data Mining", "Data Mining"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 17782056979161528852, 4690987004959947827, 18446744073709551615, 18446744073709551615, 277, 291, 277, 291, 56, 58, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 14650948201816210252, 2694576768786644093, 18446744073709551615, 18446744073709551615, 298, 306, 298, 306, 61, 63, true, "New York", "New York"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 329104162321612062, 11274361467332635215, 18446744073709551615, 18446744073709551615, 146, 151, 146, 151, 28, 29, true, "Scale", "Scale"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 12178341415896253943, 1738717073979978820, 18446744073709551615, 18446744073709551615, 157, 160, 157, 160, 32, 33, true, "KDD", "KDD"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 16381206562442326159, 10586055992353118926, 18446744073709551615, 18446744073709551615, 249, 255, 249, 255, 49, 50, true, "August", "August"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 16381206531301571445, 12510416255984707889, 18446744073709551615, 18446744073709551615, 269, 275, 269, 275, 54, 55, true, "London", "London"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 12178341415896228980, 1738757751107532979, 18446744073709551615, 18446744073709551615, 293, 296, 293, 296, 59, 60, true, "ACM", "ACM"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 15441160910541487804, 12820302595509217913, 18446744073709551615, 18446744073709551615, 308, 310, 308, 310, 64, 65, true, "NY", "NY"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 12178341415895650394, 1738736899274670576, 18446744073709551615, 18446744073709551615, 312, 315, 312, 315, 66, 67, true, "USA", "USA"], ["term", "single-term", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 329104161667992688, 12637076450269003134, 18446744073709551615, 18446744073709551615, 319, 324, 319, 324, 69, 70, true, "pages", "pages"], ["conn", "single-conn", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 15441160910541487054, 12820303060826396831, 18446744073709551615, 18446744073709551615, 143, 145, 143, 145, 27, 28, true, "at", "at"], ["conn", "single-conn", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 15441160910541480354, 12820298442232007515, 18446744073709551615, 18446744073709551615, 154, 156, 154, 156, 31, 32, true, "In", "In"], ["conn", "single-conn", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 15441160910541485678, 12820303021843804862, 18446744073709551615, 18446744073709551615, 211, 213, 211, 213, 42, 43, true, "on", "on"], ["conn", "single-conn", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 15441160910541485865, 12820302971854609335, 18446744073709551615, 18446744073709551615, 123, 125, 123, 125, 24, 25, true, "to", "to"], ["geoloc", "country", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 17782056979161528852, 4690987004959947827, 18446744073709551615, 18446744073709551615, 277, 291, 277, 291, 56, 58, true, "United Kingdom", "United Kingdom"], ["numval", "fval", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415896439107, 14800962307501710678, 18446744073709551615, 18446744073709551615, 39, 42, 39, 42, 7, 8, true, "2.5", "2.5"], ["parenthesis", "round brackets", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 2236437873379298599, 8971166239141287330, 18446744073709551615, 18446744073709551615, 1048, 1094, 1048, 1094, 170, 180, true, "(e.g. find me a phase-diagram of material XYZ)", "(e.g. find me a phase-diagram of material XYZ)"], ["parenthesis", "round brackets", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 4516846515356980393, 4935623304895828855, 18446744073709551615, 18446744073709551615, 1196, 1246, 1196, 1246, 199, 210, true, "(with the PDF format being the most prevalent one)", "(with the PDF format being the most prevalent one)"], ["parenthesis", "round brackets", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 5879944210728656410, 9673170177479615330, 18446744073709551615, 18446744073709551615, 1432, 1473, 1432, 1473, 246, 257, true, "(documents, images, authors, tables, etc)", "(documents, images, authors, tables, etc)"], ["expression", "common", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541487324, 9094674364011169527, 18446744073709551615, 18446744073709551615, 1049, 1053, 1049, 1053, 171, 172, true, "eg", "e.g."], ["expression", "word-concatenation", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12555128312158075374, 3585475568588858575, 18446744073709551615, 18446744073709551615, 1064, 1077, 1064, 1077, 175, 176, true, "phase-diagram", "phase-diagram"], ["expression", "wtoken-concatenation", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9623123605532099037, 12825981064550354106, 18446744073709551615, 18446744073709551615, 79, 96, 79, 96, 13, 14, true, "circulation^{1}", "circulation$^{1}$"], ["expression", "wtoken-concatenation", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9653568957037915764, 1159839439008018639, 18446744073709551615, 18446744073709551615, 863, 882, 863, 882, 138, 139, true, "exponentially^{2}", "exponentially$^{2}$"], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 17192639608086865650, 10639035648049775025, 18446744073709551615, 18446744073709551615, 0, 97, 0, 97, 0, 15, true, "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$.", "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9088786707146406857, 17567093053494849836, 18446744073709551615, 18446744073709551615, 98, 252, 98, 252, 15, 41, true, "These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery.", "These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9645560666016248506, 8355944213796053339, 18446744073709551615, 18446744073709551615, 253, 359, 253, 359, 41, 59, true, "It is needless to say that valuable qualitative and quantitative information is contained in many of them.", "It is needless to say that valuable qualitative and quantitative information is contained in many of them."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 17647932338360720997, 5716030233811874384, 18446744073709551615, 18446744073709551615, 360, 509, 360, 509, 59, 84, true, "However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout.", "However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15487015001052727581, 14484812293778889252, 18446744073709551615, 18446744073709551615, 510, 722, 510, 722, 84, 115, true, "Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery.", "Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 3574328216950930229, 4905315167294659186, 18446744073709551615, 18446744073709551615, 723, 883, 723, 883, 115, 140, true, "In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$.", "In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8347632587306657460, 16097912844310233617, 18446744073709551615, 18446744073709551615, 884, 988, 884, 988, 140, 160, true, "This poses a real problem, since more and more information published in the PDF documents is going dark.", "This poses a real problem, since more and more information published in the PDF documents is going dark."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 7315676043002615146, 3020292113144700597, 18446744073709551615, 18446744073709551615, 989, 1133, 989, 1133, 160, 187, true, "In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components.", "In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8292138896065382931, 17716571591104291388, 18446744073709551615, 18446744073709551615, 1134, 1345, 1134, 1345, 187, 228, true, "First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML.", "First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 18073096319598857596, 14789900833203243228, 18446744073709551615, 18446744073709551615, 1346, 1532, 1346, 1532, 228, 267, true, "Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", "Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context."], ["term", "enum-term-mark-1", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12322374974058800893, 6816531868111142674, 18446744073709551615, 18446744073709551615, 280, 329, 280, 329, 47, 52, true, "valuable qualitative and quantitative information", "valuable qualitative and quantitative information"], ["term", "enum-term-mark-4", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 11674491770136657522, 11680961660123138230, 18446744073709551615, 18446744073709551615, 1333, 1344, 1333, 1344, 224, 227, true, "JSON or XML", "JSON or XML"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 3693395590591757392, 2559252195012720165, 18446744073709551615, 18446744073709551615, 43, 65, 43, 65, 8, 11, true, "trillion PDF documents", "trillion PDF documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8414271082704541626, 9829432072489958078, 18446744073709551615, 18446744073709551615, 149, 163, 149, 163, 23, 25, true, "annual reports", "annual reports"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 3282133738476528713, 6601164231648618886, 18446744073709551615, 18446744073709551615, 193, 208, 193, 208, 32, 34, true, "research papers", "research papers"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 1102904554370006265, 13125714652652128474, 18446744073709551615, 18446744073709551615, 222, 251, 222, 251, 37, 40, true, "specific scientific discovery", "specific scientific discovery"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 7668210657519556598, 8800539397108400539, 18446744073709551615, 18446744073709551615, 305, 329, 305, 329, 50, 52, true, "quantitative information", "quantitative information"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 13935212089545515210, 4563100627799985741, 18446744073709551615, 18446744073709551615, 431, 452, 431, 452, 73, 75, true, "printing instructions", "printing instructions"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16582444977748815769, 16919788927196448661, 18446744073709551615, 18446744073709551615, 486, 508, 486, 508, 80, 83, true, "pleasing visual layout", "pleasing visual layout"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 4929058514881842733, 10224787839479118537, 18446744073709551615, 18446744073709551615, 519, 538, 519, 538, 86, 88, true, "data representation", "data representation"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14929125759175486455, 13997854025989108072, 18446744073709551615, 18446744073709551615, 547, 567, 547, 567, 90, 92, true, "enormous variability", "enormous variability"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 5746783959074166208, 15517192707477599154, 18446744073709551615, 18446744073709551615, 635, 649, 635, 649, 102, 104, true, "access content", "access content"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 2730405582718102128, 15726970596030809890, 18446744073709551615, 18446744073709551615, 702, 721, 702, 721, 112, 114, true, "knowledge discovery", "knowledge discovery"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16813764953769919795, 4260210876529689133, 18446744073709551615, 18446744073709551615, 742, 764, 742, 764, 119, 122, true, "sheer current quantity", "sheer current quantity"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16688986026552560644, 17177901629424753408, 18446744073709551615, 18446744073709551615, 783, 798, 783, 798, 126, 128, true, "submission rate", "submission rate"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12621877848489179259, 15237617635766653290, 18446744073709551615, 18446744073709551615, 829, 846, 829, 846, 133, 135, true, "scientific domain", "scientific domain"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 5853227681134087829, 1787086050256320443, 18446744073709551615, 18446744073709551615, 897, 909, 897, 909, 143, 145, true, "real problem", "real problem"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12653831733608918357, 6140974263001666382, 18446744073709551615, 18446744073709551615, 960, 973, 960, 973, 154, 156, true, "PDF documents", "PDF documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 10167329824705672383, 12701577379507576649, 18446744073709551615, 18446744073709551615, 1081, 1093, 1081, 1093, 177, 179, true, "material XYZ", "material XYZ"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 1602384110795404989, 18168403198260411892, 18446744073709551615, 18446744073709551615, 1206, 1216, 1206, 1216, 202, 204, true, "PDF format", "PDF format"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12595883072252114156, 7039273758002805758, 18446744073709551615, 18446744073709551615, 1232, 1245, 1232, 1245, 207, 209, true, "prevalent one", "prevalent one"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 4066887494406769292, 14849727204374093143, 18446744073709551615, 18446744073709551615, 1278, 1299, 1278, 1299, 215, 218, true, "structured data files", "structured data files"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14630472899120924944, 15550065915551638064, 18446744073709551615, 18446744073709551615, 1307, 1324, 1307, 1324, 220, 222, true, "structured format", "structured format"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 13018076357583391135, 18265178771346204830, 18446744073709551615, 18446744073709551615, 1365, 1377, 1365, 1377, 233, 235, true, "query engine", "query engine"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 11805624357079379862, 2927818536118337064, 18446744073709551615, 18446744073709551615, 1406, 1419, 1406, 1419, 242, 244, true, "large variety", "large variety"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9623123605532099037, 12825981064550354106, 18446744073709551615, 18446744073709551615, 79, 96, 79, 96, 13, 14, true, "circulation^{1}", "circulation$^{1}$"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950143797819, 18446744073709551615, 18446744073709551615, 104, 113, 104, 113, 16, 17, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106464587474035829, 6502274748172348363, 18446744073709551615, 18446744073709551615, 125, 132, 125, 132, 19, 20, true, "manuals", "manuals"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15361659830789508523, 8413399544610388116, 18446744073709551615, 18446744073709551615, 137, 147, 137, 147, 21, 22, true, "appliances", "appliances"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 5947879506556567994, 16771512443857485166, 18446744073709551615, 18446744073709551615, 167, 176, 167, 176, 26, 27, true, "companies", "companies"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415895525628, 14794601526936094944, 18446744073709551615, 18446744073709551615, 186, 189, 186, 189, 30, 31, true, "way", "way"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106398484416916345, 17530806449434366453, 18446744073709551615, 18446744073709551615, 369, 376, 369, 376, 61, 62, true, "content", "content"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415896289890, 14799990756781414830, 18446744073709551615, 18446744073709551615, 388, 391, 388, 391, 64, 65, true, "PDF", "PDF"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206597113188775, 13905938768963750102, 18446744073709551615, 18446744073709551615, 402, 408, 402, 408, 68, 69, true, "nature", "nature"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106478700233620678, 8336023496233777462, 18446744073709551615, 18446744073709551615, 420, 427, 420, 427, 71, 72, true, "streams", "streams"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106342461491420046, 4172004388378103877, 18446744073709551615, 18446744073709551615, 571, 578, 571, 578, 93, 94, true, "layouts", "layouts"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950143438881, 18446744073709551615, 18446744073709551615, 592, 601, 592, 601, 96, 97, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 11387678566946341343, 4163904415113468966, 18446744073709551615, 18446744073709551615, 674, 688, 674, 688, 109, 110, true, "representation", "representation"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14650447861280948245, 18066875144210692331, 18446744073709551615, 18446744073709551615, 726, 734, 726, 734, 116, 117, true, "addition", "addition"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142901645, 18446744073709551615, 18446744073709551615, 768, 777, 768, 777, 123, 124, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142902450, 18446744073709551615, 18446744073709551615, 812, 821, 812, 821, 130, 131, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9653568957037915764, 1159839439008018639, 18446744073709551615, 18446744073709551615, 863, 882, 863, 882, 138, 139, true, "exponentially^{2}", "exponentially$^{2}$"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14388065630035882329, 2686196032102535307, 18446744073709551615, 18446744073709551615, 931, 942, 931, 942, 150, 151, true, "information", "information"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 329104161571401725, 8768421271667196313, 18446744073709551615, 18446744073709551615, 992, 997, 992, 997, 161, 162, true, "order", "order"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106398484416916345, 17530806449433194901, 18446744073709551615, 18446744073709551615, 1010, 1017, 1010, 1017, 165, 166, true, "content", "content"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142885131, 18446744073709551615, 18446744073709551615, 1027, 1036, 1027, 1036, 168, 169, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541487324, 9094674364011169527, 18446744073709551615, 18446744073709551615, 1049, 1053, 1049, 1053, 171, 172, true, "eg", "e.g."], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12555128312158075374, 3585475568588858575, 18446744073709551615, 18446744073709551615, 1064, 1077, 1064, 1077, 175, 176, true, "phase-diagram", "phase-diagram"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 2703018952916355661, 10279229622173728080, 18446744073709551615, 18446744073709551615, 1122, 1132, 1122, 1132, 185, 186, true, "components", "components"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142942051, 18446744073709551615, 18446744073709551615, 1160, 1169, 1160, 1169, 193, 194, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106478777441543540, 773597955729195721, 18446744073709551615, 18446744073709551615, 1177, 1184, 1177, 1184, 196, 197, true, "variety", "variety"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106397728035763965, 11508792142722132367, 18446744073709551615, 18446744073709551615, 1188, 1195, 1188, 1195, 198, 199, true, "formats", "formats"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142865755, 18446744073709551615, 18446744073709551615, 1265, 1274, 1265, 1274, 213, 214, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625541450799, 476546803986815687, 18446744073709551615, 18446744073709551615, 1333, 1337, 1333, 1337, 224, 225, true, "JSON", "JSON"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415895541463, 14794406103722084656, 18446744073709551615, 18446744073709551615, 1341, 1344, 1341, 1344, 226, 227, true, "XML", "XML"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14652282388618227426, 14047491818249905874, 18446744073709551615, 18446744073709551615, 1423, 1431, 1423, 1431, 245, 246, true, "concepts", "concepts"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142859841, 18446744073709551615, 18446744073709551615, 1433, 1442, 1433, 1442, 247, 248, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206560620045048, 15910167584621803731, 18446744073709551615, 18446744073709551615, 1444, 1450, 1444, 1450, 249, 250, true, "images", "images"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106397759446161562, 17038239979594063466, 18446744073709551615, 18446744073709551615, 1452, 1459, 1452, 1459, 251, 252, true, "authors", "authors"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206513098478539, 8569522873910347573, 18446744073709551615, 18446744073709551615, 1461, 1467, 1461, 1467, 253, 254, true, "tables", "tables"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142863448, 18446744073709551615, 18446744073709551615, 1495, 1504, 1495, 1504, 260, 261, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106398484416909789, 17530798545720977035, 18446744073709551615, 18446744073709551615, 1524, 1531, 1524, 1531, 265, 266, true, "context", "context"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 17551793109234931072, 9841315996119329650, 18446744073709551615, 18446744073709551615, 3, 15, 3, 15, 1, 3, true, "is estimated", "is estimated"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 696181546770410912, 10657444457642612809, 18446744073709551615, 18446744073709551615, 27, 38, 27, 38, 5, 7, true, "are roughly", "are roughly"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 17466643417440400812, 16357177041782037840, 18446744073709551615, 18446744073709551615, 330, 342, 330, 342, 52, 54, true, "is contained", "is contained"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15984679469930005672, 16512266362137627548, 18446744073709551615, 18446744073709551615, 409, 419, 409, 419, 69, 71, true, "reduced to", "reduced to"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9871239675677535701, 8810214565092963488, 18446744073709551615, 18446744073709551615, 453, 483, 453, 483, 75, 79, true, "purposed to faithfully present", "purposed to faithfully present"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14133501046094794901, 4250240326135716646, 18446744073709551615, 18446744073709551615, 620, 634, 620, 634, 100, 102, true, "challenging to", "challenging to"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 18329554120394908623, 17010976290898309846, 18446744073709551615, 18446744073709551615, 847, 862, 847, 862, 135, 138, true, "is also growing", "is also growing"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14637952034068646347, 9688733531448391553, 18446744073709551615, 18446744073709551615, 974, 982, 974, 982, 156, 158, true, "is going", "is going"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14364253828417975278, 8778810672464165894, 18446744073709551615, 18446744073709551615, 1100, 1117, 1100, 1117, 182, 184, true, "needs essentially", "needs essentially"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16971139354256206394, 8359549146932405741, 18446744073709551615, 18446744073709551615, 1145, 1159, 1145, 1159, 190, 193, true, "need to ingest", "need to ingest"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 329104161634702433, 8726454204599928234, 18446744073709551615, 18446744073709551615, 114, 119, 114, 119, 17, 18, true, "range", "range"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6180169263126451304, 4214562769527423312, 18446744073709551615, 18446744073709551615, 210, 219, 210, 219, 35, 36, true, "detailing", "detailing"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486535, 9094674367324716363, 18446744073709551615, 18446744073709551615, 256, 258, 256, 258, 42, 43, true, "is", "is"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415895645562, 14799989741446549720, 18446744073709551615, 18446744073709551615, 271, 274, 271, 274, 45, 46, true, "say", "say"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106397531449655911, 14632270885483087688, 18446744073709551615, 18446744073709551615, 377, 384, 377, 384, 62, 63, true, "encoded", "encoded"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486535, 9094674367324724925, 18446744073709551615, 18446744073709551615, 392, 394, 392, 394, 65, 66, true, "is", "is"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625618412480, 541954499163841946, 18446744073709551615, 18446744073709551615, 602, 606, 602, 606, 97, 98, true, "make", "make"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 3503810711254267897, 327944184510617093, 18446744073709551615, 18446744073709551615, 654, 663, 654, 663, 105, 106, true, "transform", "transform"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106397529675133622, 9265134128656073394, 18446744073709551615, 18446744073709551615, 694, 701, 694, 701, 111, 112, true, "enables", "enables"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6185033796712833759, 8158902570488040634, 18446744073709551615, 18446744073709551615, 802, 811, 802, 811, 129, 130, true, "published", "published"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 329104161594697075, 8726414758277463017, 18446744073709551615, 18446744073709551615, 889, 894, 889, 894, 141, 142, true, "poses", "poses"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6185033796712833759, 8158902570488066017, 18446744073709551615, 18446744073709551615, 943, 952, 943, 952, 151, 152, true, "published", "published"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625618412480, 541954499163850098, 18446744073709551615, 18446744073709551615, 1001, 1005, 1001, 1005, 163, 164, true, "make", "make"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625697824147, 497671517323247955, 18446744073709551615, 18446744073709551615, 1054, 1058, 1054, 1058, 172, 173, true, "find", "find"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 329104159301007417, 8863033603552468338, 18446744073709551615, 18446744073709551615, 1217, 1222, 1217, 1222, 204, 205, true, "being", "being"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106398484416229602, 17530813820733868718, 18446744073709551615, 18446744073709551615, 1251, 1258, 1251, 1258, 211, 212, true, "convert", "convert"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625621532398, 554816074249930520, 18446744073709551615, 18446744073709551615, 1358, 1362, 1358, 1362, 231, 232, true, "need", "need"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486535, 9094674367323996407, 18446744073709551615, 18446744073709551615, 1383, 1385, 1383, 1385, 236, 237, true, "is", "is"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625696287852, 497722139527509467, 18446744073709551615, 18446744073709551615, 1394, 1398, 1394, 1398, 239, 240, true, "deal", "deal"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6168374324562720592, 185665609222125727, 18446744073709551615, 18446744073709551615, 1474, 1483, 1474, 1483, 257, 258, true, "extracted", "extracted"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415895640485, 14799993819747716499, 18446744073709551615, 18446744073709551615, 1509, 1512, 1509, 1512, 262, 263, true, "put", "put"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106464587478437030, 6502547855313104919, 18446744073709551615, 18446744073709551615, 346, 353, 346, 353, 55, 57, true, "many of", "many of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106478685702231057, 1428751967817183488, 18446744073709551615, 18446744073709551615, 1325, 1332, 1325, 1332, 222, 224, true, "such as", "such as"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625631229034, 542250596720887578, 18446744073709551615, 18446744073709551615, 16, 20, 16, 20, 3, 4, true, "that", "that"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486538, 9094674367373732264, 18446744073709551615, 18446744073709551615, 76, 78, 76, 78, 12, 13, true, "in", "in"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625697843734, 497670111222755023, 18446744073709551615, 18446744073709551615, 120, 124, 120, 124, 18, 19, true, "from", "from"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415895625940, 14799992967704466108, 18446744073709551615, 18446744073709551615, 133, 136, 133, 136, 20, 21, true, "for", "for"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219234676, 18446744073709551615, 18446744073709551615, 164, 166, 164, 166, 25, 26, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625631229034, 542250596720775319, 18446744073709551615, 18446744073709551615, 275, 279, 275, 279, 46, 47, true, "that", "that"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486538, 9094674367373513839, 18446744073709551615, 18446744073709551615, 343, 345, 343, 345, 54, 55, true, "in", "in"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486538, 9094674367373523345, 18446744073709551615, 18446744073709551615, 385, 387, 385, 387, 63, 64, true, "in", "in"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486989, 9094674356673776478, 18446744073709551615, 18446744073709551615, 395, 397, 395, 397, 66, 67, true, "by", "by"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219219846, 18446744073709551615, 18446744073709551615, 428, 430, 428, 430, 72, 73, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219227273, 18446744073709551615, 18446744073709551615, 568, 570, 568, 570, 92, 93, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14154242830791309661, 1004085954587590076, 18446744073709551615, 18446744073709551615, 579, 591, 579, 591, 94, 96, true, "across these", "across these"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206560517276114, 15945165859804744982, 18446744073709551615, 18446744073709551615, 667, 673, 667, 673, 107, 109, true, "into a", "into a"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541480354, 9094674546964354786, 18446744073709551615, 18446744073709551615, 723, 725, 723, 725, 115, 116, true, "In", "In"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219198352, 18446744073709551615, 18446744073709551615, 765, 767, 765, 767, 122, 123, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219210972, 18446744073709551615, 18446744073709551615, 799, 801, 799, 801, 128, 129, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206560518651853, 15945529371230903899, 18446744073709551615, 18446744073709551615, 822, 828, 822, 828, 131, 133, true, "in the", "in the"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 329104161786618045, 8725299555592485331, 18446744073709551615, 18446744073709551615, 911, 916, 911, 916, 146, 147, true, "since", "since"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206560518651853, 15945529371230859398, 18446744073709551615, 18446744073709551615, 953, 959, 953, 959, 152, 154, true, "in the", "in the"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541480354, 9094674546964371620, 18446744073709551615, 18446744073709551615, 989, 991, 989, 991, 160, 161, true, "In", "In"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14814148868025447689, 3694567760357366516, 18446744073709551615, 18446744073709551615, 1018, 1026, 1018, 1026, 166, 168, true, "of these", "of these"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219324564, 18446744073709551615, 18446744073709551615, 1078, 1080, 1078, 1080, 176, 177, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206549292198744, 15968280101146838290, 18446744073709551615, 18446744073709551615, 1170, 1176, 1170, 1176, 194, 196, true, "from a", "from a"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219318707, 18446744073709551615, 18446744073709551615, 1185, 1187, 1185, 1187, 197, 198, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14638857868319795209, 3807143954092066612, 18446744073709551615, 18446744073709551615, 1197, 1205, 1197, 1205, 200, 202, true, "with the", "with the"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206557726458966, 16025464328456092215, 18446744073709551615, 18446744073709551615, 1300, 1306, 1300, 1306, 218, 220, true, "with a", "with a"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206557726458966, 16025464328456099242, 18446744073709551615, 18446744073709551615, 1399, 1405, 1399, 1405, 240, 242, true, "with a", "with a"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219303614, 18446744073709551615, 18446744073709551615, 1420, 1422, 1420, 1422, 244, 245, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16057368201763467386, 216739275376297295, 18446744073709551615, 18446744073709551615, 1484, 1494, 1484, 1494, 258, 260, true, "from these", "from these"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 5748787292106066554, 4405126515520980867, 18446744073709551615, 18446744073709551615, 1513, 1523, 1513, 1523, 263, 265, true, "these into", "these into"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429163415, 18446744073709551615, 18446744073709551615, 190, 192, 190, 192, 31, 32, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429174755, 18446744073709551615, 18446744073709551615, 268, 270, 268, 270, 44, 45, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429173582, 18446744073709551615, 18446744073709551615, 417, 419, 417, 419, 70, 71, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429146067, 18446744073709551615, 18446744073709551615, 462, 464, 462, 464, 76, 77, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429194340, 18446744073709551615, 18446744073709551615, 632, 634, 632, 634, 101, 102, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206519425733256, 5984372374891954420, 18446744073709551615, 18446744073709551615, 735, 741, 735, 741, 117, 119, true, "to the", "to the"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429185213, 18446744073709551615, 18446744073709551615, 998, 1000, 998, 1000, 162, 163, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429226599, 18446744073709551615, 18446744073709551615, 1150, 1152, 1150, 1152, 191, 192, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429235584, 18446744073709551615, 18446744073709551615, 1275, 1277, 1275, 1277, 214, 215, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429209693, 18446744073709551615, 18446744073709551615, 1391, 1393, 1391, 1393, 238, 239, true, "to", "to"], ["parenthesis", "round brackets", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104053210116957, 4933919093561563747, 18446744073709551615, 18446744073709551615, 295, 300, 295, 300, 52, 55, true, "(CCS)", "(CCS)"], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 5306542014856411002, 14493189109864111156, 18446744073709551615, 18446744073709551615, 0, 132, 0, 132, 0, 24, true, "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files.", "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files."], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 2369217517028793827, 11890147189063173430, 18446744073709551615, 18446744073709551615, 133, 246, 133, 246, 24, 45, true, "The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms.", "The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms."], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12064124790943537514, 9632597734224986436, 18446744073709551615, 18446744073709551615, 247, 375, 247, 375, 45, 69, true, "This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components.", "This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components."], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 1805453063572196406, 15284543814810892665, 18446744073709551615, 18446744073709551615, 376, 440, 376, 440, 69, 82, true, "Each of these microservices can be consumed by its own REST API.", "Each of these microservices can be consumed by its own REST API."], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 210366145485171616, 10779316463372138244, 18446744073709551615, 18446744073709551615, 441, 606, 441, 606, 82, 109, true, "This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform.", "This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform."], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 13863701154380798624, 5607686807400793153, 18446744073709551615, 18446744073709551615, 607, 891, 607, 891, 109, 153, true, "In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", "In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures."], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 3741141293805179509, 9675794815446093236, 18446744073709551615, 18446744073709551615, 40, 55, 40, 55, 9, 11, true, "first component", "first component"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 4066887494406769292, 15944572553884562120, 18446744073709551615, 18446744073709551615, 110, 131, 110, 131, 20, 23, true, "structured data files", "structured data files"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15684933964106580812, 12993940953903139083, 18446744073709551615, 18446744073709551615, 208, 225, 208, 225, 40, 42, true, "trainable machine", "trainable machine"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12638008641667971393, 14590037144173376663, 18446744073709551615, 18446744073709551615, 269, 294, 269, 294, 49, 52, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 3812062755894317903, 5752895239615977865, 18446744073709551615, 18446744073709551615, 359, 374, 359, 374, 66, 68, true, "main components", "main components"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 7904009099850099728, 6069321302342300412, 18446744073709551615, 18446744073709551615, 427, 439, 427, 439, 78, 81, true, "own REST API", "own REST API"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14315066823203278267, 5715163301899035549, 18446744073709551615, 18446744073709551615, 483, 500, 483, 500, 90, 92, true, "complex pipelines", "complex pipelines"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 7501920923775581134, 5285457240038734782, 18446744073709551615, 18446744073709551615, 567, 584, 567, 584, 103, 105, true, "new microservices", "new microservices"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 9920918086675479799, 11129371561875838665, 18446744073709551615, 18446744073709551615, 689, 725, 689, 725, 122, 125, true, "asynchronous communication protocols", "asynchronous communication protocols"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 17000938524684089439, 12283057491291530260, 18446744073709551615, 18446744073709551615, 742, 755, 742, 755, 129, 131, true, "many benefits", "many benefits"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 4253886245479866309, 90465651070093109, 18446744073709551615, 18446744073709551615, 773, 799, 773, 799, 136, 139, true, "proper resource management", "proper resource management"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 17671651082391847352, 4285231550406356710, 18446744073709551615, 18446744073709551615, 812, 831, 812, 831, 141, 143, true, "strong dependencies", "strong dependencies"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 239702429653970881, 11301722290661797635, 18446744073709551615, 18446744073709551615, 870, 890, 870, 890, 149, 152, true, "single task failures", "single task failures"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104161668023890, 13427899720650205831, 18446744073709551615, 18446744073709551615, 8, 13, 8, 13, 2, 3, true, "paper", "paper"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 6182654480499682241, 9496359210917921791, 18446744073709551615, 18446744073709551615, 61, 70, 61, 70, 13, 14, true, "ingestion", "ingestion"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 6167933651658664291, 16598695715373476800, 18446744073709551615, 18446744073709551615, 74, 83, 74, 83, 15, 16, true, "documents", "documents"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 2703018679320364082, 14545924726949564279, 18446744073709551615, 18446744073709551615, 94, 104, 94, 104, 18, 19, true, "conversion", "conversion"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14635106751859230946, 3899039667786064358, 18446744073709551615, 18446744073709551615, 137, 145, 137, 145, 25, 26, true, "solution", "solution"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14814125365076808131, 9349977279496653565, 18446744073709551615, 18446744073709551615, 176, 184, 176, 184, 33, 34, true, "platform", "platform"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 389609625695918821, 3664761358525422290, 18446744073709551615, 18446744073709551615, 199, 203, 199, 203, 38, 39, true, "core", "core"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15359670209433732834, 1709633722429132795, 18446744073709551615, 18446744073709551615, 235, 245, 235, 245, 43, 44, true, "algorithms", "algorithms"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14814125365076808131, 9349977279496707806, 18446744073709551615, 18446744073709551615, 252, 260, 252, 260, 46, 47, true, "platform", "platform"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12178341415896221596, 5842744026410738636, 18446744073709551615, 18446744073709551615, 296, 299, 296, 299, 53, 54, true, "CCS", "CCS"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12178341415895638602, 5842694294408079134, 18446744073709551615, 18446744073709551615, 320, 323, 320, 323, 60, 61, true, "set", "set"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 990358581043194791, 393905999985964694, 18446744073709551615, 18446744073709551615, 327, 340, 327, 340, 62, 63, true, "microservices", "microservices"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 990358581043194791, 393905999985936006, 18446744073709551615, 18446744073709551615, 390, 403, 390, 403, 72, 73, true, "microservices", "microservices"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14650448032998792781, 15963759494992376767, 18446744073709551615, 18446744073709551615, 446, 454, 446, 454, 83, 84, true, "approach", "approach"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 6167933651658664291, 16598695715374233051, 18446744073709551615, 18446744073709551615, 512, 521, 512, 521, 94, 95, true, "documents", "documents"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14814125365076808131, 9349977279496610029, 18446744073709551615, 18446744073709551615, 597, 605, 597, 605, 107, 108, true, "platform", "platform"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104161571401725, 13426123714444340915, 18446744073709551615, 18446744073709551615, 610, 615, 610, 615, 110, 111, true, "order", "order"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14814125365076808131, 9349977279496698149, 18446744073709551615, 18446744073709551615, 629, 637, 629, 637, 114, 115, true, "platform", "platform"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 990358581043194791, 393905999985528944, 18446744073709551615, 18446744073709551615, 652, 665, 652, 665, 118, 119, true, "microservices", "microservices"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14814125365076808131, 9349977279496695017, 18446744073709551615, 18446744073709551615, 846, 854, 846, 854, 146, 147, true, "platform", "platform"], ["verb", "compound-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8568388710680918302, 1832540720065690143, 18446744073709551615, 18446744073709551615, 18, 32, 18, 32, 5, 7, true, "focus entirely", "focus entirely"], ["verb", "compound-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 5237537207757377628, 6864205941272212007, 18446744073709551615, 18446744073709551615, 149, 167, 149, 167, 27, 30, true, "propose is thought", "propose is thought"], ["verb", "compound-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15903921305565697154, 7448795222128154927, 18446744073709551615, 18446744073709551615, 404, 419, 404, 419, 73, 76, true, "can be consumed", "can be consumed"], ["verb", "compound-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8944903948136983007, 3100279804263702344, 18446744073709551615, 18446744073709551615, 666, 680, 666, 680, 119, 121, true, "are integrated", "are integrated"], ["verb", "compound-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 7780068026497460305, 562602692899396130, 18446744073709551615, 18446744073709551615, 760, 772, 760, 772, 133, 136, true, "allows to do", "allows to do"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12178341415895601584, 5841349058796574805, 18446744073709551615, 18446744073709551615, 204, 207, 204, 207, 39, 40, true, "has", "has"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14639581097006750428, 4101766079705362430, 18446744073709551615, 18446744073709551615, 226, 234, 226, 234, 42, 43, true, "learning", "learning"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 16381206563350835754, 15338244529159273971, 18446744073709551615, 18446744073709551615, 262, 268, 262, 268, 48, 49, true, "called", "called"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14652282307475037790, 8362404979343840295, 18446744073709551615, 18446744073709551615, 302, 310, 302, 310, 56, 57, true, "consists", "consists"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 6167774653473311671, 8932714637044289580, 18446744073709551615, 18446744073709551615, 341, 350, 341, 350, 63, 64, true, "organized", "organized"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 16381206569317834029, 15127822949531520780, 18446744073709551615, 18446744073709551615, 464, 470, 464, 470, 86, 87, true, "allows", "allows"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104159303279946, 13502145352581782916, 18446744073709551615, 18446744073709551615, 477, 482, 477, 482, 89, 90, true, "build", "build"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8106476000254393164, 1725287517912256023, 18446744073709551615, 18446744073709551615, 504, 511, 504, 511, 93, 94, true, "process", "process"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 16381206569317834029, 15127822949531294179, 18446744073709551615, 18446744073709551615, 546, 552, 546, 552, 99, 100, true, "allows", "allows"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8106396517344986388, 5854485364096172979, 18446744073709551615, 18446744073709551615, 559, 566, 559, 566, 102, 103, true, "develop", "develop"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 389609625618412480, 3672855485569275414, 18446744073709551615, 18446744073709551615, 619, 623, 619, 623, 112, 113, true, "make", "make"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104159209890617, 13606843864069204390, 18446744073709551615, 18446744073709551615, 733, 738, 733, 738, 127, 128, true, "gives", "gives"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 5305301449677211216, 8681985492456152514, 18446744073709551615, 18446744073709551615, 801, 811, 801, 811, 140, 141, true, "eliminates", "eliminates"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104161505838030, 13472448784809337111, 18446744073709551615, 18446744073709551615, 836, 841, 836, 841, 144, 145, true, "makes", "makes"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 1993790582685692910, 3267300742396852093, 18446744073709551615, 18446744073709551615, 855, 869, 855, 869, 147, 149, true, "robust against", "robust against"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8106396862006371970, 13009000795262405678, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "In this", "In this"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 16381206566339127348, 15334506191466791715, 18446744073709551615, 18446744073709551615, 33, 39, 33, 39, 7, 9, true, "on the", "on the"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485670, 4857876500911665887, 18446744073709551615, 18446744073709551615, 71, 73, 71, 73, 14, 15, true, "of", "of"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 389609625698622943, 3653991554605439637, 18446744073709551615, 18446744073709551615, 105, 109, 105, 109, 19, 20, true, "into", "into"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485670, 4857876500911708855, 18446744073709551615, 18446744073709551615, 168, 170, 168, 170, 30, 31, true, "of", "of"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 389609625700764258, 3654402694655081504, 18446744073709551615, 18446744073709551615, 171, 175, 171, 175, 31, 33, true, "as a", "as a"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14638855195670894879, 12124056112419286236, 18446744073709551615, 18446744073709551615, 186, 194, 186, 194, 35, 37, true, "which at", "which at"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12178341415895623120, 5842693827432037020, 18446744073709551615, 18446744073709551615, 311, 314, 311, 314, 57, 58, true, "out", "out"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 389609625620237736, 3672771697496836670, 18446744073709551615, 18446744073709551615, 315, 319, 315, 319, 58, 60, true, "of a", "of a"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485670, 4857876500911649386, 18446744073709551615, 18446744073709551615, 324, 326, 324, 326, 61, 62, true, "of", "of"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541486538, 4857876073127839401, 18446744073709551615, 18446744073709551615, 351, 353, 351, 353, 64, 65, true, "in", "in"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 13852121904094090198, 14590995273314953312, 18446744073709551615, 18446744073709551615, 376, 389, 376, 389, 69, 72, true, "Each of these", "Each of these"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541486989, 4857876114906482442, 18446744073709551615, 18446744073709551615, 420, 422, 420, 422, 76, 77, true, "by", "by"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 752127337293867046, 13713074507145666172, 18446744073709551615, 18446744073709551615, 585, 596, 585, 596, 105, 107, true, "against the", "against the"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541480354, 4857876037199396344, 18446744073709551615, 18446744073709551615, 607, 609, 607, 609, 109, 110, true, "In", "In"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8106478041484051995, 2311188108209868134, 18446744073709551615, 18446744073709551615, 681, 688, 681, 688, 121, 122, true, "through", "through"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485865, 4857876500092540787, 18446744073709551615, 18446744073709551615, 474, 476, 474, 476, 88, 89, true, "to", "to"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485865, 4857876500092539243, 18446744073709551615, 18446744073709551615, 501, 503, 501, 503, 92, 93, true, "to", "to"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485865, 4857876500092543847, 18446744073709551615, 18446744073709551615, 556, 558, 556, 558, 101, 102, true, "to", "to"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485865, 4857876500092547312, 18446744073709551615, 18446744073709551615, 616, 618, 616, 618, 111, 112, true, "to", "to"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485865, 4857876500092426670, 18446744073709551615, 18446744073709551615, 767, 769, 767, 769, 134, 135, true, "to", "to"], ["numval", "ival", 3409470577915009676, "TEXT", "#/texts/13", 1.0, 17767354399704235162, 16337218082829608086, 18446744073709551615, 18446744073709551615, 142, 143, 142, 143, 27, 28, true, "2", "2"], ["expression", "word-concatenation", 3409470577915009676, "TEXT", "#/texts/13", 1.0, 5044385734724420019, 14795950652192688492, 18446744073709551615, 18446744073709551615, 175, 191, 175, 191, 34, 35, true, "state-of-the-art", "state-of-the-art"], ["numval", "ival", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 17767354399704235163, 1719697440342653142, 18446744073709551615, 18446744073709551615, 33, 34, 33, 34, 5, 6, true, "3", "3"], ["numval", "ival", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 17767354399704235156, 1719697438695412307, 18446744073709551615, 18446744073709551615, 105, 106, 105, 106, 20, 21, true, "4", "4"], ["numval", "ival", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 17767354399704235157, 1719697440128552642, 18446744073709551615, 18446744073709551615, 301, 302, 301, 302, 58, 59, true, "5", "5"], ["parenthesis", "round brackets", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 12960504640524214008, 7549890404163577655, 18446744073709551615, 18446744073709551615, 216, 243, 216, 243, 41, 48, true, "(both in users and content)", "(both in users and content)"], ["expression", "wtoken-concatenation", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 329104161622136223, 9304407318657891408, 18446744073709551615, 18446744073709551615, 334, 339, 334, 339, 65, 66, true, "w.r.t", "w.r.t"], ["sentence", "", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 13562905925698502846, 15259172910127558115, 18446744073709551615, 18446744073709551615, 22, 93, 22, 93, 3, 18, true, "In Section 3, we present the design of the platform and its components.", "In Section 3, we present the design of the platform and its components."], ["sentence", "", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 1592324435600311370, 6679784309178480570, 18446744073709551615, 18446744073709551615, 94, 280, 94, 280, 18, 54, true, "In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively.", "In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively."], ["sentence", "", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 18017856606572388707, 11119000415778134338, 18446744073709551615, 18446744073709551615, 281, 340, 281, 340, 54, 67, true, "Finally, in Section 5, we discuss the open questions w.r.t.", "Finally, in Section 5, we discuss the open questions w.r.t."], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 7362111305564357210, 14399545547382599450, 18446744073709551615, 18446744073709551615, 141, 159, 141, 159, 28, 30, true, "deployment methods", "deployment methods"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 14592782398836220527, 13536879568188234094, 18446744073709551615, 18446744073709551615, 178, 193, 178, 193, 35, 37, true, "platform scales", "platform scales"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 4421383392096991748, 6284876151106966992, 18446744073709551615, 18446744073709551615, 248, 265, 248, 265, 49, 51, true, "compute resources", "compute resources"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8051609034415273401, 10487247228020021805, 18446744073709551615, 18446744073709551615, 319, 333, 319, 333, 63, 65, true, "open questions", "open questions"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106352240078799135, 10120178145746215787, 18446744073709551615, 18446744073709551615, 25, 32, 25, 32, 4, 5, true, "Section", "Section"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 16381206568241679420, 8738387660838128289, 18446744073709551615, 18446744073709551615, 51, 57, 51, 57, 10, 11, true, "design", "design"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 14814125365076808131, 2092259178040575550, 18446744073709551615, 18446744073709551615, 65, 73, 65, 73, 13, 14, true, "platform", "platform"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 2703018952916355661, 11574998382432588793, 18446744073709551615, 18446744073709551615, 82, 92, 82, 92, 16, 17, true, "components", "components"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106352240078799135, 10120178145746219109, 18446744073709551615, 18446744073709551615, 97, 104, 97, 104, 19, 20, true, "Section", "Section"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 11899564443746965611, 8824822780498807299, 18446744073709551615, 18446744073709551615, 123, 135, 123, 135, 25, 26, true, "architecture", "architecture"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 16381206521526353544, 8079494218851857408, 18446744073709551615, 18446744073709551615, 199, 205, 199, 205, 38, 39, true, "regard", "regard"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 16381206519640398140, 414969871814550286, 18446744073709551615, 18446744073709551615, 209, 215, 209, 215, 40, 41, true, "volume", "volume"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 329104159157820437, 13127829657860064361, 18446744073709551615, 18446744073709551615, 225, 230, 225, 230, 44, 45, true, "users", "users"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106398484416916345, 11293518884131724477, 18446744073709551615, 18446744073709551615, 235, 242, 235, 242, 46, 47, true, "content", "content"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106352240078799135, 10120178145746003140, 18446744073709551615, 18446744073709551615, 293, 300, 293, 300, 57, 58, true, "Section", "Section"], ["verb", "single-verb", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106476016677076976, 9844196961628278464, 18446744073709551615, 18446744073709551615, 39, 46, 39, 46, 8, 9, true, "present", "present"], ["verb", "single-verb", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106397868479560363, 8170627791942563001, 18446744073709551615, 18446744073709551615, 111, 118, 111, 118, 23, 24, true, "discuss", "discuss"], ["verb", "single-verb", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106397868479560363, 8170627791941832362, 18446744073709551615, 18446744073709551615, 307, 314, 307, 314, 61, 62, true, "discuss", "discuss"], ["verb", "single-verb", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 329104161622136223, 9304407318657891408, 18446744073709551615, 18446744073709551615, 334, 339, 334, 339, 65, 66, true, "w.r.t", "w.r.t"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 15441160910541480354, 13110915667349394507, 18446744073709551615, 18446744073709551615, 22, 24, 22, 24, 3, 4, true, "In", "In"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 16381206565712212855, 8774362010989401403, 18446744073709551615, 18446744073709551615, 58, 64, 58, 64, 11, 13, true, "of the", "of the"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 15441160910541480354, 13110915667349571689, 18446744073709551615, 18446744073709551615, 94, 96, 94, 96, 18, 19, true, "In", "In"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 389609625618037948, 4823273682945992581, 18446744073709551615, 18446744073709551615, 194, 198, 194, 198, 37, 38, true, "with", "with"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106396909821462677, 1088815676641410103, 18446744073709551615, 18446744073709551615, 217, 224, 217, 224, 42, 44, true, "both in", "both in"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 15441160910541486538, 13110916059597983243, 18446744073709551615, 18446744073709551615, 290, 292, 290, 292, 56, 57, true, "in", "in"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 15441160910541485865, 13110915963300809577, 18446744073709551615, 18446744073709551615, 206, 208, 206, 208, 39, 40, true, "to", "to"], ["numval", "ival", 697648145931166262, "TEXT", "#/texts/15", 1.0, 17767354399704235162, 7083995155582974975, 18446744073709551615, 18446744073709551615, 0, 1, 0, 1, 0, 1, true, "2", "2"], ["numval", "ival", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 17767354399704235163, 2552838057434759723, 18446744073709551615, 18446744073709551615, 130, 131, 130, 131, 20, 21, true, "3", "3"], ["numval", "ival", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 17767354399704235156, 2552838057671732941, 18446744073709551615, 18446744073709551615, 133, 134, 133, 134, 22, 23, true, "4", "4"], ["parenthesis", "square brackets", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206577288742091, 6894361769431189204, 18446744073709551615, 18446744073709551615, 129, 135, 129, 135, 19, 24, true, "[3, 4]", "[3, 4]"], ["expression", "common", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541486545, 16301782680726802891, 18446744073709551615, 18446744073709551615, 558, 562, 558, 562, 101, 102, true, "ie", "i.e."], ["expression", "word-concatenation", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14650469740546809126, 13134297167790756810, 18446744073709551615, 18446744073709551615, 741, 749, 741, 749, 135, 136, true, "JSON/XML", "JSON/XML"], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 7413762744011699502, 5112650843480238838, 18446744073709551615, 18446744073709551615, 0, 136, 0, 136, 0, 25, true, "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4].", "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 5916877018351655351, 15887401132590714495, 18446744073709551615, 18446744073709551615, 137, 205, 137, 205, 25, 38, true, "Broadly speaking, there are two types of approaches to this problem.", "Broadly speaking, there are two types of approaches to this problem."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 6270962906961324285, 7804853914853957296, 18446744073709551615, 18446744073709551615, 206, 359, 206, 359, 38, 66, true, "In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document.", "In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 1323125914001755357, 2708959011598697473, 18446744073709551615, 18446744073709551615, 360, 443, 360, 443, 66, 83, true, "This can be done through a conversion from PDF towards HTML or MS Word for example.", "This can be done through a conversion from PDF towards HTML or MS Word for example."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 7959677268287021834, 6471587455066159969, 18446744073709551615, 18446744073709551615, 444, 711, 444, 711, 83, 128, true, "The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format.", "The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 9906268904976001851, 12420000417227776440, 18446744073709551615, 18446744073709551615, 712, 780, 712, 780, 128, 142, true, "For example, this could be a JSON/XML file with a particular schema.", "For example, this could be a JSON/XML file with a particular schema."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 9067254065901696428, 10388894100200420496, 18446744073709551615, 18446744073709551615, 781, 955, 781, 955, 142, 173, true, "Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", "Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution."], ["term", "enum-term-mark-4", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 2074372556278321470, 3687797441781668801, 18446744073709551615, 18446744073709551615, 415, 430, 415, 430, 76, 80, true, "HTML or MS Word", "HTML or MS Word"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12653831733608918357, 1251885133784117773, 18446744073709551615, 18446744073709551615, 23, 36, 23, 36, 4, 6, true, "PDF documents", "PDF documents"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 1649772470814702484, 1849781250727403708, 18446744073709551615, 18446744073709551615, 41, 73, 41, 73, 7, 10, true, "automatic content reconstruction", "automatic content reconstruction"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 4649638595618642234, 17675128594551486840, 18446744073709551615, 18446744073709551615, 86, 105, 86, 105, 13, 15, true, "outstanding problem", "outstanding problem"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 9088977435888678827, 7025359603537163328, 18446744073709551615, 18446744073709551615, 213, 227, 213, 227, 40, 42, true, "first approach", "first approach"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 5396697874491186037, 9700463201577231321, 18446744073709551615, 18446744073709551615, 320, 342, 320, 342, 59, 62, true, "original visual layout", "original visual layout"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106471324341093100, 10896171766474086033, 18446744073709551615, 18446744073709551615, 423, 430, 423, 430, 78, 80, true, "MS Word", "MS Word"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 10632085908481842480, 3848207310545898370, 18446744073709551615, 18446744073709551615, 448, 472, 448, 472, 84, 87, true, "second approach attempts", "second approach attempts"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 11738704476441755021, 15052719376970997774, 18446744073709551615, 18446744073709551615, 670, 687, 670, 687, 121, 123, true, "original document", "original document"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14630472899120924944, 11642528133024722414, 18446744073709551615, 18446744073709551615, 693, 710, 693, 710, 125, 127, true, "structured format", "structured format"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 673611805924135293, 4470122145607424586, 18446744073709551615, 18446744073709551615, 741, 754, 741, 754, 135, 137, true, "JSON/XML file", "JSON/XML file"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 3982493928589580498, 8690888332062541868, 18446744073709551615, 18446744073709551615, 762, 779, 762, 779, 139, 141, true, "particular schema", "particular schema"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12638008641667971393, 2648999888003643003, 18446744073709551615, 18446744073709551615, 791, 816, 791, 816, 144, 147, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 5385563887835458888, 7655934123629815969, 18446744073709551615, 18446744073709551615, 836, 846, 836, 846, 152, 154, true, "first step", "first step"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 13157956405326233364, 1973865905648942248, 18446744073709551615, 18446744073709551615, 857, 885, 857, 885, 156, 159, true, "knowledge discovery platform", "knowledge discovery platform"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 2940970869648856259, 4641698687139622359, 18446744073709551615, 18446744073709551615, 923, 938, 923, 938, 167, 169, true, "second approach", "second approach"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625631210899, 11923225543122978149, 18446744073709551615, 18446744073709551615, 4, 8, 4, 8, 1, 2, true, "task", "task"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106396543030413423, 13754047564658723918, 18446744073709551615, 18446744073709551615, 121, 128, 121, 128, 18, 19, true, "decades", "decades"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 329104159243796903, 14986068250266130028, 18446744073709551615, 18446744073709551615, 169, 174, 169, 174, 31, 32, true, "types", "types"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15361660588616680195, 14902329689287095849, 18446744073709551615, 18446744073709551615, 178, 188, 178, 188, 33, 34, true, "approaches", "approaches"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106476000253296785, 13943577597598710603, 18446744073709551615, 18446744073709551615, 197, 204, 197, 204, 36, 37, true, "problem", "problem"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 6167933651658664291, 4335834381970813744, 18446744073709551615, 18446744073709551615, 229, 238, 229, 238, 43, 44, true, "documents", "documents"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625699055241, 11924235864545270440, 18446744073709551615, 18446744073709551615, 262, 266, 262, 266, 48, 49, true, "goal", "goal"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106398484416916345, 14750854907318105695, 18446744073709551615, 18446744073709551615, 284, 291, 284, 291, 52, 53, true, "content", "content"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14650401089286948001, 3940465185316459202, 18446744073709551615, 18446744073709551615, 350, 358, 350, 358, 64, 65, true, "document", "document"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 2703018679320364082, 9140199708408016140, 18446744073709551615, 18446744073709551615, 387, 397, 387, 397, 72, 73, true, "conversion", "conversion"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415896289890, 5663892799429042575, 18446744073709551615, 18446744073709551615, 403, 406, 403, 406, 74, 75, true, "PDF", "PDF"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625536535062, 4886157403217231277, 18446744073709551615, 18446744073709551615, 415, 419, 415, 419, 76, 77, true, "HTML", "HTML"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106397496085150773, 18393269004923492619, 18446744073709551615, 18446744073709551615, 435, 442, 435, 442, 81, 82, true, "example", "example"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14650401089286948001, 3940465185316402512, 18446744073709551615, 18446744073709551615, 488, 496, 488, 496, 90, 91, true, "document", "document"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206548538896813, 5095866240506163040, 18446744073709551615, 18446744073709551615, 504, 510, 504, 510, 93, 94, true, "format", "format"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 11387678566946341343, 17509744267177528169, 18446744073709551615, 18446744073709551615, 565, 579, 565, 579, 103, 104, true, "representation", "representation"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14650401089286948001, 3940465185316385541, 18446744073709551615, 18446744073709551615, 587, 595, 587, 595, 106, 107, true, "document", "document"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206590620761857, 2909000255032340916, 18446744073709551615, 18446744073709551615, 624, 630, 624, 630, 112, 113, true, "layout", "layout"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106398484416916345, 14750854907318660619, 18446744073709551615, 18446744073709551615, 653, 660, 653, 660, 118, 119, true, "content", "content"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106397496085150773, 18393269004923502689, 18446744073709551615, 18446744073709551615, 716, 723, 716, 723, 129, 130, true, "example", "example"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 6167933651658664291, 4335834381973654488, 18446744073709551615, 18446744073709551615, 890, 899, 890, 899, 160, 161, true, "documents", "documents"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14635106751859230946, 4735627980056120373, 18446744073709551615, 18446744073709551615, 946, 954, 946, 954, 171, 172, true, "solution", "solution"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14637910066599595367, 12407342884124908229, 18446744073709551615, 18446744073709551615, 74, 82, 74, 82, 10, 12, true, "has been", "has been"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 11306777524314851869, 10312513804613748064, 18446744073709551615, 18446744073709551615, 239, 252, 239, 252, 44, 46, true, "are converted", "are converted"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14892762836290175599, 11884720085873805949, 18446744073709551615, 18446744073709551615, 365, 376, 365, 376, 67, 70, true, "can be done", "can be done"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 5725510425841190313, 6793581731148837080, 18446744073709551615, 18446744073709551615, 516, 556, 516, 556, 95, 100, true, "can be easily processed programmatically", "can be easily processed programmatically"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12324009607163840510, 17415295047313605608, 18446744073709551615, 18446744073709551615, 602, 619, 602, 619, 108, 111, true, "is not preserving", "is not preserving"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15603860853961168192, 1871538778850020675, 18446744073709551615, 18446744073709551615, 817, 827, 817, 827, 147, 149, true, "is thought", "is thought"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 5518720680045185536, 10887625114734223201, 18446744073709551615, 18446744073709551615, 904, 914, 904, 914, 163, 165, true, "have opted", "have opted"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 2703018679320640424, 9140221220687091912, 18446744073709551615, 18446744073709551615, 12, 22, 12, 22, 3, 4, true, "converting", "converting"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14635107222397821279, 10264365568526316164, 18446744073709551615, 18446744073709551615, 145, 153, 145, 153, 26, 27, true, "speaking", "speaking"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415895564896, 5663898048848086764, 18446744073709551615, 18446744073709551615, 161, 164, 161, 164, 29, 30, true, "are", "are"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 6168331468892821959, 4002607006832828199, 18446744073709551615, 18446744073709551615, 270, 279, 270, 279, 50, 51, true, "represent", "represent"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106398484416229602, 14750847589478427809, 18446744073709551615, 18446744073709551615, 476, 483, 476, 483, 88, 89, true, "convert", "convert"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541486545, 16301782680726802891, 18446744073709551615, 18446744073709551615, 558, 562, 558, 562, 101, 102, true, "ie", "i.e."], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14652282307552191074, 15854614648765054100, 18446744073709551615, 18446744073709551615, 636, 644, 636, 644, 115, 116, true, "contains", "contains"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14652284122054107911, 17209935458057025089, 18446744073709551615, 18446744073709551615, 730, 738, 730, 738, 132, 134, true, "could be", "could be"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14652285297651805672, 1311477044897965993, 18446744073709551615, 18446744073709551615, 295, 303, 295, 303, 54, 56, true, "close as", "close as"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541485670, 16301784280431244556, 18446744073709551615, 18446744073709551615, 9, 11, 9, 11, 2, 3, true, "of", "of"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415895625940, 5663899155610812530, 18446744073709551615, 18446744073709551615, 106, 109, 106, 109, 15, 16, true, "for", "for"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625618865305, 11914918762736598377, 18446744073709551615, 18446744073709551615, 110, 114, 110, 114, 16, 17, true, "over", "over"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541485670, 16301784280431329095, 18446744073709551615, 18446744073709551615, 175, 177, 175, 177, 32, 33, true, "of", "of"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16380809977974811061, 2944931246915770910, 18446744073709551615, 18446744073709551615, 206, 212, 206, 212, 38, 40, true, "In the", "In the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14638857868319795209, 15171163761431783385, 18446744073709551615, 18446744073709551615, 253, 261, 253, 261, 46, 48, true, "with the", "with the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206565712212855, 13403044955273571907, 18446744073709551615, 18446744073709551615, 343, 349, 343, 349, 62, 64, true, "of the", "of the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 3505887731517758060, 7420350098754473395, 18446744073709551615, 18446744073709551615, 377, 386, 377, 386, 70, 72, true, "through a", "through a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625697843734, 11917857471064369379, 18446744073709551615, 18446744073709551615, 398, 402, 398, 402, 73, 74, true, "from", "from"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106351183251325893, 13733355420818968171, 18446744073709551615, 18446744073709551615, 407, 414, 407, 414, 75, 76, true, "towards", "towards"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415895625940, 5663899155610803742, 18446744073709551615, 18446744073709551615, 431, 434, 431, 434, 80, 81, true, "for", "for"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206560517276114, 13525661071352042922, 18446744073709551615, 18446744073709551615, 497, 503, 497, 503, 91, 93, true, "into a", "into a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206565712212855, 13403044955274076230, 18446744073709551615, 18446744073709551615, 580, 586, 580, 586, 104, 106, true, "of the", "of the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14637917359887717745, 184091670553687293, 18446744073709551615, 18446744073709551615, 661, 669, 661, 669, 119, 121, true, "from the", "from the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625698530964, 11924245681540490403, 18446744073709551615, 18446744073709551615, 688, 692, 688, 692, 123, 125, true, "in a", "in a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415896108722, 5663904553857312325, 18446744073709551615, 18446744073709551615, 712, 715, 712, 715, 128, 129, true, "For", "For"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206557726458966, 13428361439789699478, 18446744073709551615, 18446744073709551615, 755, 761, 755, 761, 137, 139, true, "with a", "with a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 329104162323265917, 13893873386515814556, 18446744073709551615, 18446744073709551615, 781, 786, 781, 786, 142, 143, true, "Since", "Since"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541485670, 16301784280431091326, 18446744073709551615, 18446744073709551615, 828, 830, 828, 830, 149, 150, true, "of", "of"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625700764258, 11918714484856028265, 18446744073709551615, 18446744073709551615, 831, 835, 831, 835, 150, 152, true, "as a", "as a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 3512299892331381400, 9603465650093366657, 18446744073709551615, 18446744073709551615, 847, 856, 847, 856, 154, 156, true, "towards a", "towards a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415895625940, 5663899155610829905, 18446744073709551615, 18446744073709551615, 886, 889, 886, 889, 159, 160, true, "for", "for"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106397727991264470, 13733498763290197426, 18446744073709551615, 18446744073709551615, 915, 922, 915, 922, 165, 167, true, "for the", "for the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541486538, 16301782677078975107, 18446744073709551615, 18446744073709551615, 939, 941, 939, 941, 169, 170, true, "in", "in"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106351192298715310, 13572120365031968055, 18446744073709551615, 18446744073709551615, 189, 196, 189, 196, 34, 36, true, "to this", "to this"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541485865, 16301784282097407071, 18446744073709551615, 18446744073709551615, 267, 269, 267, 269, 49, 50, true, "to", "to"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206519425733256, 5705382060992960144, 18446744073709551615, 18446744073709551615, 313, 319, 313, 319, 57, 59, true, "to the", "to the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541485865, 16301784282097404239, 18446744073709551615, 18446744073709551615, 473, 475, 473, 475, 87, 88, true, "to", "to"], ["numval", "ival", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17767354399704235163, 4202447182575023190, 18446744073709551615, 18446744073709551615, 146, 147, 146, 147, 23, 24, true, "3", "3"], ["numval", "ival", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17767354399704235158, 4202447181925614568, 18446744073709551615, 18446744073709551615, 231, 232, 231, 232, 38, 39, true, "6", "6"], ["expression", "word-concatenation", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17169426750435213530, 6067173531611560767, 18446744073709551615, 18446744073709551615, 112, 123, 112, 123, 18, 19, true, "open-source", "open-source"], ["expression", "word-concatenation", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17169426750435213530, 6067173531611378187, 18446744073709551615, 18446744073709551615, 270, 281, 270, 281, 46, 47, true, "open-source", "open-source"], ["expression", "wtoken-concatenation", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 2465266896652791125, 1865592824578355441, 18446744073709551615, 18446744073709551615, 152, 164, 152, 164, 25, 26, true, "Tabula^{4}", "Tabula$^{4}$"], ["expression", "wtoken-concatenation", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 14650455842370127505, 14507811374734524059, 18446744073709551615, 18446744073709551615, 212, 222, 212, 222, 35, 36, true, "Abby^{5}", "Abby$^{5}$"], ["expression", "wtoken-concatenation", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 9454541915838194653, 7851877499332286379, 18446744073709551615, 18446744073709551615, 236, 249, 236, 249, 40, 41, true, "DataCap^{7}", "DataCap$^{7}$"], ["sentence", "", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 9662107284464852524, 16955385700801276233, 18446744073709551615, 18446744073709551615, 0, 90, 0, 90, 0, 14, true, "Many solutions have already been developed that tackle the problem of document conversion.", "Many solutions have already been developed that tackle the problem of document conversion."], ["sentence", "", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17696925929616730455, 10132403727341574990, 18446744073709551615, 18446744073709551615, 91, 165, 91, 165, 14, 27, true, "There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$.", "There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$."], ["sentence", "", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 9728244801126698359, 3329400039246394270, 18446744073709551615, 18446744073709551615, 166, 250, 166, 250, 27, 42, true, "There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$.", "There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$."], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 344850509042108766, 1631224707378500329, 18446744073709551615, 18446744073709551615, 0, 14, 0, 14, 0, 2, true, "Many solutions", "Many solutions"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17523665367407867975, 16853212651814283089, 18446744073709551615, 18446744073709551615, 70, 89, 70, 89, 11, 13, true, "document conversion", "document conversion"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 9901611344946969001, 12100342176073920155, 18446744073709551615, 18446744073709551615, 112, 132, 112, 132, 18, 20, true, "open-source programs", "open-source programs"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 1906608866734760053, 9360530039227743648, 18446744073709551615, 18446744073709551615, 181, 202, 181, 202, 30, 32, true, "proprietary solutions", "proprietary solutions"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 8106476000253296785, 7348997524064119191, 18446744073709551615, 18446744073709551615, 59, 66, 59, 66, 9, 10, true, "problem", "problem"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 389609625540497480, 14650540096238819920, 18446744073709551615, 18446744073709551615, 141, 145, 141, 145, 22, 23, true, "Xpdf", "Xpdf"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 2465266896652791125, 1865592824578355441, 18446744073709551615, 18446744073709551615, 152, 164, 152, 164, 25, 26, true, "Tabula^{4}", "Tabula$^{4}$"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 14650455842370127505, 14507811374734524059, 18446744073709551615, 18446744073709551615, 212, 222, 212, 222, 35, 36, true, "Abby^{5}", "Abby$^{5}$"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 16381206554997419890, 18238264595526178929, 18446744073709551615, 18446744073709551615, 224, 230, 224, 230, 37, 38, true, "Nuance", "Nuance"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 9454541915838194653, 7851877499332286379, 18446744073709551615, 18446744073709551615, 236, 249, 236, 249, 40, 41, true, "DataCap^{7}", "DataCap$^{7}$"], ["verb", "compound-verb", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 48606693242407270, 11184224818946054040, 18446744073709551615, 18446744073709551615, 15, 42, 15, 42, 2, 6, true, "have already been developed", "have already been developed"], ["verb", "compound-verb", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 13108247384673291996, 6754522763130422012, 18446744073709551615, 18446744073709551615, 97, 111, 97, 111, 15, 18, true, "are well known", "are well known"], ["verb", "compound-verb", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 14650447942981204525, 15164378737851800332, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 28, 30, true, "are also", "are also"], ["verb", "single-verb", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 16381206513070605866, 8871660528632519573, 18446744073709551615, 18446744073709551615, 48, 54, 48, 54, 7, 8, true, "tackle", "tackle"], ["conn", "single-conn", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 8106478685702231057, 14269895558410210677, 18446744073709551615, 18446744073709551615, 133, 140, 133, 140, 20, 22, true, "such as", "such as"], ["conn", "single-conn", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 8106478685702231057, 14269895558410270685, 18446744073709551615, 18446744073709551615, 204, 211, 204, 211, 33, 35, true, "such as", "such as"], ["conn", "single-conn", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 15441160910541485670, 9244456599053778727, 18446744073709551615, 18446744073709551615, 67, 69, 67, 69, 10, 11, true, "of", "of"], ["parenthesis", "reference", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 12178341415895551530, 7007012457190989418, 18446744073709551615, 18446744073709551615, 299, 302, 299, 302, 49, 50, true, "[1]", "[1]"], ["expression", "wtoken-concatenation", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 12178341415895551530, 7007012457190989418, 18446744073709551615, 18446744073709551615, 299, 302, 299, 302, 49, 50, true, "[1]", "[1]"], ["sentence", "", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 431376792375667906, 14327044623450240949, 18446744073709551615, 18446744073709551615, 35, 161, 35, 161, 5, 25, true, "Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries.", "Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries."], ["sentence", "", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 5097963969909323475, 6200497828341591337, 18446744073709551615, 18446744073709551615, 162, 298, 162, 298, 25, 49, true, "For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref.", "For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref."], ["sentence", "", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 3625891827316137320, 6327186273637832744, 18446744073709551615, 18446744073709551615, 299, 325, 299, 325, 49, 54, true, "[1] and previous editions.", "[1] and previous editions."], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 1906608866734760053, 3011555850208054480, 18446744073709551615, 18446744073709551615, 73, 94, 73, 94, 11, 13, true, "proprietary solutions", "proprietary solutions"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 13555405798633159070, 12440619026730950763, 18446744073709551615, 18446744073709551615, 111, 139, 111, 139, 17, 20, true, "countless academic solutions", "countless academic solutions"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 224884857456525093, 8800751904675089400, 18446744073709551615, 18446744073709551615, 203, 223, 203, 223, 32, 35, true, "complex page layouts", "complex page layouts"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 7206649825919626318, 10082144117564749216, 18446744073709551615, 18446744073709551615, 307, 324, 307, 324, 51, 53, true, "previous editions", "previous editions"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 13985989196784208042, 6397101251702544653, 18446744073709551615, 18446744073709551615, 58, 68, 58, 68, 9, 10, true, "opensource", "opensource"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 5943277062935477155, 1344882994493538740, 18446744073709551615, 18446744073709551615, 151, 160, 151, 160, 23, 24, true, "libraries", "libraries"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 8106397496085150773, 15757418808623037344, 18446744073709551615, 18446744073709551615, 166, 173, 166, 173, 26, 27, true, "example", "example"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 5948328731789408366, 18296547164130656702, 18446744073709551615, 18446744073709551615, 179, 188, 179, 188, 29, 30, true, "challenge", "challenge"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 4575915900499789029, 3616265821829049845, 18446744073709551615, 18446744073709551615, 259, 271, 259, 271, 40, 41, true, "competitions", "competitions"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 329104161878859757, 6665347482264844132, 18446744073709551615, 18446744073709551615, 281, 286, 281, 286, 43, 44, true, "ICDAR", "ICDAR"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 12178341415896316683, 7006656170865370612, 18446744073709551615, 18446744073709551615, 294, 297, 294, 297, 47, 48, true, "Ref", "Ref"], ["verb", "compound-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 14650447942981204525, 14209441569513214592, 18446744073709551615, 18446744073709551615, 102, 110, 102, 110, 15, 17, true, "are also", "are also"], ["verb", "compound-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 13582317933598830804, 14887377263055835155, 18446744073709551615, 18446744073709551615, 224, 245, 224, 245, 35, 38, true, "is actively addressed", "is actively addressed"], ["verb", "single-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 329104158563393892, 6347084796372522947, 18446744073709551615, 18446744073709551615, 52, 57, 52, 57, 8, 9, true, "known", "known"], ["verb", "single-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15942873022707780098, 17012788479993724570, 18446744073709551615, 18446744073709551615, 192, 202, 192, 202, 31, 32, true, "segmenting", "segmenting"], ["verb", "single-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 6165973069391114794, 11863768156487224924, 18446744073709551615, 18446744073709551615, 249, 258, 249, 258, 39, 40, true, "recurring", "recurring"], ["verb", "single-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 329104161594697084, 6627718053570720382, 18446744073709551615, 18446744073709551615, 272, 277, 272, 277, 41, 42, true, "posed", "posed"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 2330907114827395751, 7417154358652385512, 18446744073709551615, 18446744073709551615, 35, 46, 35, 46, 5, 7, true, "Besides the", "Besides the"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541487053, 8488079551389036732, 18446744073709551615, 18446744073709551615, 148, 150, 148, 150, 22, 23, true, "as", "as"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 12178341415896108722, 7006564727319883066, 18446744073709551615, 18446744073709551615, 162, 165, 162, 165, 25, 26, true, "For", "For"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541485670, 8488079584067276180, 18446744073709551615, 18446744073709551615, 189, 191, 189, 191, 30, 31, true, "of", "of"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541486989, 8488079561168943847, 18446744073709551615, 18446744073709551615, 246, 248, 246, 248, 38, 39, true, "by", "by"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541486989, 8488079561168941359, 18446744073709551615, 18446744073709551615, 278, 280, 278, 280, 42, 43, true, "by", "by"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541487053, 8488079551389030241, 18446744073709551615, 18446744073709551615, 288, 290, 288, 290, 45, 46, true, "as", "as"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541486538, 8488079680818651488, 18446744073709551615, 18446744073709551615, 291, 293, 291, 293, 46, 47, true, "in", "in"], ["numval", "ival", 11495493007651807568, "TEXT", "#/texts/19", 1.0, 17767354399704235163, 1677212978845340209, 18446744073709551615, 18446744073709551615, 0, 1, 0, 1, 0, 1, true, "3", "3"], ["sentence", "", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 333520156392116834, 15811852122116104463, 18446744073709551615, 18446744073709551615, 0, 174, 0, 174, 0, 33, true, "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way.", "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way."], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 17523665367407867975, 12904404874656037431, 18446744073709551615, 18446744073709551615, 141, 160, 141, 160, 26, 28, true, "document conversion", "document conversion"], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 8106342689900857659, 11867579569069297583, 18446744073709551615, 18446744073709551615, 166, 173, 166, 173, 30, 32, true, "new way", "new way"], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 14814124842239312745, 3078034766037725322, 18446744073709551615, 18446744073709551615, 10, 18, 10, 18, 2, 3, true, "plethora", "plethora"], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 6168765157982633013, 14939317711471735014, 18446744073709551615, 18446744073709551615, 31, 40, 31, 40, 5, 6, true, "solutions", "solutions"], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 14635106751859230946, 2774775706852130043, 18446744073709551615, 18446744073709551615, 77, 85, 77, 85, 15, 16, true, "solution", "solution"], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 8106476000253296785, 9669543175138669254, 18446744073709551615, 18446744073709551615, 130, 137, 130, 137, 24, 25, true, "problem", "problem"], ["verb", "compound-verb", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 17737636265695672887, 18332533904749557797, 18446744073709551615, 18446744073709551615, 45, 64, 45, 64, 8, 12, true, "would like to point", "would like to point"], ["verb", "single-verb", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 329104162248557356, 752941156226945092, 18446744073709551615, 18446744073709551615, 0, 5, 0, 5, 0, 1, true, "Given", "Given"], ["verb", "single-verb", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 14652255875895390162, 653651603361381634, 18446744073709551615, 18446744073709551615, 22, 30, 22, 30, 4, 5, true, "existing", "existing"], ["verb", "single-verb", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 8106396886952123470, 12838801329929880418, 18446744073709551615, 18446744073709551615, 86, 93, 86, 93, 16, 17, true, "differs", "differs"], ["verb", "single-verb", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 15361660588616680195, 3054906681832241655, 18446744073709551615, 18446744073709551615, 115, 125, 115, 125, 22, 23, true, "approaches", "approaches"], ["conn", "single-conn", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 15441160910541485670, 15411912721590762410, 18446744073709551615, 18446744073709551615, 19, 21, 19, 21, 3, 4, true, "of", "of"], ["conn", "single-conn", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 16057368201763467386, 7544318374715286400, 18446744073709551615, 18446744073709551615, 94, 104, 94, 104, 17, 19, true, "from these", "from these"], ["conn", "single-conn", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 15441160910541485670, 15411912721590754628, 18446744073709551615, 18446744073709551615, 138, 140, 138, 140, 25, 26, true, "of", "of"], ["conn", "single-conn", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 389609625698530964, 7158441064407457660, 18446744073709551615, 18446744073709551615, 161, 165, 161, 165, 28, 30, true, "in a", "in a"], ["conn", "single-conn", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 15441160910541485865, 15411912723377752993, 18446744073709551615, 18446744073709551615, 56, 58, 56, 58, 10, 11, true, "to", "to"], ["expression", "word-concatenation", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15982564436466431745, 16753386948407940471, 18446744073709551615, 18446744073709551615, 41, 51, 41, 51, 10, 11, true, "rule-based", "rule-based"], ["expression", "word-concatenation", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 3753411203337468488, 15547653955780150850, 18446744073709551615, 18446744073709551615, 193, 205, 193, 205, 32, 33, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 1645908499873224608, 218341843414296, 18446744073709551615, 18446744073709551615, 388, 402, 388, 402, 61, 62, true, "time-consuming", "time-consuming"], ["expression", "word-concatenation", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15982564436466431745, 16753386948408616238, 18446744073709551615, 18446744073709551615, 436, 446, 436, 446, 67, 68, true, "rule-based", "rule-based"], ["sentence", "", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 6925086339785270582, 4967815864262610827, 18446744073709551615, 18446744073709551615, 0, 236, 0, 236, 0, 38, true, "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation.", "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation."], ["sentence", "", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 6407641312915953840, 824387259931288587, 18446744073709551615, 18446744073709551615, 237, 469, 237, 469, 38, 71, true, "This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms.", "This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms."], ["sentence", "", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 8689787920237942067, 876894582857790117, 18446744073709551615, 18446744073709551615, 470, 594, 470, 594, 71, 93, true, "This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased.", "This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased."], ["term", "enum-term-mark-1", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 17700892478496312051, 10821180144678536592, 18446744073709551615, 18446744073709551615, 388, 420, 388, 420, 61, 65, true, "time-consuming and costly tuning", "time-consuming and costly tuning"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 14634111720801022362, 3195488238150353861, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 1, 3, true, "key idea", "key idea"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 16546787775173266207, 17746572894245856976, 18446744073709551615, 18446744073709551615, 41, 73, 41, 73, 10, 13, true, "rule-based conversion algorithms", "rule-based conversion algorithms"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 268562468797662458, 10942359132766872226, 18446744073709551615, 18446744073709551615, 94, 109, 94, 109, 17, 19, true, "generic machine", "generic machine"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 1385949438436740444, 1066431205762218852, 18446744073709551615, 18446744073709551615, 219, 235, 219, 235, 35, 37, true, "human annotation", "human annotation"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 8882320954266966503, 11671713689267052644, 18446744073709551615, 18446744073709551615, 242, 260, 242, 260, 39, 41, true, "flexible mechanism", "flexible mechanism"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 9530639523283095495, 1716193621416828213, 18446744073709551615, 18446744073709551615, 296, 313, 296, 313, 48, 50, true, "certain templates", "certain templates"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 363090472507169169, 8647809295474703230, 18446744073709551615, 18446744073709551615, 341, 357, 341, 357, 55, 57, true, "accurate results", "accurate results"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 4501351594407908143, 8920783459215236990, 18446744073709551615, 18446744073709551615, 407, 420, 407, 420, 63, 65, true, "costly tuning", "costly tuning"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 3911758940362647139, 2852281587036452792, 18446744073709551615, 18446744073709551615, 424, 468, 424, 468, 66, 70, true, "traditional rule-based conversion algorithms", "traditional rule-based conversion algorithms"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 4914830112961611503, 16550272884551547652, 18446744073709551615, 18446744073709551615, 490, 504, 490, 504, 75, 77, true, "stark contrast", "stark contrast"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 1888718621804377149, 4793363709933306705, 18446744073709551615, 18446744073709551615, 546, 568, 546, 568, 84, 87, true, "art conversion systems", "art conversion systems"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15359670209433732834, 14480479537813821910, 18446744073709551615, 18446744073709551615, 119, 129, 119, 129, 20, 21, true, "algorithms", "algorithms"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 16381206567230470443, 14326281487546411484, 18446744073709551615, 18446744073709551615, 144, 150, 144, 150, 23, 24, true, "models", "models"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 3753411203337468488, 15547653955780150850, 18446744073709551615, 18446744073709551615, 193, 205, 193, 205, 32, 33, true, "ground-truth", "ground-truth"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 6167933651658664291, 15122240631633557724, 18446744073709551615, 18446744073709551615, 317, 326, 317, 326, 51, 52, true, "documents", "documents"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 14650448032998792781, 15879979342361769675, 18446744073709551615, 18446744073709551615, 475, 483, 475, 483, 72, 73, true, "approach", "approach"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 329104161640023790, 11881717169008578620, 18446744073709551615, 18446744073709551615, 533, 538, 533, 538, 81, 82, true, "state", "state"], ["verb", "compound-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 1652981674969121277, 5345522223761467591, 18446744073709551615, 18446744073709551615, 24, 36, 24, 36, 6, 9, true, "do not write", "do not write"], ["verb", "compound-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 14953425203724984554, 9921592199341111751, 18446744073709551615, 18446744073709551615, 156, 169, 156, 169, 25, 28, true, "can be easily", "can be easily"], ["verb", "compound-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 5286109702157457123, 14247791188942099735, 18446744073709551615, 18446744073709551615, 274, 295, 274, 295, 44, 48, true, "adapt very quickly to", "adapt very quickly to"], ["verb", "compound-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15545418999534241127, 5045193591230796924, 18446744073709551615, 18446744073709551615, 328, 340, 328, 340, 53, 55, true, "achieve very", "achieve very"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541486535, 7262104292544184272, 18446744073709551615, 18446744073709551615, 13, 15, 13, 15, 3, 4, true, "is", "is"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 8106477998160600386, 17975660313851847310, 18446744073709551615, 18446744073709551615, 86, 93, 86, 93, 16, 17, true, "utilize", "utilize"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 14639581097006750428, 13078762433064859990, 18446744073709551615, 18446744073709551615, 110, 118, 110, 118, 19, 20, true, "learning", "learning"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 8106476000256008955, 13110106039936157176, 18446744073709551615, 18446744073709551615, 136, 143, 136, 143, 22, 23, true, "produce", "produce"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 8106351024635822250, 6482860715812624614, 18446744073709551615, 18446744073709551615, 182, 189, 182, 189, 30, 31, true, "trained", "trained"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 14650442552334623127, 18275297430362112280, 18446744073709551615, 18446744073709551615, 206, 214, 206, 214, 33, 34, true, "acquired", "acquired"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 16381206569317834029, 14325451716460439025, 18446744073709551615, 18446744073709551615, 261, 267, 261, 267, 41, 42, true, "allows", "allows"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 5305301449677211216, 12271059041548086334, 18446744073709551615, 18446744073709551615, 373, 383, 373, 383, 59, 60, true, "eliminates", "eliminates"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541486535, 7262104292544005935, 18446744073709551615, 18446744073709551615, 484, 486, 484, 486, 73, 74, true, "is", "is"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 6182925164797550141, 4080506602497980982, 18446744073709551615, 18446744073709551615, 523, 532, 523, 532, 80, 81, true, "mentioned", "mentioned"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 12178341415895564896, 11980759772482637587, 18446744073709551615, 18446744073709551615, 576, 579, 576, 579, 89, 90, true, "are", "are"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 6168252184209599630, 11624964441948682836, 18446744073709551615, 18446744073709551615, 584, 593, 584, 593, 91, 92, true, "rulebased", "rulebased"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 389609625631229034, 13897665868783500837, 18446744073709551615, 18446744073709551615, 16, 20, 16, 20, 4, 5, true, "that", "that"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541485678, 7262104301308027569, 18446744073709551615, 18446744073709551615, 190, 192, 190, 192, 31, 32, true, "on", "on"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 12178341415896456267, 11980837516659458599, 18446744073709551615, 18446744073709551615, 215, 218, 215, 218, 34, 35, true, "via", "via"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541485670, 7262104379839096841, 18446744073709551615, 18446744073709551615, 314, 316, 314, 316, 50, 51, true, "of", "of"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541485670, 7262104379839071574, 18446744073709551615, 18446744073709551615, 421, 423, 421, 423, 65, 66, true, "of", "of"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541486538, 7262104292909614113, 18446744073709551615, 18446744073709551615, 487, 489, 487, 489, 74, 75, true, "in", "in"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 16381206565712212855, 14516857138090705799, 18446744073709551615, 18446744073709551615, 539, 545, 539, 545, 82, 84, true, "of the", "of the"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541485865, 7262104290391125417, 18446744073709551615, 18446744073709551615, 271, 273, 271, 273, 43, 44, true, "to", "to"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541485865, 7262104290391122531, 18446744073709551615, 18446744073709551615, 293, 295, 293, 295, 47, 48, true, "to", "to"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 16381206519425733256, 12408277484933909023, 18446744073709551615, 18446744073709551615, 505, 511, 505, 511, 77, 79, true, "to the", "to the"], ["parenthesis", "round brackets", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6550638212396612728, 10771033758102967319, 18446744073709551615, 18446744073709551615, 359, 385, 359, 385, 65, 72, true, "(or a corpus of documents)", "(or a corpus of documents)"], ["parenthesis", "round brackets", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6314058359297877881, 9707414947244523127, 18446744073709551615, 18446744073709551615, 513, 578, 513, 578, 97, 110, true, "(e.g. scientific articles, patents, regulations, contracts, etc.)", "(e.g. scientific articles, patents, regulations, contracts, etc.)"], ["parenthesis", "round brackets", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8057413709704099528, 1050132540479424454, 18446744073709551615, 18446744073709551615, 723, 745, 723, 745, 135, 141, true, "(no matter its origin)", "(no matter its origin)"], ["expression", "common", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541487324, 2236281911783920424, 18446744073709551615, 18446744073709551615, 514, 518, 514, 518, 98, 99, true, "eg", "e.g."], ["expression", "common", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 12178341415895450733, 6844731054316482558, 18446744073709551615, 18446744073709551615, 573, 577, 573, 577, 108, 109, true, "etc", "etc."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 9012657468363944610, 8765370141450009242, 18446744073709551615, 18446744073709551615, 0, 216, 0, 216, 0, 35, true, "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design.", "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 17113763778403085212, 5836750622876672848, 18446744073709551615, 18446744073709551615, 217, 291, 217, 291, 35, 52, true, "First of all, one can not think anymore at the level of a single document.", "First of all, one can not think anymore at the level of a single document."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 234003328160338467, 7991365496276900509, 18446744073709551615, 18446744073709551615, 292, 386, 292, 386, 52, 73, true, "Rather, one should think at the level of a collection of documents (or a corpus of documents).", "Rather, one should think at the level of a collection of documents (or a corpus of documents)."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 10764335410877048383, 9149814586004775117, 18446744073709551615, 18446744073709551615, 387, 592, 387, 592, 73, 113, true, "A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is.", "A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 12007288182356882448, 17097814921561813617, 18446744073709551615, 18446744073709551615, 593, 788, 593, 788, 113, 150, true, "This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format.", "This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 17200018804130084830, 4710316362795570196, 18446744073709551615, 18446744073709551615, 789, 895, 789, 895, 150, 169, true, "Our solution can ingest an entire collection of documents and build machine learned models on top of that.", "Our solution can ingest an entire collection of documents and build machine learned models on top of that."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 2832657334028567700, 16598107722788837982, 18446744073709551615, 18446744073709551615, 896, 983, 896, 983, 169, 190, true, "Of course, once the the model is trained, one can convert documents one at a time, too.", "Of course, once the the model is trained, one can convert documents one at a time, too."], ["term", "enum-term-mark-3", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6940993203873682013, 11180826619488609321, 18446744073709551615, 18446744073709551615, 656, 674, 656, 674, 123, 126, true, "solutions and ours", "solutions and ours"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14803416660534245041, 14676542655277815623, 18446744073709551615, 18446744073709551615, 117, 128, 117, 128, 18, 20, true, "current era", "current era"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 12494727366192470008, 14865632197249103574, 18446744073709551615, 18446744073709551615, 132, 155, 132, 155, 21, 23, true, "artificial intelligence", "artificial intelligence"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 10965400769022712461, 10837956255692384921, 18446744073709551615, 18446744073709551615, 169, 189, 169, 189, 27, 29, true, "serious consequences", "serious consequences"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 17817695283880367270, 1923283193609983271, 18446744073709551615, 18446744073709551615, 275, 290, 275, 290, 49, 51, true, "single document", "single document"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 17817695283880367270, 1923283193609994591, 18446744073709551615, 18446744073709551615, 417, 432, 417, 432, 79, 81, true, "single document", "single document"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 5446369751016637748, 2451284751526455026, 18446744073709551615, 18446744073709551615, 487, 499, 487, 499, 93, 95, true, "certain type", "certain type"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 4921841042246041201, 10983409214681496834, 18446744073709551615, 18446744073709551615, 514, 538, 514, 538, 98, 101, true, "eg scientific articles", "e.g. scientific articles"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8853087811719781330, 14038095559600948860, 18446744073709551615, 18446744073709551615, 605, 626, 605, 626, 116, 119, true, "first big distinction", "first big distinction"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8644330667942088529, 12068348818841150869, 18446744073709551615, 18446744073709551615, 774, 787, 774, 787, 147, 149, true, "output format", "output format"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 9670577903281859793, 4784628170822326470, 18446744073709551615, 18446744073709551615, 816, 833, 816, 833, 155, 157, true, "entire collection", "entire collection"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14650448032998792781, 3638043485221670922, 18446744073709551615, 18446744073709551615, 10, 18, 10, 18, 2, 3, true, "approach", "approach"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625633008101, 5827484634985091758, 18446744073709551615, 18446744073709551615, 31, 35, 31, 35, 5, 6, true, "rule", "rule"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6168765157982633013, 17080776441400876326, 18446744073709551615, 18446744073709551615, 42, 51, 42, 51, 7, 8, true, "solutions", "solutions"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106464587473865376, 10863088468137730445, 18446744073709551615, 18446744073709551615, 57, 64, 57, 64, 9, 10, true, "machine", "machine"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6168765157982633013, 17080776441400829230, 18446744073709551615, 18446744073709551615, 74, 83, 74, 83, 11, 12, true, "solutions", "solutions"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206521526353544, 17837005405950977614, 18446744073709551615, 18446744073709551615, 195, 201, 195, 201, 30, 31, true, "regard", "regard"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206568241679420, 6463362562880610794, 18446744073709551615, 18446744073709551615, 209, 215, 209, 215, 33, 34, true, "design", "design"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161872148023, 4146693174861139295, 18446744073709551615, 18446744073709551615, 217, 222, 217, 222, 35, 36, true, "First", "First"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161602483077, 4501508833607620084, 18446744073709551615, 18446744073709551615, 264, 269, 264, 269, 46, 47, true, "level", "level"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161602483077, 4501508833607664683, 18446744073709551615, 18446744073709551615, 324, 329, 324, 329, 59, 60, true, "level", "level"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 2702984786539193186, 5625500215606040366, 18446744073709551615, 18446744073709551615, 335, 345, 335, 345, 62, 63, true, "collection", "collection"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6167933651658664291, 17130544635473657019, 18446744073709551615, 18446744073709551615, 349, 358, 349, 358, 64, 65, true, "documents", "documents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206562408205435, 6662313161856348398, 18446744073709551615, 18446744073709551615, 365, 371, 365, 371, 68, 69, true, "corpus", "corpus"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6167933651658664291, 17130544635473655477, 18446744073709551615, 18446744073709551615, 375, 384, 375, 384, 70, 71, true, "documents", "documents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106464587473865376, 10863088468137624072, 18446744073709551615, 18446744073709551615, 389, 396, 389, 396, 74, 75, true, "machine", "machine"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161610777240, 4148531692542071489, 18446744073709551615, 18446744073709551615, 405, 410, 405, 410, 76, 77, true, "model", "model"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106464587473865376, 10863088468137655677, 18446744073709551615, 18446744073709551615, 459, 466, 459, 466, 88, 89, true, "machine", "machine"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161610777240, 4148531692542068691, 18446744073709551615, 18446744073709551615, 475, 480, 475, 480, 90, 91, true, "model", "model"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6167933651658664291, 17130544635473631637, 18446744073709551615, 18446744073709551615, 503, 512, 503, 512, 96, 97, true, "documents", "documents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106479143938802112, 7753532983827535730, 18446744073709551615, 18446744073709551615, 540, 547, 540, 547, 102, 103, true, "patents", "patents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 4973525406703593304, 3125450270057085908, 18446744073709551615, 18446744073709551615, 549, 560, 549, 560, 104, 105, true, "regulations", "regulations"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 5947882010261766213, 2807085994503528596, 18446744073709551615, 18446744073709551615, 562, 571, 562, 571, 106, 107, true, "contracts", "contracts"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6168765157982633013, 17080776441399891140, 18446744073709551615, 18446744073709551615, 656, 665, 656, 665, 123, 124, true, "solutions", "solutions"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625618862505, 5824689271058039311, 18446744073709551615, 18446744073709551615, 670, 674, 670, 674, 125, 126, true, "ours", "ours"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6168765157982633013, 17080776441399897623, 18446744073709551615, 18446744073709551615, 685, 694, 685, 694, 128, 129, true, "solutions", "solutions"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14650401089286948001, 13693331144318097274, 18446744073709551615, 18446744073709551615, 704, 712, 704, 712, 131, 132, true, "document", "document"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625631241985, 5827513608331038644, 18446744073709551615, 18446744073709551615, 718, 722, 718, 722, 134, 135, true, "time", "time"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206594266096010, 12213703933534801602, 18446744073709551615, 18446744073709551615, 727, 733, 727, 733, 137, 138, true, "matter", "matter"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206566166820610, 6036520036764853357, 18446744073709551615, 18446744073709551615, 738, 744, 738, 744, 139, 140, true, "origin", "origin"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14635106751859230946, 8364773269850029437, 18446744073709551615, 18446744073709551615, 793, 801, 793, 801, 151, 152, true, "solution", "solution"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6167933651658664291, 17130544635473593661, 18446744073709551615, 18446744073709551615, 837, 846, 837, 846, 158, 159, true, "documents", "documents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106464587473865376, 10863088468137810773, 18446744073709551615, 18446744073709551615, 857, 864, 857, 864, 161, 162, true, "machine", "machine"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206567230470443, 6024418531468018651, 18446744073709551615, 18446744073709551615, 873, 879, 873, 879, 163, 164, true, "models", "models"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 12178341415895527965, 6844715057235761660, 18446744073709551615, 18446744073709551615, 883, 886, 883, 886, 165, 166, true, "top", "top"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206562412792821, 6657216968808098043, 18446744073709551615, 18446744073709551615, 899, 905, 899, 905, 170, 171, true, "course", "course"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161610777240, 4148531692542039942, 18446744073709551615, 18446744073709551615, 920, 925, 920, 925, 175, 176, true, "model", "model"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6167933651658664291, 17130544635473603162, 18446744073709551615, 18446744073709551615, 954, 963, 954, 963, 182, 183, true, "documents", "documents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625631241985, 5827513608331054986, 18446744073709551615, 18446744073709551615, 973, 977, 973, 977, 186, 187, true, "time", "time"], ["verb", "compound-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16649210781200388969, 11737823472753388342, 18446744073709551615, 18446744073709551615, 84, 101, 84, 101, 12, 15, true, "might appear very", "might appear very"], ["verb", "compound-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 5898608652259485892, 8273749972636893410, 18446744073709551615, 18446744073709551615, 243, 256, 243, 256, 42, 44, true, "think anymore", "think anymore"], ["verb", "compound-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 9168974707522175422, 15268643747920788932, 18446744073709551615, 18446744073709551615, 433, 444, 433, 444, 81, 84, true, "is not very", "is not very"], ["verb", "compound-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15603860833301917639, 10275051749542929918, 18446744073709551615, 18446744073709551615, 926, 936, 926, 936, 176, 178, true, "is trained", "is trained"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14634110115213467595, 3902842052710630018, 18446744073709551615, 18446744073709551615, 22, 30, 22, 30, 4, 5, true, "swapping", "swapping"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104159219515955, 4537815630564757009, 18446744073709551615, 18446744073709551615, 36, 41, 36, 41, 6, 7, true, "based", "based"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14639581097006750428, 17137957296901215778, 18446744073709551615, 18446744073709551615, 65, 73, 65, 73, 10, 11, true, "learning", "learning"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 12178341415895601584, 6844715253719182703, 18446744073709551615, 18446744073709551615, 160, 163, 160, 163, 25, 26, true, "has", "has"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 232434223375785884, 14053882561685075584, 18446744073709551615, 18446744073709551615, 304, 316, 304, 316, 55, 57, true, "should think", "should think"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106342444693204894, 15950879447442477786, 18446744073709551615, 18446744073709551615, 397, 404, 397, 404, 75, 76, true, "learned", "learned"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106342444693204894, 15950879447442486216, 18446744073709551615, 18446744073709551615, 467, 474, 467, 474, 89, 90, true, "learned", "learned"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541486535, 2236282123090341336, 18446744073709551615, 18446744073709551615, 589, 591, 589, 591, 111, 112, true, "is", "is"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541486535, 2236282123090341658, 18446744073709551615, 18446744073709551615, 598, 600, 598, 600, 114, 115, true, "is", "is"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14652255875895390162, 17681309128804460766, 18446744073709551615, 18446744073709551615, 647, 655, 647, 655, 122, 123, true, "existing", "existing"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14650277091100196516, 16447833424682109222, 18446744073709551615, 18446744073709551615, 676, 684, 676, 684, 127, 128, true, "Existing", "Existing"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625631208371, 5827506598805952653, 18446744073709551615, 18446744073709551615, 695, 699, 695, 699, 129, 130, true, "take", "take"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106398484416229602, 7463572213204385535, 18446744073709551615, 18446744073709551615, 750, 757, 750, 757, 142, 143, true, "convert", "convert"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106396517639247034, 13209657695880625575, 18446744073709551615, 18446744073709551615, 766, 773, 766, 773, 146, 147, true, "desired", "desired"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 2873440693780286732, 16051197931505562856, 18446744073709551615, 18446744073709551615, 802, 812, 802, 812, 152, 154, true, "can ingest", "can ingest"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104159303279946, 4494576876171974588, 18446744073709551615, 18446744073709551615, 851, 856, 851, 856, 160, 161, true, "build", "build"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106342444693204894, 15950879447442468527, 18446744073709551615, 18446744073709551615, 865, 872, 865, 872, 162, 163, true, "learned", "learned"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14892592691705778163, 9208424151114403903, 18446744073709551615, 18446744073709551615, 942, 953, 942, 953, 180, 182, true, "can convert", "can convert"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 2452542797585455507, 2035939665976407914, 18446744073709551615, 18446744073709551615, 102, 112, 102, 112, 15, 17, true, "natural in", "natural in"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6179252389649895475, 8191807611968745212, 18446744073709551615, 18446744073709551615, 0, 9, 0, 9, 0, 2, true, "While the", "While the"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107242376600, 18446744073709551615, 18446744073709551615, 19, 21, 19, 21, 3, 4, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625618037948, 5823902957159802163, 18446744073709551615, 18446744073709551615, 52, 56, 52, 56, 8, 9, true, "with", "with"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107242384356, 18446744073709551615, 18446744073709551615, 129, 131, 129, 131, 20, 21, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625618037948, 5823902957159794765, 18446744073709551615, 18446744073709551615, 190, 194, 190, 194, 29, 30, true, "with", "with"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206565712007226, 5986290651637987665, 18446744073709551615, 18446744073709551615, 223, 229, 223, 229, 36, 38, true, "of all", "of all"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206568372064271, 6462428766212968916, 18446744073709551615, 18446744073709551615, 257, 263, 257, 263, 44, 46, true, "at the", "at the"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625620237736, 5824650452540823625, 18446744073709551615, 18446744073709551615, 270, 274, 270, 274, 47, 49, true, "of a", "of a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206568372064271, 6462428766212960915, 18446744073709551615, 18446744073709551615, 317, 323, 317, 323, 57, 59, true, "at the", "at the"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625620237736, 5824650452540841788, 18446744073709551615, 18446744073709551615, 330, 334, 330, 334, 60, 62, true, "of a", "of a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107242430314, 18446744073709551615, 18446744073709551615, 346, 348, 346, 348, 63, 64, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107242428103, 18446744073709551615, 18446744073709551615, 372, 374, 372, 374, 69, 70, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161711024499, 4147931113686279460, 18446744073709551615, 18446744073709551615, 411, 416, 411, 416, 77, 79, true, "for a", "for a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161711024499, 4147931113686284098, 18446744073709551615, 18446744073709551615, 481, 486, 481, 486, 91, 93, true, "for a", "for a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107242403559, 18446744073709551615, 18446744073709551615, 500, 502, 500, 502, 95, 96, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 2011002864325523456, 7710761867679380926, 18446744073709551615, 18446744073709551615, 627, 638, 627, 638, 119, 121, true, "between the", "between the"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625700792947, 5823321478145663200, 18446744073709551615, 18446744073709551615, 713, 717, 713, 717, 132, 134, true, "at a", "at a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107240472436, 18446744073709551615, 18446744073709551615, 834, 836, 834, 836, 157, 158, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485678, 2236282107108170895, 18446744073709551615, 18446744073709551615, 880, 882, 880, 882, 164, 165, true, "on", "on"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106342927224204147, 12374411364497639723, 18446744073709551615, 18446744073709551615, 887, 894, 887, 894, 166, 168, true, "of that", "of that"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541487694, 2236281866955609411, 18446744073709551615, 18446744073709551615, 896, 898, 896, 898, 169, 170, true, "Of", "Of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625700792947, 5823321478145646816, 18446744073709551615, 18446744073709551615, 968, 972, 968, 972, 184, 186, true, "at a", "at a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485865, 2236282073726956815, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 31, 32, true, "to", "to"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625631408052, 5827512620732698108, 18446744073709551615, 18446744073709551615, 761, 765, 761, 765, 144, 146, true, "to a", "to a"], ["expression", "word-concatenation", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 3753411203337468488, 10952332446895423423, 18446744073709551615, 18446744073709551615, 110, 122, 110, 122, 19, 20, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 3753411203337468488, 10952332446895315527, 18446744073709551615, 18446744073709551615, 383, 395, 383, 395, 70, 71, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15169931585135175826, 16208226134974990802, 18446744073709551615, 18446744073709551615, 814, 825, 814, 825, 151, 152, true, "cloud-based", "cloud-based"], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 5932144278161606896, 5324910960360793763, 18446744073709551615, 18446744073709551615, 0, 165, 0, 165, 0, 30, true, "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it.", "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it."], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16765361859705871972, 1374856743232605456, 18446744073709551615, 18446744073709551615, 166, 347, 166, 347, 30, 64, true, "Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way.", "Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way."], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 5468526205922080258, 1940547468406540901, 18446744073709551615, 18446744073709551615, 348, 417, 348, 417, 64, 76, true, "These annotations are then used as ground-truth data to train models.", "These annotations are then used as ground-truth data to train models."], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 7094528533515465642, 13371907670247459710, 18446744073709551615, 18446744073709551615, 418, 666, 418, 666, 76, 123, true, "It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents.", "It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents."], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 7539280031005994041, 11157353677670739305, 18446744073709551615, 18446744073709551615, 667, 776, 667, 776, 123, 143, true, "For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application.", "For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application."], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 12765583623534385435, 13069085147009366023, 18446744073709551615, 18446744073709551615, 777, 916, 777, 916, 143, 167, true, "It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way.", "It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way."], ["term", "enum-term-mark-1", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16627740256570112677, 4583225854042791398, 18446744073709551615, 18446744073709551615, 889, 915, 889, 915, 162, 166, true, "efficient and scalable way", "efficient and scalable way"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16649733772742282194, 16236977419730296615, 18446744073709551615, 18446744073709551615, 2, 22, 2, 22, 1, 3, true, "second discriminator", "second discriminator"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 41055560552253761, 8373548184892428504, 18446744073709551615, 18446744073709551615, 333, 346, 333, 346, 61, 63, true, "efficient way", "efficient way"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 7255057471482664248, 12334180322615590615, 18446744073709551615, 18446744073709551615, 383, 400, 383, 400, 70, 72, true, "ground-truth data", "ground-truth data"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6611502802514240854, 5277397327434795381, 18446744073709551615, 18446744073709551615, 440, 449, 440, 449, 81, 83, true, "ML models", "ML models"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 948606581867615032, 15713215180443530866, 18446744073709551615, 18446744073709551615, 457, 468, 457, 468, 85, 87, true, "extra level", "extra level"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 3290422095559676021, 14164410702381973396, 18446744073709551615, 18446744073709551615, 649, 665, 649, 665, 120, 122, true, "unseen documents", "unseen documents"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 2751272921550991289, 7405187680881990384, 18446744073709551615, 18446744073709551615, 753, 775, 753, 775, 140, 142, true, "monolithic application", "monolithic application"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 12206009578906402256, 4547981104066793380, 18446744073709551615, 18446744073709551615, 814, 834, 814, 834, 151, 153, true, "cloud-based platform", "cloud-based platform"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 4399570346043001090, 4520288225740080432, 18446744073709551615, 18446744073709551615, 903, 915, 903, 915, 164, 166, true, "scalable way", "scalable way"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6168765157982633013, 8701842085527306720, 18446744073709551615, 18446744073709551615, 44, 53, 44, 53, 6, 7, true, "solutions", "solutions"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104159242674854, 11255009296337504749, 18446744073709551615, 18446744073709551615, 94, 99, 94, 99, 16, 17, true, "tools", "tools"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161610777240, 6745146534538645369, 18446744073709551615, 18446744073709551615, 133, 138, 133, 138, 23, 24, true, "model", "model"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106397680705385749, 10181601809432141973, 18446744073709551615, 18446744073709551615, 197, 204, 197, 204, 38, 39, true, "ability", "ability"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14759757818438587716, 16662422774652150485, 18446744073709551615, 18446744073709551615, 215, 226, 215, 226, 41, 42, true, "collections", "collections"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6167933651658664291, 5250243073519619651, 18446744073709551615, 18446744073709551615, 230, 239, 230, 239, 43, 44, true, "documents", "documents"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106397680705385749, 10181601809432137128, 18446744073709551615, 18446744073709551615, 258, 265, 258, 265, 49, 50, true, "ability", "ability"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16381206523622331561, 13908633511487514086, 18446744073709551615, 18446744073709551615, 270, 276, 270, 276, 51, 52, true, "people", "people"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6167933651658664291, 5250243073519632368, 18446744073709551615, 18446744073709551615, 289, 298, 289, 298, 54, 55, true, "documents", "documents"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 1037258523789473353, 14806284881759797213, 18446744073709551615, 18446744073709551615, 315, 326, 315, 326, 58, 59, true, "annotations", "annotations"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 1037258523789473353, 14806284881759605125, 18446744073709551615, 18446744073709551615, 354, 365, 354, 365, 65, 66, true, "annotations", "annotations"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16381206567230470443, 12706729533498022281, 18446744073709551615, 18446744073709551615, 410, 416, 410, 416, 74, 75, true, "models", "models"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 2703018890300243966, 14454961743665027442, 18446744073709551615, 18446744073709551615, 472, 482, 472, 482, 88, 89, true, "complexity", "complexity"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106397680705385749, 10181601809432183454, 18446744073709551615, 18446744073709551615, 507, 514, 507, 514, 95, 96, true, "ability", "ability"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 2702984786539193186, 8321306199832412002, 18446744073709551615, 18446744073709551615, 526, 536, 526, 536, 99, 100, true, "collection", "collection"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6167933651658664291, 5250243073519635188, 18446744073709551615, 18446744073709551615, 540, 549, 540, 549, 101, 102, true, "documents", "documents"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6167933651658664291, 5250243073519124599, 18446744073709551615, 18446744073709551615, 566, 575, 566, 575, 105, 106, true, "documents", "documents"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 1037258523789473353, 14806284881759620361, 18446744073709551615, 18446744073709551615, 587, 598, 587, 598, 109, 110, true, "annotations", "annotations"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161610777240, 6745146534539012137, 18446744073709551615, 18446744073709551615, 608, 613, 608, 613, 113, 114, true, "model", "model"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161610777240, 6745146534539001905, 18446744073709551615, 18446744073709551615, 640, 645, 640, 645, 118, 119, true, "model", "model"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106397759446161562, 7436678281325725738, 18446744073709551615, 18446744073709551615, 675, 682, 675, 682, 125, 126, true, "authors", "authors"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161668023890, 5928708168625868047, 18446744073709551615, 18446744073709551615, 691, 696, 691, 696, 128, 129, true, "paper", "paper"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14635106751859230946, 3996697737790679411, 18446744073709551615, 18446744073709551615, 732, 740, 732, 740, 136, 137, true, "solution", "solution"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106398484423890147, 9444965948200230503, 18446744073709551615, 18446744073709551615, 801, 808, 801, 808, 148, 149, true, "concept", "concept"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104159214088329, 11255154458886190531, 18446744073709551615, 18446744073709551615, 877, 882, 877, 882, 159, 160, true, "tasks", "tasks"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 10428082093831915533, 17222226489144724218, 18446744073709551615, 18446744073709551615, 74, 89, 74, 89, 12, 15, true, "need to provide", "need to provide"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 7137504529039753077, 6726253915406803145, 18446744073709551615, 18446744073709551615, 139, 153, 139, 153, 24, 27, true, "can be trained", "can be trained"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 11484405948387455014, 6676833556353250435, 18446744073709551615, 18446744073709551615, 366, 379, 366, 379, 66, 69, true, "are then used", "are then used"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 13631356157997264976, 9759866056759424533, 18446744073709551615, 18446744073709551615, 488, 502, 488, 502, 91, 94, true, "has to provide", "has to provide"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 13060376269584473124, 17779441535114424202, 18446744073709551615, 18446744073709551615, 701, 714, 701, 714, 131, 133, true, "was therefore", "was therefore"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6187720399501537329, 12024430126344431887, 18446744073709551615, 18446744073709551615, 780, 789, 780, 789, 144, 146, true, "fits much", "fits much"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14652255875895390162, 58103428862529223, 18446744073709551615, 18446744073709551615, 35, 43, 35, 43, 5, 6, true, "existing", "existing"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541486535, 5881896978058135498, 18446744073709551615, 18446744073709551615, 63, 65, 63, 65, 9, 10, true, "is", "is"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16381206562264646932, 12839380256362217334, 18446744073709551615, 18446744073709551615, 103, 109, 103, 109, 18, 19, true, "gather", "gather"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541486853, 5881896976958836184, 18446744073709551615, 18446744073709551615, 182, 184, 182, 184, 34, 35, true, "do", "do"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 389609625621532398, 16554256739818959541, 18446744073709551615, 18446744073709551615, 188, 192, 188, 192, 36, 37, true, "need", "need"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16381206594265787492, 13689520830003550219, 18446744073709551615, 18446744073709551615, 208, 214, 208, 214, 40, 41, true, "manage", "manage"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 389609625621532398, 16554256739818932222, 18446744073709551615, 18446744073709551615, 249, 253, 249, 253, 47, 48, true, "need", "need"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14650452911780017077, 13628007533839560087, 18446744073709551615, 18446744073709551615, 280, 288, 280, 288, 53, 54, true, "annotate", "annotate"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161640489114, 5930309379214940791, 18446744073709551615, 18446744073709551615, 303, 308, 303, 308, 56, 57, true, "store", "store"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104159241569908, 11255026260054777739, 18446744073709551615, 18446744073709551615, 404, 409, 404, 409, 73, 74, true, "train", "train"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541486535, 5881896978058160307, 18446744073709551615, 18446744073709551615, 421, 423, 421, 423, 77, 78, true, "is", "is"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 12178341415895571674, 4440978130441335086, 18446744073709551615, 18446744073709551615, 450, 453, 450, 453, 83, 84, true, "add", "add"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161640489114, 5930309379214737079, 18446744073709551615, 18446744073709551615, 518, 523, 518, 523, 97, 98, true, "store", "store"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14650452911780017077, 13628007533839577361, 18446744073709551615, 18446744073709551615, 551, 559, 551, 559, 103, 104, true, "annotate", "annotate"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161640489114, 5930309379214724500, 18446744073709551615, 18446744073709551615, 577, 582, 577, 582, 107, 108, true, "store", "store"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104159241569908, 11255026260054879024, 18446744073709551615, 18446744073709551615, 600, 605, 600, 605, 111, 112, true, "train", "train"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104159174415764, 11322547402815110125, 18446744073709551615, 18446744073709551615, 629, 634, 629, 634, 116, 117, true, "apply", "apply"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 5949058521807306708, 12297004478926220295, 18446744073709551615, 18446744073709551615, 741, 750, 741, 750, 137, 139, true, "cannot be", "cannot be"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14892762526608873515, 2571932422246093124, 18446744073709551615, 18446744073709551615, 840, 851, 840, 851, 154, 156, true, "can execute", "can execute"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6182925164797550141, 11632110577094548091, 18446744073709551615, 18446744073709551615, 867, 876, 867, 876, 158, 159, true, "mentioned", "mentioned"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 3610960565517946112, 13846595853905053965, 18446744073709551615, 18446744073709551615, 715, 727, 715, 727, 133, 135, true, "evident that", "evident that"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 2011002864325523456, 11117571940292766640, 18446744073709551615, 18446744073709551615, 23, 34, 23, 34, 3, 5, true, "between the", "between the"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 389609625631229034, 16541888869203540071, 18446744073709551615, 18446744073709551615, 66, 70, 66, 70, 10, 11, true, "that", "that"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14635108304726888554, 4994234956068005418, 18446744073709551615, 18446744073709551615, 124, 132, 124, 132, 21, 23, true, "since no", "since no"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106477988668695541, 8919684696585615962, 18446744073709551615, 18446744073709551615, 154, 161, 154, 161, 27, 28, true, "without", "without"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485670, 5881896997604331986, 18446744073709551615, 18446744073709551615, 227, 229, 227, 229, 42, 43, true, "of", "of"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 12178341415895625940, 4441099339010746501, 18446744073709551615, 18446744073709551615, 266, 269, 266, 269, 50, 51, true, "for", "for"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161828310801, 6278535120429486187, 18446744073709551615, 18446744073709551615, 327, 332, 327, 332, 59, 61, true, "in an", "in an"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541487053, 5881896969451503173, 18446744073709551615, 18446744073709551615, 380, 382, 380, 382, 69, 70, true, "as", "as"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 389609625631229034, 16541888869203466950, 18446744073709551615, 18446744073709551615, 435, 439, 435, 439, 80, 81, true, "that", "that"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485670, 5881896997604348786, 18446744073709551615, 18446744073709551615, 469, 471, 469, 471, 87, 88, true, "of", "of"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485670, 5881896997581907497, 18446744073709551615, 18446744073709551615, 537, 539, 537, 539, 100, 101, true, "of", "of"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485678, 5881897000469726729, 18446744073709551615, 18446744073709551615, 646, 648, 646, 648, 119, 120, true, "on", "on"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106351438779293396, 11391757905802355543, 18446744073709551615, 18446744073709551615, 667, 674, 667, 674, 123, 125, true, "For the", "For the"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106342927224204628, 1463267918920609598, 18446744073709551615, 18446744073709551615, 683, 690, 683, 690, 126, 128, true, "of this", "of this"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 389609625620237736, 16539075808312872292, 18446744073709551615, 18446744073709551615, 809, 813, 809, 813, 149, 151, true, "of a", "of a"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161828310801, 6278535120429453110, 18446744073709551615, 18446744073709551615, 883, 888, 883, 888, 160, 162, true, "in an", "in an"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974655770, 18446744073709551615, 18446744073709551615, 79, 81, 79, 81, 13, 14, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974655477, 18446744073709551615, 18446744073709551615, 100, 102, 100, 102, 17, 18, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974647689, 18446744073709551615, 18446744073709551615, 205, 207, 205, 207, 39, 40, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974660107, 18446744073709551615, 18446744073709551615, 277, 279, 277, 279, 52, 53, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974652202, 18446744073709551615, 18446744073709551615, 401, 403, 401, 403, 72, 73, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974633331, 18446744073709551615, 18446744073709551615, 492, 494, 492, 494, 92, 93, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974643919, 18446744073709551615, 18446744073709551615, 515, 517, 515, 517, 96, 97, true, "to", "to"], ["numval", "fval", 4203835122307823579, "TEXT", "#/texts/24", 1.0, 12178341415896435198, 13889986935520845304, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "3.1", "3.1"], ["parenthesis", "round brackets", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 8772307426918626408, 4362940361408460484, 18446744073709551615, 18446744073709551615, 152, 230, 152, 230, 27, 42, true, "(scanned or programmatically created PDF, bitmap images, Word documents, etc.)", "(scanned or programmatically created PDF, bitmap images, Word documents, etc.)"], ["parenthesis", "round brackets", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 10173882594842541874, 14046368793434208065, 18446744073709551615, 18446744073709551615, 261, 279, 261, 279, 47, 53, true, "(e.g. JSON or XML)", "(e.g. JSON or XML)"], ["expression", "common", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 15441160910541487324, 1240198994439187449, 18446744073709551615, 18446744073709551615, 262, 266, 262, 266, 48, 49, true, "eg", "e.g."], ["expression", "common", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 12178341415895450733, 13632414567216290152, 18446744073709551615, 18446744073709551615, 225, 229, 225, 229, 40, 41, true, "etc", "etc."], ["sentence", "", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 12910497814715733387, 16125145498285443347, 18446744073709551615, 18446744073709551615, 0, 280, 0, 280, 0, 54, true, "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML).", "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML)."], ["term", "enum-term-mark-4", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 11674491770136657522, 9761837904635132795, 18446744073709551615, 18446744073709551615, 267, 278, 267, 278, 49, 52, true, "JSON or XML", "JSON or XML"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 15641968049220564486, 3104330311046848316, 18446744073709551615, 18446744073709551615, 26, 45, 26, 45, 4, 6, true, "processing pipeline", "processing pipeline"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 7850715239909526655, 10564541552852027327, 18446744073709551615, 18446744073709551615, 194, 207, 194, 207, 34, 36, true, "bitmap images", "bitmap images"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 3850832741059734738, 14883287060771742157, 18446744073709551615, 18446744073709551615, 209, 223, 209, 223, 37, 39, true, "Word documents", "Word documents"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 18077282349116974352, 7474364917469372700, 18446744073709551615, 18446744073709551615, 238, 260, 238, 260, 44, 47, true, "structured data format", "structured data format"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 8106398377759843082, 9276542563935260018, 18446744073709551615, 18446744073709551615, 262, 271, 262, 271, 48, 50, true, "eg JSON", "e.g. JSON"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 14814125365076808131, 4535720155345020195, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 1, 2, true, "platform", "platform"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 329104161667983915, 17161321767071624063, 18446744073709551615, 18446744073709551615, 65, 70, 65, 70, 11, 12, true, "parse", "parse"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 14650452911780017077, 6687850948757700034, 18446744073709551615, 18446744073709551615, 72, 80, 72, 80, 13, 14, true, "annotate", "annotate"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 329104159241569908, 14824530430944127370, 18446744073709551615, 18446744073709551615, 82, 87, 82, 87, 15, 16, true, "train", "train"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 389609625696431489, 1561240778132274355, 18446744073709551615, 18446744073709551615, 115, 119, 115, 119, 20, 21, true, "data", "data"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 389609625631434316, 1567151555792390751, 18446744073709551615, 18446744073709551615, 137, 141, 137, 141, 24, 25, true, "type", "type"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 16381206548538896813, 9505947236812447429, 18446744073709551615, 18446744073709551615, 145, 151, 145, 151, 26, 27, true, "format", "format"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 12178341415896289890, 13633304747499991014, 18446744073709551615, 18446744073709551615, 189, 192, 189, 192, 32, 33, true, "PDF", "PDF"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 12178341415895541463, 13632419820854921580, 18446744073709551615, 18446744073709551615, 275, 278, 275, 278, 51, 52, true, "XML", "XML"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 5584174880054122043, 18300515772242224763, 18446744073709551615, 18446744073709551615, 13, 23, 13, 23, 2, 3, true, "implements", "implements"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 16381206560503286032, 10754571117046369273, 18446744073709551615, 18446744073709551615, 49, 55, 49, 55, 7, 8, true, "ingest", "ingest"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 16381206594265787492, 15598034062765456434, 18446744073709551615, 18446744073709551615, 57, 63, 57, 63, 9, 10, true, "manage", "manage"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 8106398484416229602, 1704556939207365093, 18446744073709551615, 18446744073709551615, 103, 110, 103, 110, 18, 19, true, "convert", "convert"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 5947879769709188533, 17603125067012762843, 18446744073709551615, 18446744073709551615, 120, 129, 120, 129, 21, 22, true, "contained", "contained"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 8106478648743879659, 297744879721386987, 18446744073709551615, 18446744073709551615, 153, 160, 153, 160, 28, 29, true, "scanned", "scanned"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 8106398513399298373, 11748900618323850732, 18446744073709551615, 18446744073709551615, 181, 188, 181, 188, 31, 32, true, "created", "created"], ["conn", "single-conn", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 16381206560519231294, 10754552323754925291, 18446744073709551615, 18446744073709551615, 130, 136, 130, 136, 22, 24, true, "in any", "in any"], ["conn", "single-conn", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 15441160910541485670, 1240196378682103108, 18446744073709551615, 18446744073709551615, 142, 144, 142, 144, 25, 26, true, "of", "of"], ["conn", "single-conn", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 16381206560517276114, 10755905799470375423, 18446744073709551615, 18446744073709551615, 231, 237, 231, 237, 42, 44, true, "into a", "into a"], ["conn", "single-conn", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 15441160910541485865, 1240198973604396181, 18446744073709551615, 18446744073709551615, 46, 48, 46, 48, 6, 7, true, "to", "to"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235161, 16653745466189901500, 18446744073709551615, 18446744073709551615, 76, 77, 76, 77, 12, 13, true, "1", "1"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235161, 16653745466189901235, 18446744073709551615, 18446744073709551615, 80, 81, 80, 81, 15, 16, true, "1", "1"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235162, 16653745466240377931, 18446744073709551615, 18446744073709551615, 147, 148, 147, 148, 29, 30, true, "2", "2"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235163, 16653745466559202271, 18446744073709551615, 18446744073709551615, 208, 209, 208, 209, 40, 41, true, "3", "3"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235156, 16653745470875858812, 18446744073709551615, 18446744073709551615, 262, 263, 262, 263, 51, 52, true, "4", "4"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235157, 16653745466591923593, 18446744073709551615, 18446744073709551615, 299, 300, 299, 300, 60, 61, true, "5", "5"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235161, 16653745466190505939, 18446744073709551615, 18446744073709551615, 409, 410, 409, 410, 80, 81, true, "1", "1"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235156, 16653745470875849461, 18446744073709551615, 18446744073709551615, 412, 413, 412, 413, 82, 83, true, "4", "4"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235157, 16653745466591931154, 18446744073709551615, 18446744073709551615, 418, 419, 418, 419, 84, 85, true, "5", "5"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235162, 16653745466240134538, 18446744073709551615, 18446744073709551615, 558, 559, 558, 559, 107, 108, true, "2", "2"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235163, 16653745466559183526, 18446744073709551615, 18446744073709551615, 564, 565, 564, 565, 109, 110, true, "3", "3"], ["parenthesis", "reference", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415896395122, 9951911260307984303, 18446744073709551615, 18446744073709551615, 79, 82, 79, 82, 14, 17, true, "(1)", "(1)"], ["parenthesis", "reference", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415896395187, 9951911292760260815, 18446744073709551615, 18446744073709551615, 146, 149, 146, 149, 28, 31, true, "(2)", "(2)"], ["parenthesis", "reference", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415896394992, 9951911291908364343, 18446744073709551615, 18446744073709551615, 207, 210, 207, 210, 39, 42, true, "(3)", "(3)"], ["parenthesis", "reference", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415896395057, 9951911280683983222, 18446744073709551615, 18446744073709551615, 261, 264, 261, 264, 50, 53, true, "(4)", "(4)"], ["parenthesis", "reference", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415896395383, 9951911289823187961, 18446744073709551615, 18446744073709551615, 298, 301, 298, 301, 59, 62, true, "(5)", "(5)"], ["expression", "word-concatenation", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 3753411203337468488, 9177120008899156041, 18446744073709551615, 18446744073709551615, 174, 186, 174, 186, 35, 36, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 7955973489010605030, 9831476142603144664, 18446744073709551615, 18446744073709551615, 463, 480, 463, 480, 94, 95, true, "template-specific", "template-specific"], ["expression", "word-concatenation", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 3753411203337468488, 9177120008899393700, 18446744073709551615, 18446744073709551615, 594, 606, 594, 606, 116, 117, true, "ground-truth", "ground-truth"], ["expression", "wtoken-concatenation", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14638289750758744304, 11302210173902484934, 18446744073709551615, 18446744073709551615, 288, 296, 288, 296, 57, 58, true, "model(s)", "model(s)"], ["expression", "wtoken-concatenation", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 1476026390672618576, 10107149952848687202, 18446744073709551615, 18446744073709551615, 317, 328, 317, 328, 64, 65, true, "document(s)", "document(s)"], ["sentence", "", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17525205624056980079, 17040712729727507001, 18446744073709551615, 18446744073709551615, 0, 359, 0, 359, 0, 71, true, "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format.", "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format."], ["sentence", "", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 9237548311282946795, 9741605394929262654, 18446744073709551615, 18446744073709551615, 360, 456, 360, 456, 71, 92, true, "If a trained model is available, only components 1, 4 and 5 are needed to convert the documents.", "If a trained model is available, only components 1, 4 and 5 are needed to convert the documents."], ["sentence", "", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6944966380083130595, 5696095880668338595, 18446744073709551615, 18446744073709551615, 457, 631, 457, 631, 92, 122, true, "If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models.", "If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models."], ["sentence", "", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14539564690542389125, 5540857596080310327, 18446744073709551615, 18446744073709551615, 632, 799, 632, 799, 122, 153, true, "It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional.", "It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional."], ["term", "enum-term-mark-2", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 5647280319556365100, 12073018748361790650, 18446744073709551615, 18446744073709551615, 704, 727, 704, 727, 136, 139, true, "annotation and training", "annotation and training"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15641968049220564486, 15184810223045738752, 18446744073709551615, 18446744073709551615, 5, 24, 5, 24, 1, 3, true, "processing pipeline", "processing pipeline"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15437135447951449642, 4518564177144642967, 18446744073709551615, 18446744073709551615, 112, 127, 112, 127, 22, 24, true, "internal format", "internal format"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 843540523508195469, 1009968651563304041, 18446744073709551615, 18446744073709551615, 168, 186, 168, 186, 34, 36, true, "label ground-truth", "label ground-truth"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15116355445244524512, 7416607603607839333, 18446744073709551615, 18446744073709551615, 190, 206, 190, 206, 37, 39, true, "parsed documents", "parsed documents"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 7507517937852582487, 6078294739813943992, 18446744073709551615, 18446744073709551615, 211, 229, 211, 229, 42, 45, true, "training ML models", "training ML models"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 2737429882297243447, 13059840081028432667, 18446744073709551615, 18446744073709551615, 278, 296, 278, 296, 55, 58, true, "custom ML model(s)", "custom ML model(s)"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 18077282349116974352, 2593464952453242974, 18446744073709551615, 18446744073709551615, 336, 358, 336, 358, 67, 70, true, "structured data format", "structured data format"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 7516486339055917967, 10294393908560702715, 18446744073709551615, 18446744073709551615, 365, 378, 365, 378, 73, 75, true, "trained model", "trained model"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15134872732861191546, 14564992985051477580, 18446744073709551615, 18446744073709551615, 393, 408, 393, 408, 78, 80, true, "only components", "only components"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 10030042203366342768, 783598633112047048, 18446744073709551615, 18446744073709551615, 463, 488, 463, 488, 94, 96, true, "template-specific machine", "template-specific machine"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 9482497685613336800, 7352283876107266683, 18446744073709551615, 18446744073709551615, 536, 557, 536, 557, 105, 107, true, "additional components", "additional components"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6402004976666964284, 6933100789420329365, 18446744073709551615, 18446744073709551615, 611, 630, 611, 630, 118, 121, true, "train custom models", "train custom models"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 1915006193249717419, 16014958744068977698, 18446744073709551615, 18446744073709551615, 685, 699, 685, 699, 132, 134, true, "default models", "default models"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 16984543413455913769, 17986818399034101927, 18446744073709551615, 18446744073709551615, 756, 775, 756, 775, 144, 147, true, "best quality output", "best quality output"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 2703018952916355661, 13861502101887545388, 18446744073709551615, 18446744073709551615, 43, 53, 43, 53, 7, 8, true, "components", "components"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 16381206514091025767, 4885222411348045849, 18446744073709551615, 18446744073709551615, 69, 75, 69, 75, 11, 12, true, "Figure", "Figure"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 8106479143794098783, 2962541264367803251, 18446744073709551615, 18446744073709551615, 83, 90, 83, 90, 17, 18, true, "parsing", "parsing"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6167933651658664291, 11376723037997544694, 18446744073709551615, 18446744073709551615, 94, 103, 94, 103, 19, 20, true, "documents", "documents"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541480579, 13421765588580713285, 18446744073709551615, 18446744073709551615, 142, 144, 142, 144, 26, 27, true, "ML", "ML"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 7552769977713241504, 5898699418083397020, 18446744073709551615, 18446744073709551615, 150, 160, 150, 160, 31, 32, true, "Annotation", "Annotation"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 1037258523789473353, 7580283139140321310, 18446744073709551615, 18446744073709551615, 248, 259, 248, 259, 48, 49, true, "annotations", "annotations"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 1476026390672618576, 10107149952848687202, 18446744073709551615, 18446744073709551615, 317, 328, 317, 328, 64, 65, true, "document(s)", "document(s)"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6167933651658664291, 11376723037997543326, 18446744073709551615, 18446744073709551615, 446, 455, 446, 455, 90, 91, true, "documents", "documents"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 329104161610777240, 2839511462594090084, 18446744073709551615, 18446744073709551615, 497, 502, 497, 502, 97, 98, true, "model", "model"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 329104159157820437, 8191643342315584183, 18446744073709551615, 18446744073709551615, 578, 583, 578, 583, 113, 114, true, "users", "users"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14814125365076808131, 9337596647297514490, 18446744073709551615, 18446744073709551615, 665, 673, 665, 673, 129, 130, true, "platform", "platform"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15359807916847495711, 1545007371912531857, 18446744073709551615, 18446744073709551615, 704, 714, 704, 714, 136, 137, true, "annotation", "annotation"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14634153919632515335, 1746269937003899312, 18446744073709551615, 18446744073709551615, 719, 727, 719, 727, 138, 139, true, "training", "training"], ["verb", "compound-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6181919778911650791, 1457658887135264263, 18446744073709551615, 18446744073709551615, 25, 34, 25, 34, 3, 5, true, "is formed", "is formed"], ["verb", "compound-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6594130350774790903, 16739553497580608789, 18446744073709551615, 18446744073709551615, 420, 441, 420, 441, 85, 89, true, "are needed to convert", "are needed to convert"], ["verb", "compound-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 709385872197115477, 4590124741288282676, 18446744073709551615, 18446744073709551615, 728, 751, 728, 751, 139, 143, true, "are advised to retrieve", "are advised to retrieve"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14652261792406569736, 13894514311356629389, 18446744073709551615, 18446744073709551615, 57, 65, 57, 65, 9, 10, true, "depicted", "depicted"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6167805666845656469, 5420622691594517541, 18446744073709551615, 18446744073709551615, 128, 137, 128, 137, 24, 25, true, "optimised", "optimised"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14650442552334623127, 8421427627003326802, 18446744073709551615, 18446744073709551615, 239, 247, 239, 247, 47, 48, true, "acquired", "acquired"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14650448030444381648, 18239180096597721029, 18446744073709551615, 18446744073709551615, 265, 273, 265, 273, 53, 54, true, "applying", "applying"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 5615554093848987331, 12626781818686368526, 18446744073709551615, 18446744073709551615, 302, 312, 302, 312, 62, 63, true, "assembling", "assembling"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486535, 13421773584933919664, 18446744073709551615, 18446744073709551615, 379, 381, 379, 381, 75, 76, true, "is", "is"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 8106342444693204894, 10008695562509568866, 18446744073709551615, 18446744073709551615, 489, 496, 489, 496, 96, 97, true, "learned", "learned"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486535, 13421773584933943769, 18446744073709551615, 18446744073709551615, 503, 505, 503, 505, 98, 99, true, "is", "is"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 8106476000214061408, 15857118004815849675, 18446744073709551615, 18446744073709551615, 524, 531, 524, 531, 103, 104, true, "provide", "provide"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 329104159171192019, 8192566990508309340, 18446744073709551615, 18446744073709551615, 572, 577, 572, 577, 112, 113, true, "allow", "allow"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 16381206562264646932, 1960462029052909015, 18446744073709551615, 18446744073709551615, 587, 593, 587, 593, 115, 116, true, "gather", "gather"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486535, 13421773584933772912, 18446744073709551615, 18446744073709551615, 635, 637, 635, 637, 123, 124, true, "is", "is"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 389609625621163440, 7945956557482740818, 18446744073709551615, 18446744073709551615, 651, 655, 651, 655, 126, 127, true, "note", "note"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 329104161555284808, 2777260180261771726, 18446744073709551615, 18446744073709551615, 674, 679, 674, 679, 130, 131, true, "comes", "comes"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415895564896, 9951910993348590293, 18446744073709551615, 18446744073709551615, 786, 789, 786, 789, 150, 151, true, "are", "are"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486989, 13421773491343548165, 18446744073709551615, 18446744073709551615, 35, 37, 35, 37, 5, 6, true, "by", "by"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541487053, 13421773623307651151, 18446744073709551615, 18446744073709551615, 54, 56, 54, 56, 8, 9, true, "as", "as"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486538, 13421773580625251915, 18446744073709551615, 18446744073709551615, 66, 68, 66, 68, 10, 11, true, "in", "in"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485670, 13421765268538655208, 18446744073709551615, 18446744073709551615, 91, 93, 91, 93, 18, 19, true, "of", "of"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 8106398347393280713, 1525760500098149661, 18446744073709551615, 18446744073709551615, 104, 111, 104, 111, 20, 22, true, "into an", "into an"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415895625940, 9951862331881338732, 18446744073709551615, 18446744073709551615, 138, 141, 138, 141, 25, 26, true, "for", "for"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 16381206565712212855, 8780603472327557404, 18446744073709551615, 18446744073709551615, 161, 167, 161, 167, 32, 34, true, "of the", "of the"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486538, 13421773580625308724, 18446744073709551615, 18446744073709551615, 187, 189, 187, 189, 36, 37, true, "in", "in"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14637917359887717745, 4284944502519852047, 18446744073709551615, 18446744073709551615, 230, 238, 230, 238, 45, 47, true, "from the", "from the"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 16381206560517276114, 690701761066186570, 18446744073709551615, 18446744073709551615, 329, 335, 329, 335, 65, 67, true, "into a", "into a"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 389609625538087702, 7941525277449308498, 18446744073709551615, 18446744073709551615, 360, 364, 360, 364, 71, 73, true, "If a", "If a"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 329104161875330307, 1829521586099700438, 18446744073709551615, 18446744073709551615, 457, 462, 457, 462, 92, 94, true, "If no", "If no"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14634130761162415388, 3953663377257032858, 18446744073709551615, 18446744073709551615, 656, 664, 656, 664, 127, 129, true, "that the", "that the"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 389609625618037948, 7945948305478382603, 18446744073709551615, 18446744073709551615, 680, 684, 680, 684, 131, 132, true, "with", "with"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485930, 13421773574584698862, 18446744073709551615, 18446744073709551615, 701, 703, 701, 703, 135, 136, true, "so", "so"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485865, 13421773580077175219, 18446744073709551615, 18446744073709551615, 431, 433, 431, 433, 87, 88, true, "to", "to"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485865, 13421773580077214041, 18446744073709551615, 18446744073709551615, 584, 586, 584, 586, 114, 115, true, "to", "to"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485865, 13421773580077226473, 18446744073709551615, 18446744073709551615, 648, 650, 648, 650, 125, 126, true, "to", "to"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485865, 13421773580077221700, 18446744073709551615, 18446744073709551615, 740, 742, 740, 742, 141, 142, true, "to", "to"], ["sentence", "", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 10456209429844276823, 2422727482681724013, 18446744073709551615, 18446744073709551615, 0, 93, 0, 93, 0, 19, true, "Let us now elaborate on what each of the five components deliver in the rest of this section.", "Let us now elaborate on what each of the five components deliver in the rest of this section."], ["term", "single-term", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 2703018952916355661, 9708196146755666277, 18446744073709551615, 18446744073709551615, 46, 56, 46, 56, 10, 11, true, "components", "components"], ["term", "single-term", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 389609625632792118, 4197781341925173653, 18446744073709551615, 18446744073709551615, 72, 76, 72, 76, 14, 15, true, "rest", "rest"], ["term", "single-term", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 8106478708629288965, 3081332499878802976, 18446744073709551615, 18446744073709551615, 85, 92, 85, 92, 17, 18, true, "section", "section"], ["verb", "single-verb", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 12178341415896275389, 18145395793309844548, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "Let", "Let"], ["verb", "single-verb", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 6165947860431677587, 15989194838257465182, 18446744073709551615, 18446744073709551615, 11, 20, 11, 20, 3, 4, true, "elaborate", "elaborate"], ["verb", "single-verb", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 8106396542836595001, 13259224101982934496, 18446744073709551615, 18446744073709551615, 57, 64, 57, 64, 11, 12, true, "deliver", "deliver"], ["conn", "single-conn", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 15441160910541485678, 6207367339164111129, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "on", "on"], ["conn", "single-conn", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 2283199098925706958, 7907397599804430129, 18446744073709551615, 18446744073709551615, 29, 40, 29, 40, 6, 9, true, "each of the", "each of the"], ["conn", "single-conn", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 16381206560518651853, 14564747107754323096, 18446744073709551615, 18446744073709551615, 65, 71, 65, 71, 12, 14, true, "in the", "in the"], ["conn", "single-conn", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 8106342927224204628, 9005811432893922380, 18446744073709551615, 18446744073709551615, 77, 84, 77, 84, 15, 17, true, "of this", "of this"], ["numval", "fval", 15403141463083979171, "TEXT", "#/texts/28", 1.0, 12178341415896435199, 15281646599530735231, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "3.2", "3.2"], ["numval", "ival", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 17767354399704235162, 1424727048414549116, 18446744073709551615, 18446744073709551615, 590, 591, 590, 591, 108, 109, true, "2", "2"], ["expression", "word-concatenation", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 7265216487325416347, 18237727768635437499, 18446744073709551615, 18446744073709551615, 85, 96, 85, 96, 14, 15, true, "non-trivial", "non-trivial"], ["expression", "word-concatenation", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 2334432749592536458, 15832688675868318655, 18446744073709551615, 18446744073709551615, 134, 147, 134, 147, 23, 24, true, "text-snippets", "text-snippets"], ["expression", "word-concatenation", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 2334432749592536458, 15832688675868348537, 18446744073709551615, 18446744073709551615, 237, 250, 237, 250, 43, 44, true, "text-snippets", "text-snippets"], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 17930569148916488857, 5505536758637995970, 18446744073709551615, 18446744073709551615, 0, 177, 0, 177, 0, 31, true, "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page.", "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page."], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 5655297151045106724, 13262557470809669287, 18446744073709551615, 18446744073709551615, 178, 290, 178, 290, 31, 53, true, "For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper.", "For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper."], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14442010922272755354, 2304293999158723749, 18446744073709551615, 18446744073709551615, 291, 350, 291, 350, 53, 65, true, "There are two reasons why we are interested in these cells.", "There are two reasons why we are interested in these cells."], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 7272978202358758349, 9961870530541141609, 18446744073709551615, 18446744073709551615, 351, 501, 351, 501, 65, 91, true, "First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label.", "First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label."], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 13416184063657893995, 5559344282962348298, 18446744073709551615, 18446744073709551615, 502, 579, 502, 579, 91, 106, true, "Second, the concept of a cell can be easily transferred to scanned documents.", "Second, the concept of a cell can be easily transferred to scanned documents."], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 1652403861242351933, 11439629794277621034, 18446744073709551615, 18446744073709551615, 580, 669, 580, 669, 106, 125, true, "In Figure 2, we show the cells obtained from an example PDF page after the parsing stage.", "In Figure 2, we show the cells obtained from an example PDF page after the parsing stage."], ["term", "enum-term-mark-1", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14297716267651363259, 16404864194219706557, 18446744073709551615, 18446744073709551615, 65, 101, 65, 101, 12, 16, true, "straightforward but non-trivial task", "straightforward but non-trivial task"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 539708993415547268, 4705216367729054225, 18446744073709551615, 18446744073709551615, 7, 24, 7, 24, 2, 4, true, "parsing component", "parsing component"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 12768482189442203961, 3521963780408719451, 18446744073709551615, 18446744073709551615, 85, 101, 85, 101, 14, 16, true, "non-trivial task", "non-trivial task"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14650937348812924036, 7972671100203928321, 18446744073709551615, 18446744073709551615, 168, 176, 168, 176, 28, 30, true, "PDF page", "PDF page"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 13453730679222803232, 6614428335010656559, 18446744073709551615, 18446744073709551615, 383, 409, 383, 409, 72, 75, true, "crucial geometric features", "crucial geometric features"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 2317020437411802284, 13633764960020459386, 18446744073709551615, 18446744073709551615, 479, 500, 479, 500, 87, 90, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6638406718593592815, 12534367881860700133, 18446744073709551615, 18446744073709551615, 628, 644, 628, 644, 117, 120, true, "example PDF page", "example PDF page"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 17743353868824691761, 7006821731505867769, 18446744073709551615, 18446744073709551615, 655, 668, 655, 668, 122, 124, true, "parsing stage", "parsing stage"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14814125852840540191, 10333222397369262494, 18446744073709551615, 18446744073709551615, 32, 40, 32, 40, 6, 7, true, "pipeline", "pipeline"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104159325617355, 2319011674175919451, 18446744073709551615, 18446744073709551615, 121, 126, 121, 126, 20, 21, true, "boxes", "boxes"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 2334432749592536458, 15832688675868318655, 18446744073709551615, 18446744073709551615, 134, 147, 134, 147, 23, 24, true, "text-snippets", "text-snippets"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14087388443212978183, 8710122583887711946, 18446744073709551615, 18446744073709551615, 182, 192, 182, 192, 32, 33, true, "simplicity", "simplicity"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104159325617355, 2319011674175930307, 18446744073709551615, 18446744073709551615, 224, 229, 224, 229, 40, 41, true, "boxes", "boxes"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 2334432749592536458, 15832688675868348537, 18446744073709551615, 18446744073709551615, 237, 250, 237, 250, 43, 44, true, "text-snippets", "text-snippets"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104161531686411, 13181154022391063384, 18446744073709551615, 18446744073709551615, 254, 259, 254, 259, 45, 46, true, "cells", "cells"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6165970943308474352, 2869268154850983474, 18446744073709551615, 18446744073709551615, 267, 276, 267, 276, 48, 49, true, "remainder", "remainder"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104161668023890, 13177965549816359198, 18446744073709551615, 18446744073709551615, 284, 289, 284, 289, 51, 52, true, "paper", "paper"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106478449187889361, 2785113865213804946, 18446744073709551615, 18446744073709551615, 305, 312, 305, 312, 56, 57, true, "reasons", "reasons"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104161531686411, 13181154022391052173, 18446744073709551615, 18446744073709551615, 344, 349, 344, 349, 63, 64, true, "cells", "cells"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106464587473865376, 17953291432149571565, 18446744073709551615, 18446744073709551615, 438, 445, 438, 445, 81, 82, true, "machine", "machine"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206567230470443, 12165568246676867373, 18446744073709551615, 18446744073709551615, 455, 461, 455, 461, 83, 84, true, "models", "models"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106398484423890147, 17423301407248305912, 18446744073709551615, 18446744073709551615, 514, 521, 514, 521, 94, 95, true, "concept", "concept"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 389609625696024605, 9216907839507590200, 18446744073709551615, 18446744073709551615, 527, 531, 527, 531, 97, 98, true, "cell", "cell"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6167933651658664291, 13123759922164485441, 18446744073709551615, 18446744073709551615, 569, 578, 569, 578, 104, 105, true, "documents", "documents"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206514091025767, 13570160145101069077, 18446744073709551615, 18446744073709551615, 583, 589, 583, 589, 107, 108, true, "Figure", "Figure"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104161531686411, 13181154022391035920, 18446744073709551615, 18446744073709551615, 605, 610, 605, 610, 113, 114, true, "cells", "cells"], ["verb", "compound-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 4427434396064538754, 12127654283877660135, 18446744073709551615, 18446744073709551615, 197, 210, 197, 210, 35, 38, true, "will refer to", "will refer to"], ["verb", "compound-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6724950217998665324, 17396052864677910526, 18446744073709551615, 18446744073709551615, 416, 430, 416, 430, 76, 79, true, "are later used", "are later used"], ["verb", "compound-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 10525416859123113492, 14679514170366798433, 18446744073709551615, 18446744073709551615, 532, 568, 532, 568, 98, 104, true, "can be easily transferred to scanned", "can be easily transferred to scanned"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104161785912251, 13150765894503023181, 18446744073709551615, 18446744073709551615, 45, 50, 45, 50, 9, 10, true, "solve", "solve"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6187675911162887333, 9639461483670525942, 18446744073709551615, 18446744073709551615, 55, 64, 55, 64, 11, 12, true, "following", "following"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 389609625538336045, 8446862929409352327, 18446744073709551615, 18446744073709551615, 103, 107, 103, 107, 17, 18, true, "Find", "Find"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14652253380850532610, 107046829378636663, 18446744073709551615, 18446744073709551615, 112, 120, 112, 120, 19, 20, true, "bounding", "bounding"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206574684919940, 5616786163860063725, 18446744073709551615, 18446744073709551615, 153, 159, 153, 159, 25, 26, true, "appear", "appear"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14652253380850532610, 107046829378695267, 18446744073709551615, 18446744073709551615, 215, 223, 215, 223, 39, 40, true, "bounding", "bounding"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 12178341415895564896, 14915893524086030278, 18446744073709551615, 18446744073709551615, 297, 300, 297, 300, 54, 55, true, "are", "are"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 12178341415895564896, 14915893524086019548, 18446744073709551615, 18446744073709551615, 320, 323, 320, 323, 59, 60, true, "are", "are"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106476000214061408, 4097314869569959791, 18446744073709551615, 18446744073709551615, 363, 370, 363, 370, 68, 69, true, "provide", "provide"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14639581097006750428, 8481916445851174431, 18446744073709551615, 18446744073709551615, 446, 454, 446, 454, 82, 83, true, "learning", "learning"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6180169261969955564, 18129286266821845731, 18446744073709551615, 18446744073709551615, 465, 474, 465, 474, 85, 86, true, "determine", "determine"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 389609625741152123, 9215016449560601709, 18446744073709551615, 18446744073709551615, 596, 600, 596, 600, 111, 112, true, "show", "show"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14814126654807168093, 5783395789111073227, 18446744073709551615, 18446744073709551615, 611, 619, 611, 619, 114, 115, true, "obtained", "obtained"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 3083497604064482481, 11203131312199197259, 18446744073709551615, 18446744073709551615, 324, 337, 324, 337, 60, 62, true, "interested in", "interested in"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16380809977974811061, 446265425895612346, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 2, true, "In the", "In the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206565712212855, 12113567223054660961, 18446744073709551615, 18446744073709551615, 25, 31, 25, 31, 4, 6, true, "of the", "of the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206565712007226, 12113589112466741738, 18446744073709551615, 18446744073709551615, 127, 133, 127, 133, 21, 23, true, "of all", "of all"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106342614185119603, 14378991548749122310, 18446744073709551615, 18446744073709551615, 160, 167, 160, 167, 26, 28, true, "on each", "on each"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 12178341415896108722, 14915809969954693677, 18446744073709551615, 18446744073709551615, 178, 181, 178, 181, 31, 32, true, "For", "For"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206565712212855, 12113567223054649064, 18446744073709551615, 18446744073709551615, 230, 236, 230, 236, 41, 43, true, "of the", "of the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 15441160910541487053, 14737048381530263484, 18446744073709551615, 18446744073709551615, 251, 253, 251, 253, 44, 45, true, "as", "as"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206560518651853, 12195079447811016074, 18446744073709551615, 18446744073709551615, 260, 266, 260, 266, 46, 48, true, "in the", "in the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206565712212855, 12113567223054608798, 18446744073709551615, 18446744073709551615, 277, 283, 277, 283, 49, 51, true, "of the", "of the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14638857868319795209, 699324563274466024, 18446744073709551615, 18446744073709551615, 374, 382, 374, 382, 70, 72, true, "with the", "with the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206560518651853, 12195079447810969129, 18446744073709551615, 18446744073709551615, 431, 437, 431, 437, 79, 81, true, "in the", "in the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 389609625620237736, 9181237261281377861, 18446744073709551615, 18446744073709551615, 522, 526, 522, 526, 95, 97, true, "of a", "of a"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 15441160910541480354, 14736971653023235653, 18446744073709551615, 18446744073709551615, 580, 582, 580, 582, 106, 107, true, "In", "In"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106397740404304256, 9502999072153915614, 18446744073709551615, 18446744073709551615, 620, 627, 620, 627, 115, 117, true, "from an", "from an"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 5948794771153674476, 3941442301932288916, 18446744073709551615, 18446744073709551615, 645, 654, 645, 654, 120, 122, true, "after the", "after the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206519425733256, 11107177216059411514, 18446744073709551615, 18446744073709551615, 208, 214, 208, 214, 37, 39, true, "to the", "to the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 15441160910541485865, 14736949766705825112, 18446744073709551615, 18446744073709551615, 462, 464, 462, 464, 84, 85, true, "to", "to"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 15441160910541485865, 14736949766705835136, 18446744073709551615, 18446744073709551615, 558, 560, 558, 560, 102, 103, true, "to", "to"], ["numval", "ival", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17767354399704235152, 12765861062670798554, 18446744073709551615, 18446744073709551615, 264, 265, 264, 265, 51, 52, true, "8", "8"], ["parenthesis", "round brackets", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17960842653009747058, 2579946755923203289, 18446744073709551615, 18446744073709551615, 572, 600, 572, 600, 107, 115, true, "(e.g. the width of the cell)", "(e.g. the width of the cell)"], ["expression", "common", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541487324, 9140229821694613215, 18446744073709551615, 18446744073709551615, 573, 577, 573, 577, 108, 109, true, "eg", "e.g."], ["expression", "word-concatenation", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17168373465524353870, 1775550960674726092, 18446744073709551615, 18446744073709551615, 251, 263, 251, 263, 50, 51, true, "ISO-standard", "ISO-standard"], ["expression", "word-concatenation", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 5748925445660418888, 5099648152124005842, 18446744073709551615, 18446744073709551615, 505, 515, 505, 515, 96, 97, true, "text-lines", "text-lines"], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 7415923518961690554, 5436017382399562501, 18446744073709551615, 18446744073709551615, 0, 184, 0, 184, 0, 37, true, "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells.", "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells."], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 10722424685754320142, 14939850258700553466, 18446744073709551615, 18446744073709551615, 185, 349, 185, 349, 37, 68, true, "This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs.", "This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs."], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 5840331794895575041, 7522048939256453486, 18446744073709551615, 18446744073709551615, 350, 516, 350, 516, 68, 98, true, "Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines.", "Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines."], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 9909804561214722407, 7606701336647946079, 18446744073709551615, 18446744073709551615, 517, 672, 517, 672, 98, 126, true, "This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models.", "This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models."], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 1905260306212934301, 17027778861887240638, 18446744073709551615, 18446744073709551615, 673, 763, 673, 763, 126, 143, true, "As a consequence, we reduce the variability of the geometric features as much as possible.", "As a consequence, we reduce the variability of the geometric features as much as possible."], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 11883795114712972606, 1100089006714247884, 18446744073709551615, 18446744073709551615, 764, 900, 764, 900, 143, 166, true, "The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions.", "The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions."], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 14888806260649526283, 9888391296230812238, 18446744073709551615, 18446744073709551615, 66, 82, 66, 82, 12, 14, true, "conceptual point", "conceptual point"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15237688208174812943, 9169750940975380140, 18446744073709551615, 18446744073709551615, 152, 170, 152, 170, 31, 33, true, "precise definition", "precise definition"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15237688208174812943, 9169750940975379272, 18446744073709551615, 18446744073709551615, 200, 218, 200, 218, 41, 43, true, "precise definition", "precise definition"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 18333734320849547479, 14786376426466981623, 18446744073709551615, 18446744073709551615, 280, 297, 280, 297, 54, 57, true, "PDF document code", "PDF document code"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 1742535906543983437, 5107611858547030995, 18446744073709551615, 18446744073709551615, 350, 360, 350, 360, 68, 70, true, "Older PDFs", "Older PDFs"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 4980127422887365844, 7914224627465909624, 18446744073709551615, 18446744073709551615, 459, 470, 459, 470, 87, 89, true, "recent PDFs", "recent PDFs"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17387068897999710340, 583199993496143951, 18446744073709551615, 18446744073709551615, 500, 515, 500, 515, 95, 97, true, "full text-lines", "full text-lines"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 19581948089354274, 11863368431354531962, 18446744073709551615, 18446744073709551615, 541, 559, 541, 559, 102, 104, true, "geometric features", "geometric features"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 10797576170574569798, 16421164876638798879, 18446744073709551615, 18446744073709551615, 642, 655, 642, 655, 121, 123, true, "later machine", "later machine"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 19581948089354274, 11863368431354539464, 18446744073709551615, 18446744073709551615, 724, 742, 724, 742, 136, 138, true, "geometric features", "geometric features"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 19581948089354274, 11863368431354518484, 18446744073709551615, 18446744073709551615, 804, 822, 804, 822, 149, 151, true, "geometric features", "geometric features"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625631210899, 286282282783107526, 18446744073709551615, 18446744073709551615, 10, 14, 10, 14, 2, 3, true, "task", "task"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161531686411, 1686298631271692268, 18446744073709551615, 18446744073709551615, 30, 35, 30, 35, 6, 7, true, "cells", "cells"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625619349298, 153488826964012383, 18446744073709551615, 18446744073709551615, 86, 90, 86, 90, 15, 16, true, "view", "view"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 14814125472896938138, 13265343424957278224, 18446744073709551615, 18446744073709551615, 105, 113, 105, 113, 21, 22, true, "practice", "practice"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161531686411, 1686298631271808079, 18446744073709551615, 18446744073709551615, 178, 183, 178, 183, 35, 36, true, "cells", "cells"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625633345913, 286327708023170250, 18446744073709551615, 18446744073709551615, 190, 194, 190, 194, 38, 39, true, "lack", "lack"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106342536556065951, 5101417539304616523, 18446744073709551615, 18446744073709551615, 227, 234, 227, 234, 45, 46, true, "origins", "origins"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 11600564911974996302, 9772943102206687174, 18446744073709551615, 18446744073709551615, 314, 325, 314, 325, 61, 62, true, "variability", "variability"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106477781724488761, 8016550195084047451, 18446744073709551615, 18446744073709551615, 333, 340, 333, 340, 64, 65, true, "quality", "quality"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625526197745, 154178518742491636, 18446744073709551615, 18446744073709551615, 344, 348, 344, 348, 66, 67, true, "PDFs", "PDFs"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206560620045048, 10217682693125643903, 18446744073709551615, 18446744073709551615, 393, 399, 393, 399, 75, 76, true, "images", "images"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 12178341415896269066, 15144800269722583989, 18446744073709551615, 18446744073709551615, 406, 409, 406, 409, 77, 78, true, "OCR", "OCR"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161531686411, 1686298631271792344, 18446744073709551615, 18446744073709551615, 427, 432, 427, 432, 80, 81, true, "cells", "cells"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625633592024, 286369527288178260, 18446744073709551615, 18446744073709551615, 442, 446, 442, 446, 83, 84, true, "word", "word"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161531686411, 1686298631271788393, 18446744073709551615, 18446744073709551615, 490, 495, 490, 495, 93, 94, true, "cells", "cells"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 11600564911974996302, 9772943102206677434, 18446744073709551615, 18446744073709551615, 522, 533, 522, 533, 99, 100, true, "variability", "variability"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625696024605, 274735343390871718, 18446744073709551615, 18446744073709551615, 567, 571, 567, 571, 106, 107, true, "cell", "cell"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104158766048883, 14200427917049610175, 18446744073709551615, 18446744073709551615, 582, 587, 582, 587, 110, 111, true, "width", "width"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625696024605, 274735343390873519, 18446744073709551615, 18446744073709551615, 595, 599, 595, 599, 113, 114, true, "cell", "cell"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 5731695876385560379, 8735798418445737053, 18446744073709551615, 18446744073709551615, 627, 638, 627, 638, 119, 120, true, "performance", "performance"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206567230470443, 10197774938990653233, 18446744073709551615, 18446744073709551615, 665, 671, 665, 671, 124, 125, true, "models", "models"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 2343822922798056892, 10154671440985252928, 18446744073709551615, 18446744073709551615, 678, 689, 678, 689, 128, 129, true, "consequence", "consequence"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 11600564911974996302, 9772943102206681686, 18446744073709551615, 18446744073709551615, 705, 716, 705, 716, 133, 134, true, "variability", "variability"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625696024605, 274735343390824345, 18446744073709551615, 18446744073709551615, 828, 832, 828, 832, 153, 154, true, "cell", "cell"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106464587473865376, 8005470758012666235, 18446744073709551615, 18446744073709551615, 853, 860, 853, 860, 159, 160, true, "machine", "machine"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15359670209433732834, 8546369363095933532, 18446744073709551615, 18446744073709551615, 870, 880, 870, 880, 161, 162, true, "algorithms", "algorithms"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15175963360124346573, 7425656004405084691, 18446744073709551615, 18446744073709551615, 888, 899, 888, 899, 164, 165, true, "predictions", "predictions"], ["verb", "compound-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206478391039341, 13319343947608985919, 18446744073709551615, 18446744073709551615, 95, 101, 95, 101, 18, 20, true, "is not", "is not"], ["verb", "compound-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17508027506047556020, 2482579808046990290, 18446744073709551615, 18446744073709551615, 127, 141, 127, 141, 25, 28, true, "does not exist", "does not exist"], ["verb", "compound-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17858840657736432000, 827456240175881986, 18446744073709551615, 18446744073709551615, 367, 379, 367, 379, 71, 73, true, "were created", "were created"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106397466565237467, 977130001411655379, 18446744073709551615, 18446744073709551615, 18, 25, 18, 25, 4, 5, true, "finding", "finding"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 1410392834682309464, 3495073499889577510, 18446744073709551615, 18446744073709551615, 36, 48, 36, 48, 7, 9, true, "might appear", "might appear"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 12178341415895601584, 15143512592534806650, 18446744073709551615, 18446744073709551615, 219, 222, 219, 222, 43, 44, true, "has", "has"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 6180169263126451304, 3119480438606465672, 18446744073709551615, 18446744073709551615, 266, 275, 266, 275, 52, 53, true, "detailing", "detailing"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106478648743879659, 18054348966917680372, 18446744073709551615, 18446744073709551615, 385, 392, 385, 392, 74, 75, true, "scanned", "scanned"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104159157798023, 1678901618548373231, 18446744073709551615, 18446744073709551615, 400, 405, 400, 405, 76, 77, true, "using", "using"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206521510867710, 13465346086607517925, 18446744073709551615, 18446744073709551615, 420, 426, 420, 426, 79, 80, true, "return", "return"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104159171192019, 1680748189061438627, 18446744073709551615, 18446744073709551615, 471, 476, 471, 476, 89, 90, true, "allow", "allow"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206532661480265, 9531737447789598314, 18446744073709551615, 18446744073709551615, 483, 489, 483, 489, 92, 93, true, "create", "create"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206560633513421, 9503604932247872739, 18446744073709551615, 18446744073709551615, 616, 622, 616, 622, 117, 118, true, "impact", "impact"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 14639581097006750428, 14675095568206484231, 18446744073709551615, 18446744073709551615, 656, 664, 656, 664, 123, 124, true, "learning", "learning"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206521531524134, 13473141336162655057, 18446744073709551615, 18446744073709551615, 694, 700, 694, 700, 131, 132, true, "reduce", "reduce"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 12178341415895564896, 15143520438392043936, 18446744073709551615, 18446744073709551615, 833, 836, 833, 836, 154, 155, true, "are", "are"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 14639581097006750428, 14675095568205014589, 18446744073709551615, 18446744073709551615, 861, 869, 861, 869, 160, 161, true, "learning", "learning"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206563385633981, 13002579886209109896, 18446744073709551615, 18446744073709551615, 881, 887, 881, 887, 162, 164, true, "can do", "can do"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16949988767738090709, 100229038272389, 18446744073709551615, 18446744073709551615, 49, 63, 49, 63, 9, 11, true, "intuitive from", "intuitive from"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106464529736241562, 1453137548743953724, 18446744073709551615, 18446744073709551615, 746, 753, 746, 753, 139, 141, true, "much as", "much as"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 6179252389649895475, 3082759261078903372, 18446744073709551615, 18446744073709551615, 0, 9, 0, 9, 0, 2, true, "While the", "While the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541485670, 9140198027864825824, 18446744073709551615, 18446744073709551615, 15, 17, 15, 17, 3, 4, true, "of", "of"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541485670, 9140198027864794615, 18446744073709551615, 18446744073709551615, 83, 85, 83, 85, 14, 15, true, "of", "of"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541486538, 9140197839759264413, 18446744073709551615, 18446744073709551615, 102, 104, 102, 104, 20, 21, true, "in", "in"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161786618045, 1529671095071243222, 18446744073709551615, 18446744073709551615, 115, 120, 115, 120, 23, 24, true, "since", "since"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206565712212855, 10162434354564300945, 18446744073709551615, 18446744073709551615, 171, 177, 171, 177, 33, 35, true, "of the", "of the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625620237736, 274280443846053091, 18446744073709551615, 18446744073709551615, 195, 199, 195, 199, 39, 41, true, "of a", "of a"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206560518651853, 9445866209727956530, 18446744073709551615, 18446744073709551615, 244, 250, 244, 250, 48, 50, true, "in the", "in the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206560518651853, 9445866209727960386, 18446744073709551615, 18446744073709551615, 307, 313, 307, 313, 59, 61, true, "in the", "in the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206565712212855, 10162434354564328048, 18446744073709551615, 18446744073709551615, 326, 332, 326, 332, 62, 64, true, "of the", "of the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541485670, 9140198027864778039, 18446744073709551615, 18446744073709551615, 341, 343, 341, 343, 65, 66, true, "of", "of"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625697843734, 276176066640977335, 18446744073709551615, 18446744073709551615, 380, 384, 380, 384, 73, 74, true, "from", "from"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 14637917333167503367, 15104975758825844690, 18446744073709551615, 18446744073709551615, 433, 441, 433, 441, 81, 83, true, "for each", "for each"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161580427521, 1571916512453351057, 18446744073709551615, 18446744073709551615, 448, 453, 448, 453, 85, 86, true, "while", "while"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 12178341415895625940, 15143513577680482188, 18446744073709551615, 18446744073709551615, 496, 499, 496, 499, 94, 95, true, "for", "for"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206560518651853, 9445866209727942525, 18446744073709551615, 18446744073709551615, 534, 540, 534, 540, 100, 102, true, "in the", "in the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206565712212855, 10162434354564801127, 18446744073709551615, 18446744073709551615, 560, 566, 560, 566, 104, 106, true, "of the", "of the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206564601699726, 9376311130415997675, 18446744073709551615, 18446744073709551615, 573, 581, 573, 581, 108, 110, true, "eg the", "e.g. the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206565712212855, 10162434354564803006, 18446744073709551615, 18446744073709551615, 588, 594, 588, 594, 111, 113, true, "of the", "of the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541485670, 9140198027863323620, 18446744073709551615, 18446744073709551615, 639, 641, 639, 641, 120, 121, true, "of", "of"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625539850184, 154652053038545723, 18446744073709551615, 18446744073709551615, 673, 677, 673, 677, 126, 128, true, "As a", "As a"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206565712212855, 10162434354564302848, 18446744073709551615, 18446744073709551615, 717, 723, 717, 723, 134, 136, true, "of the", "of the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625620237736, 274280443846028590, 18446744073709551615, 18446744073709551615, 823, 827, 823, 827, 151, 153, true, "of a", "of a"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541485865, 9140198005523690763, 18446744073709551615, 18446744073709551615, 480, 482, 480, 482, 91, 92, true, "to", "to"], ["expression", "latex", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 389609625699793082, 10849477641302803979, 18446744073709551615, 18446744073709551615, 192, 198, 192, 198, 33, 34, true, "^{9}", "$^{9}$"], ["sentence", "", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 14537379500392520301, 2641874782821116556, 18446744073709551615, 18446744073709551615, 0, 124, 0, 124, 0, 22, true, "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document.", "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document."], ["sentence", "", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 13624401348528241810, 18138670151212489595, 18446744073709551615, 18446744073709551615, 125, 199, 125, 199, 22, 35, true, "This operation relies on the iterators provided by the QPDF library$^{9}$.", "This operation relies on the iterators provided by the QPDF library$^{9}$."], ["term", "enum-term-mark-3", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 4523841464557345199, 10114780374881260645, 18446744073709551615, 18446744073709551615, 73, 95, 73, 95, 13, 16, true, "symbols and transforms", "symbols and transforms"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 1490421477877365637, 10634695956169887731, 18446744073709551615, 18446744073709551615, 4, 21, 4, 21, 1, 3, true, "programmatic PDFs", "programmatic PDFs"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 5748925367544727060, 5318612030524804463, 18446744073709551615, 18446744073709551615, 27, 37, 27, 37, 5, 7, true, "text cells", "text cells"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 10366280871409328423, 6450002708232284703, 18446744073709551615, 18446744073709551615, 58, 69, 58, 69, 10, 12, true, "raw streams", "raw streams"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 12366808243217836777, 18339274156313290686, 18446744073709551615, 18446744073709551615, 111, 123, 111, 123, 19, 21, true, "PDF document", "PDF document"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 17527422690611097285, 6107149276644244418, 18446744073709551615, 18446744073709551615, 180, 192, 180, 192, 31, 33, true, "QPDF library", "QPDF library"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 8106478574083600801, 3492130479069597648, 18446744073709551615, 18446744073709551615, 73, 80, 73, 80, 13, 14, true, "symbols", "symbols"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 8619280146728881072, 14423298271510896494, 18446744073709551615, 18446744073709551615, 85, 95, 85, 95, 15, 16, true, "transforms", "transforms"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 6167836358624304835, 2001895887803008895, 18446744073709551615, 18446744073709551615, 130, 139, 130, 139, 23, 24, true, "operation", "operation"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 6182474587515713435, 3040902962194794247, 18446744073709551615, 18446744073709551615, 154, 163, 154, 163, 27, 28, true, "iterators", "iterators"], ["verb", "compound-verb", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 653298976799407280, 12994946537251491253, 18446744073709551615, 18446744073709551615, 38, 52, 38, 52, 7, 9, true, "are contructed", "are contructed"], ["verb", "single-verb", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 8106396543067771897, 8988266389465152017, 18446744073709551615, 18446744073709551615, 96, 103, 96, 103, 16, 17, true, "defined", "defined"], ["verb", "single-verb", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 16381206521530126984, 2128490878609892302, 18446744073709551615, 18446744073709551615, 140, 146, 140, 146, 24, 25, true, "relies", "relies"], ["verb", "single-verb", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 14814125838089603136, 14570552129528981767, 18446744073709551615, 18446744073709551615, 164, 172, 164, 172, 28, 29, true, "provided", "provided"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 12178341415896108722, 16716331678697369730, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "For", "For"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 389609625697843734, 10745918863008613894, 18446744073709551615, 18446744073709551615, 53, 57, 53, 57, 9, 10, true, "from", "from"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 15441160910541485670, 11782782313518536809, 18446744073709551615, 18446744073709551615, 70, 72, 70, 72, 12, 13, true, "of", "of"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 16381206560518651853, 13454833510749227092, 18446744073709551615, 18446744073709551615, 104, 110, 104, 110, 17, 19, true, "in the", "in the"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 16381206566339127348, 13538838978812801477, 18446744073709551615, 18446744073709551615, 147, 153, 147, 153, 25, 27, true, "on the", "on the"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 16381206574363061705, 7182896789576417479, 18446744073709551615, 18446744073709551615, 173, 179, 173, 179, 29, 31, true, "by the", "by the"], ["expression", "word-concatenation", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 2334432749592536458, 10462994002415974044, 18446744073709551615, 18446744073709551615, 165, 178, 165, 178, 32, 33, true, "text-snippets", "text-snippets"], ["sentence", "", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15260949785571891965, 11653374250913568665, 18446744073709551615, 18446744073709551615, 0, 262, 0, 262, 0, 46, true, "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content.", "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content."], ["sentence", "", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 13844944822277961427, 5220212344413701116, 18446744073709551615, 18446744073709551615, 263, 432, 263, 432, 46, 77, true, "Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document.", "Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document."], ["sentence", "", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 17672617811638114041, 1159640313158568858, 18446744073709551615, 18446744073709551615, 433, 542, 433, 542, 77, 95, true, "From this point, all further processing does not need to distinguish between scanned or programmatic sources.", "From this point, all further processing does not need to distinguish between scanned or programmatic sources."], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 9943751231165233071, 2473120546975171959, 18446744073709551615, 18446744073709551615, 31, 44, 31, 44, 8, 10, true, "step approach", "step approach"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15277331178077245503, 8655695116780644511, 18446744073709551615, 18446744073709551615, 84, 100, 84, 100, 18, 20, true, "bitmap resources", "bitmap resources"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 1743069044297951691, 8965951185434506549, 18446744073709551615, 18446744073709551615, 123, 133, 123, 133, 25, 27, true, "OCR engine", "OCR engine"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 16095915465433192910, 1057695382242219584, 18446744073709551615, 18446744073709551615, 301, 311, 301, 311, 53, 55, true, "line paths", "line paths"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 1613146853262082611, 6945903350931267856, 18446744073709551615, 18446744073709551615, 329, 349, 329, 349, 59, 62, true, "internal JSON format", "internal JSON format"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15277331178077245503, 8655695116780655631, 18446744073709551615, 18446744073709551615, 386, 402, 386, 402, 69, 71, true, "bitmap resources", "bitmap resources"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 12366808243217836777, 7405717181425166168, 18446744073709551615, 18446744073709551615, 419, 431, 419, 431, 74, 76, true, "PDF document", "PDF document"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 1813666098981047843, 5197711836231568924, 18446744073709551615, 18446744073709551615, 454, 472, 454, 472, 82, 84, true, "further processing", "further processing"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 4748498094194401130, 132855827826017730, 18446744073709551615, 18446744073709551615, 521, 541, 521, 541, 92, 94, true, "programmatic sources", "programmatic sources"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 389609625526197745, 6232643682528103190, 18446744073709551615, 18446744073709551615, 12, 16, 12, 16, 2, 3, true, "PDFs", "PDFs"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104161531686411, 13473927785000648089, 18446744073709551615, 18446744073709551615, 57, 62, 57, 62, 13, 14, true, "cells", "cells"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 12178341415896289890, 8091364884506794024, 18446744073709551615, 18446744073709551615, 108, 111, 108, 111, 22, 23, true, "PDF", "PDF"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 2334432749592536458, 10462994002415974044, 18446744073709551615, 18446744073709551615, 165, 178, 165, 178, 32, 33, true, "text-snippets", "text-snippets"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 16381206560620045048, 9011858595433045724, 18446744073709551615, 18446744073709551615, 188, 194, 188, 194, 35, 36, true, "images", "images"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104161531686411, 13473927785000686753, 18446744073709551615, 18446744073709551615, 214, 219, 214, 219, 39, 40, true, "cells", "cells"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106398484416916345, 6493205953920844274, 18446744073709551615, 18446744073709551615, 254, 261, 254, 261, 44, 45, true, "content", "content"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104161531686411, 13473927785000641518, 18446744073709551615, 18446744073709551615, 291, 296, 291, 296, 51, 52, true, "cells", "cells"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15984565858548749625, 6555376916269954109, 18446744073709551615, 18446744073709551615, 368, 378, 368, 378, 66, 67, true, "references", "references"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104161594416377, 13421450643931759595, 18446744073709551615, 18446744073709551615, 443, 448, 443, 448, 79, 80, true, "point", "point"], ["verb", "compound-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15388942590337907789, 13245487767265512912, 18446744073709551615, 18446744073709551615, 312, 322, 312, 322, 55, 57, true, "are stored", "are stored"], ["verb", "compound-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 2326544351310328000, 11496667878093951528, 18446744073709551615, 18446744073709551615, 473, 501, 473, 501, 84, 89, true, "does not need to distinguish", "does not need to distinguish"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106478648743879659, 11927965358765640838, 18446744073709551615, 18446744073709551615, 4, 11, 4, 11, 1, 2, true, "scanned", "scanned"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 12178341415895516060, 8089169012284556182, 18446744073709551615, 18446744073709551615, 21, 24, 21, 24, 5, 6, true, "use", "use"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 389609625697824147, 6281299199008773227, 18446744073709551615, 18446744073709551615, 48, 52, 48, 52, 11, 12, true, "find", "find"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106478500389476193, 5862158711018795994, 18446744073709551615, 18446744073709551615, 72, 79, 72, 79, 16, 17, true, "running", "running"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106464574161696199, 1599318992587414525, 18446744073709551615, 18446744073709551615, 143, 150, 143, 150, 29, 30, true, "merging", "merging"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 6168374324562720592, 11340980888213609541, 18446744073709551615, 18446744073709551615, 155, 164, 155, 164, 31, 32, true, "extracted", "extracted"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 6165970943308974402, 6183362660277321571, 18446744073709551615, 18446744073709551615, 204, 213, 204, 213, 38, 39, true, "remaining", "remaining"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106398513399298373, 12729825352779269268, 18446744073709551615, 18446744073709551615, 246, 253, 246, 253, 43, 44, true, "created", "created"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106398513399298373, 12729825352779201453, 18446744073709551615, 18446744073709551615, 283, 290, 283, 290, 50, 51, true, "created", "created"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104158690196448, 12090438731660771062, 18446744073709551615, 18446744073709551615, 362, 367, 362, 367, 65, 66, true, "keeps", "keeps"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 14652256356231447381, 6547058476669491030, 18446744073709551615, 18446744073709551615, 403, 411, 403, 411, 71, 72, true, "embedded", "embedded"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106478648743879659, 11927965358765867932, 18446744073709551615, 18446744073709551615, 510, 517, 510, 517, 90, 91, true, "scanned", "scanned"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 12178341415896108722, 8091477407578352430, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "For", "For"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15441160910541486989, 1560905956973584825, 18446744073709551615, 18446744073709551615, 63, 65, 63, 65, 14, 15, true, "by", "by"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 16381206560518651853, 9040886573940705408, 18446744073709551615, 18446744073709551615, 101, 107, 101, 107, 20, 22, true, "in the", "in the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 5748881733723902671, 2744428132179375268, 18446744073709551615, 18446744073709551615, 112, 122, 112, 122, 23, 25, true, "through an", "through an"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 14637917359887717745, 2285679170166519016, 18446744073709551615, 18446744073709551615, 179, 187, 179, 187, 33, 35, true, "from the", "from the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 14638857868319795209, 9986308683752228647, 18446744073709551615, 18446744073709551615, 195, 203, 195, 203, 36, 38, true, "with the", "with the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 14637917359887717745, 2285679170166508726, 18446744073709551615, 18446744073709551615, 220, 228, 220, 228, 40, 42, true, "from the", "from the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104161828310801, 9514998739209225941, 18446744073709551615, 18446744073709551615, 323, 328, 323, 328, 57, 59, true, "in an", "in an"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 16381206560518651853, 9040886573940702850, 18446744073709551615, 18446744073709551615, 412, 418, 412, 418, 72, 74, true, "in the", "in the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 6560703375081670815, 13188537813218715189, 18446744073709551615, 18446744073709551615, 433, 442, 433, 442, 77, 79, true, "From this", "From this"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106397860038858133, 1895817873747457970, 18446744073709551615, 18446744073709551615, 502, 509, 502, 509, 89, 90, true, "between", "between"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15441160910541485865, 1560905894462741172, 18446744073709551615, 18446744073709551615, 45, 47, 45, 47, 10, 11, true, "to", "to"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 16381206519425733256, 8056542592271067263, 18446744073709551615, 18446744073709551615, 379, 385, 379, 385, 67, 69, true, "to the", "to the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15441160910541485865, 1560905894463292709, 18446744073709551615, 18446744073709551615, 487, 489, 487, 489, 87, 88, true, "to", "to"], ["numval", "fval", 17759618186065566858, "TEXT", "#/texts/33", 1.0, 12178341415896435196, 2390434231117813361, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "3.3", "3.3"], ["expression", "word-concatenation", 17759618186065566858, "TEXT", "#/texts/33", 1.0, 2818878630166942113, 14739962831805467920, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 1, 2, true, "Ground-truth", "Ground-truth"], ["expression", "word-concatenation", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 3753411203337468488, 13210849437960952407, 18446744073709551615, 18446744073709551615, 30, 42, 30, 42, 6, 7, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 3753411203337468488, 13210849437960910794, 18446744073709551615, 18446744073709551615, 115, 127, 115, 127, 19, 20, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 3753411203337468488, 13210849437960914871, 18446744073709551615, 18446744073709551615, 300, 312, 300, 312, 48, 49, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14635108738803425688, 7365949764326008316, 18446744073709551615, 18446744073709551615, 548, 556, 548, 556, 89, 90, true, "two-fold", "two-fold"], ["sentence", "", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 17583259158513687366, 4844263450613991747, 18446744073709551615, 18446744073709551615, 0, 99, 0, 99, 0, 18, true, "In this component, we collect ground-truth for the custom machine learning models to be trained on.", "In this component, we collect ground-truth for the custom machine learning models to be trained on."], ["sentence", "", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 5326083869552464270, 6000109878302441608, 18446744073709551615, 18446744073709551615, 100, 229, 100, 229, 18, 36, true, "Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision.", "Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision."], ["sentence", "", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 10760691172507406288, 16462010365433270400, 18446744073709551615, 18446744073709551615, 230, 393, 230, 393, 36, 63, true, "Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents.", "Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents."], ["sentence", "", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8873628216442269211, 15169897531659573964, 18446744073709551615, 18446744073709551615, 394, 512, 394, 512, 63, 83, true, "As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning.", "As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning."], ["sentence", "", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 7092604321314445148, 6011191642401048334, 18446744073709551615, 18446744073709551615, 513, 557, 513, 557, 83, 91, true, "The purpose of these annotators is two-fold.", "The purpose of these annotators is two-fold."], ["term", "enum-term-mark-2", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 11037453576911667853, 13165947129504054427, 18446744073709551615, 18446744073709551615, 208, 228, 208, 228, 32, 35, true, "recall and precision", "recall and precision"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 808436161032208500, 7814799285540693049, 18446744073709551615, 18446744073709551615, 51, 65, 51, 65, 9, 11, true, "custom machine", "custom machine"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 12419378180983228278, 392973101999502154, 18446744073709551615, 18446744073709551615, 100, 132, 100, 132, 18, 21, true, "Representative ground-truth data", "Representative ground-truth data"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 3376407656379762908, 1362351500590231707, 18446744073709551615, 18446744073709551615, 139, 159, 139, 159, 23, 25, true, "paramount importance", "paramount importance"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16814682987492505919, 13217553285104349249, 18446744073709551615, 18446744073709551615, 198, 214, 198, 214, 31, 33, true, "excellent recall", "excellent recall"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 11730809760258185856, 5794808849754265728, 18446744073709551615, 18446744073709551615, 285, 317, 285, 317, 47, 50, true, "representative ground-truth data", "representative ground-truth data"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14929125759175486455, 10500241043325238885, 18446744073709551615, 18446744073709551615, 341, 361, 341, 361, 55, 57, true, "enormous variability", "enormous variability"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 4671466949882320018, 14567088525314735297, 18446744073709551615, 18446744073709551615, 497, 511, 497, 511, 80, 82, true, "very beginning", "very beginning"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 5947879501615734370, 3587833538502150180, 18446744073709551615, 18446744073709551615, 8, 17, 8, 17, 2, 3, true, "component", "component"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16381206567230470443, 11589242443361386039, 18446744073709551615, 18446744073709551615, 75, 81, 75, 81, 12, 13, true, "models", "models"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106464587473865376, 9393655970867981228, 18446744073709551615, 18446744073709551615, 170, 177, 170, 177, 27, 28, true, "machine", "machine"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16381206567230470443, 11589242443360492541, 18446744073709551615, 18446744073709551615, 186, 192, 186, 192, 29, 30, true, "models", "models"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 6184954595655792282, 8083671121965931318, 18446744073709551615, 18446744073709551615, 219, 228, 219, 228, 34, 35, true, "precision", "precision"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 389609625633531007, 11139648504492918540, 18446744073709551615, 18446744073709551615, 277, 281, 277, 281, 45, 46, true, "lots", "lots"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16381206590620761857, 2390360296848922245, 18446744073709551615, 18446744073709551615, 373, 379, 373, 379, 59, 60, true, "layout", "layout"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 6167933651658664291, 6008261537128558327, 18446744073709551615, 18446744073709551615, 383, 392, 383, 392, 61, 62, true, "documents", "documents"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 2343822922798056892, 1513785689321948444, 18446744073709551615, 18446744073709551615, 399, 410, 399, 410, 65, 66, true, "consequence", "consequence"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106398484423890147, 15438783781988491310, 18446744073709551615, 18446744073709551615, 416, 423, 416, 423, 68, 69, true, "concept", "concept"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15359807916847569012, 8985499154594338495, 18446744073709551615, 18446744073709551615, 427, 437, 427, 437, 70, 71, true, "annotators", "annotators"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 6167933651658664291, 6008261537128554278, 18446744073709551615, 18446744073709551615, 442, 451, 442, 451, 72, 73, true, "documents", "documents"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14814125365076808131, 17209393146457670947, 18446744073709551615, 18446744073709551615, 479, 487, 479, 487, 77, 78, true, "platform", "platform"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106479265948440982, 10714368050200949681, 18446744073709551615, 18446744073709551615, 517, 524, 517, 524, 84, 85, true, "purpose", "purpose"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15359807916847569012, 8985499154594181730, 18446744073709551615, 18446744073709551615, 534, 544, 534, 544, 87, 88, true, "annotators", "annotators"], ["verb", "compound-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 7108090617469355457, 1912699042415883107, 18446744073709551615, 18446744073709551615, 85, 95, 85, 95, 14, 16, true, "be trained", "be trained"], ["verb", "compound-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 12385838741613342316, 15032477692672067044, 18446744073709551615, 18446744073709551615, 248, 261, 248, 261, 39, 42, true, "is often very", "is often very"], ["verb", "compound-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 892343280790899680, 7500746613947828378, 18446744073709551615, 18446744073709551615, 452, 469, 452, 469, 73, 75, true, "were incorporated", "were incorporated"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106398484822949544, 15441707406508006491, 18446744073709551615, 18446744073709551615, 22, 29, 22, 29, 5, 6, true, "collect", "collect"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14639581097006750428, 11409172441407676517, 18446744073709551615, 18446744073709551615, 66, 74, 66, 74, 11, 12, true, "learning", "learning"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541486535, 1572664503296569454, 18446744073709551615, 18446744073709551615, 133, 135, 133, 135, 21, 22, true, "is", "is"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16381206566454849358, 12105593666577949867, 18446744073709551615, 18446744073709551615, 163, 169, 163, 169, 26, 27, true, "obtain", "obtain"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106342444693204894, 14875576246519152573, 18446744073709551615, 18446744073709551615, 178, 185, 178, 185, 28, 29, true, "learned", "learned"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16381206566454849358, 12105593666577949565, 18446744073709551615, 18446744073709551615, 270, 276, 270, 276, 44, 45, true, "obtain", "obtain"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541486535, 1572664503296459901, 18446744073709551615, 18446744073709551615, 545, 547, 545, 547, 88, 89, true, "is", "is"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 17583629327895598301, 13459834771297717800, 18446744073709551615, 18446744073709551615, 30, 46, 30, 46, 6, 8, true, "ground-truth for", "ground-truth for"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106396862006371970, 7459930432539401525, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "In this", "In this"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485678, 1572664558755839209, 18446744073709551615, 18446744073709551615, 96, 98, 96, 98, 16, 17, true, "on", "on"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485670, 1572664558081613030, 18446744073709551615, 18446744073709551615, 136, 138, 136, 138, 22, 23, true, "of", "of"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 389609625618037948, 12415310863134701485, 18446744073709551615, 18446744073709551615, 193, 197, 193, 197, 30, 31, true, "with", "with"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485670, 1572664558081615219, 18446744073709551615, 18446744073709551615, 282, 284, 282, 284, 46, 47, true, "of", "of"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15359550767359331054, 17736639368994315199, 18446744073709551615, 18446744073709551615, 362, 372, 362, 372, 57, 59, true, "across the", "across the"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485670, 1572664558081629607, 18446744073709551615, 18446744073709551615, 380, 382, 380, 382, 60, 61, true, "of", "of"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 389609625539850184, 12416294209159103980, 18446744073709551615, 18446744073709551615, 394, 398, 394, 398, 63, 65, true, "As a", "As a"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485670, 1572664558081628846, 18446744073709551615, 18446744073709551615, 424, 426, 424, 426, 69, 70, true, "of", "of"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 12178341415895625940, 15392261357260739020, 18446744073709551615, 18446744073709551615, 438, 441, 438, 441, 71, 72, true, "for", "for"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14637953883063114384, 17588754078703253842, 18446744073709551615, 18446744073709551615, 470, 478, 470, 478, 75, 77, true, "into the", "into the"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14637917359887717745, 12604191960591849593, 18446744073709551615, 18446744073709551615, 488, 496, 488, 496, 78, 80, true, "from the", "from the"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14814148868025447689, 1566470717057299206, 18446744073709551615, 18446744073709551615, 525, 533, 525, 533, 85, 87, true, "of these", "of these"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485865, 1572664555486701809, 18446744073709551615, 18446744073709551615, 82, 84, 82, 84, 13, 14, true, "to", "to"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485865, 1572664555486707009, 18446744073709551615, 18446744073709551615, 160, 162, 160, 162, 25, 26, true, "to", "to"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485865, 1572664555486699317, 18446744073709551615, 18446744073709551615, 267, 269, 267, 269, 43, 44, true, "to", "to"], ["numval", "ival", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 17767354399704235162, 14146447032891005863, 18446744073709551615, 18446744073709551615, 255, 256, 255, 256, 44, 45, true, "2", "2"], ["numval", "ival", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 17767354399704235163, 14146447035751187062, 18446744073709551615, 18446744073709551615, 673, 674, 673, 674, 130, 131, true, "3", "3"], ["numval", "ival", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541481851, 14395253567444244807, 18446744073709551615, 18446744073709551615, 874, 876, 874, 876, 168, 169, true, "30", "30"], ["parenthesis", "round brackets", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 7105842701545078035, 8408400604204305824, 18446744073709551615, 18446744073709551615, 243, 257, 243, 257, 41, 46, true, "(see Figure 2)", "(see Figure 2)"], ["parenthesis", "round brackets", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 8106340679880086785, 13947230042842640243, 18446744073709551615, 18446744073709551615, 275, 282, 275, 282, 51, 54, true, "(human)", "(human)"], ["expression", "word-concatenation", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 3753411203337468488, 16963906746658391185, 18446744073709551615, 18446744073709551615, 70, 82, 70, 82, 13, 14, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 3481880259565470086, 2454306430075603922, 18446744073709551615, 18446744073709551615, 100, 114, 100, 114, 18, 19, true, "crowd-sourcing", "crowd-sourcing"], ["expression", "word-concatenation", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15274658437030291237, 927761389620237631, 18446744073709551615, 18446744073709551615, 632, 646, 632, 646, 122, 123, true, "colouring-task", "colouring-task"], ["expression", "wtoken-concatenation", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14652257787682118593, 2235344648887433985, 18446744073709551615, 18446744073709551615, 434, 444, 434, 444, 86, 87, true, "etc^{10}", "etc$^{10}$"], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 9726701973674527424, 5629316039426793264, 18446744073709551615, 18446744073709551615, 0, 124, 0, 124, 0, 21, true, "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach.", "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15691131460025387221, 12159716655828567313, 18446744073709551615, 18446744073709551615, 125, 258, 125, 258, 21, 47, true, "In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2).", "In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2)."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2641958415274525762, 9846787274480004683, 18446744073709551615, 18446744073709551615, 259, 337, 259, 337, 47, 64, true, "We then ask the (human) annotator to assign each cell a layout semantic label.", "We then ask the (human) annotator to assign each cell a layout semantic label."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 1037767454801566655, 17259389722144888034, 18446744073709551615, 18446744073709551615, 338, 445, 338, 445, 64, 88, true, "Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$.", "Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 10111701075959805159, 5133545533971093986, 18446744073709551615, 18446744073709551615, 446, 532, 446, 532, 88, 104, true, "In the annotator tool, each layout semantic label is visually represented by a colour.", "In the annotator tool, each layout semantic label is visually represented by a colour."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 4706201309352513459, 17185467944624883260, 18446744073709551615, 18446744073709551615, 533, 675, 533, 675, 104, 132, true, "By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3.", "By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2594226177845736721, 3469738757049287491, 18446744073709551615, 18446744073709551615, 676, 766, 676, 766, 132, 149, true, "Since humans are very efficient in visual recognition, this task comes very natural to us.", "Since humans are very efficient in visual recognition, this task comes very natural to us."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 18263945833780251048, 18046267097228773723, 18446744073709551615, 18446744073709551615, 767, 919, 767, 919, 149, 175, true, "The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", "The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns."], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5981390564575261606, 5462827921468056185, 18446744073709551615, 18446744073709551615, 100, 123, 100, 123, 18, 20, true, "crowd-sourcing approach", "crowd-sourcing approach"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 4147688156856812386, 14660217840011410416, 18446744073709551615, 18446744073709551615, 133, 148, 133, 148, 23, 25, true, "annotation task", "annotation task"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 11734732391183296006, 1599617040730216806, 18446744073709551615, 18446744073709551615, 166, 183, 166, 183, 29, 32, true, "original PDF page", "original PDF page"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5845623659139499376, 18290324680406483857, 18446744073709551615, 18446744073709551615, 203, 220, 203, 220, 35, 37, true, "parsed components", "parsed components"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2317020437411802284, 2857760401781024162, 18446744073709551615, 18446744073709551615, 315, 336, 315, 336, 60, 63, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 17144395416522725511, 17753778739971755617, 18446744073709551615, 18446744073709551615, 350, 365, 350, 365, 66, 68, true, "semantic labels", "semantic labels"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 6408516478084086022, 17282278894039682530, 18446744073709551615, 18446744073709551615, 453, 467, 453, 467, 90, 92, true, "annotator tool", "annotator tool"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2317020437411802284, 2857760401780966284, 18446744073709551615, 18446744073709551615, 474, 495, 474, 495, 94, 97, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 11173100292227021015, 10629231856798201869, 18446744073709551615, 18446744073709551615, 563, 577, 563, 577, 110, 112, true, "semantic label", "semantic label"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 3203380946006439274, 14675068709317928424, 18446744073709551615, 18446744073709551615, 591, 610, 591, 610, 116, 118, true, "semantic annotation", "semantic annotation"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5999195606993327398, 6806903084840068180, 18446744073709551615, 18446744073709551615, 711, 729, 711, 729, 138, 140, true, "visual recognition", "visual recognition"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16216055950284707729, 1614911910001461455, 18446744073709551615, 18446744073709551615, 771, 784, 771, 784, 150, 152, true, "required time", "required time"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 1353284443403550494, 16957154834908790928, 18446744073709551615, 18446744073709551615, 805, 816, 805, 816, 156, 158, true, "single page", "single page"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14195411118943606613, 11725530869998519128, 18446744073709551615, 18446744073709551615, 835, 849, 835, 849, 161, 163, true, "parsing output", "parsing output"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 9136485266740328691, 5311838821730551171, 18446744073709551615, 18446744073709551615, 890, 918, 890, 918, 171, 174, true, "various annotation campaigns", "various annotation campaigns"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15359807916847569012, 6832664458179175310, 18446744073709551615, 18446744073709551615, 24, 34, 24, 34, 5, 6, true, "annotators", "annotators"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14814125365076808131, 13227537830384402784, 18446744073709551615, 18446744073709551615, 42, 50, 42, 50, 8, 9, true, "platform", "platform"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 3753411203337468488, 16963906746658391185, 18446744073709551615, 18446744073709551615, 70, 82, 70, 82, 13, 14, true, "ground-truth", "ground-truth"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104161785194305, 16177406997540344708, 18446744073709551615, 18446744073709551615, 86, 91, 86, 91, 15, 16, true, "scale", "scale"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104161531686411, 16177740231384398405, 18446744073709551615, 18446744073709551615, 237, 242, 237, 242, 40, 41, true, "cells", "cells"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206514091025767, 138645421409664679, 18446744073709551615, 18446744073709551615, 248, 254, 248, 254, 43, 44, true, "Figure", "Figure"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5946726816546568920, 5690589095203344212, 18446744073709551615, 18446744073709551615, 283, 292, 283, 292, 54, 55, true, "annotator", "annotator"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625696024605, 2623686961522394102, 18446744073709551615, 18446744073709551615, 308, 312, 308, 312, 58, 59, true, "cell", "cell"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14650277098690689540, 14175748967298263712, 18446744073709551615, 18446744073709551615, 338, 346, 338, 346, 64, 65, true, "Examples", "Examples"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104161841334670, 4540659438624295673, 18446744073709551615, 18446744073709551615, 371, 376, 371, 376, 70, 71, true, "Title", "Title"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14650447666970618949, 5549516867519665861, 18446744073709551615, 18446744073709551615, 378, 386, 378, 386, 72, 73, true, "Abstract", "Abstract"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 8106479192428836136, 9415558511939706336, 18446744073709551615, 18446744073709551615, 388, 395, 388, 395, 74, 75, true, "Authors", "Authors"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14652314692921799233, 13219745338982113946, 18446744073709551615, 18446744073709551615, 397, 405, 397, 405, 76, 77, true, "Subtitle", "Subtitle"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625541629035, 2600754505563366440, 18446744073709551615, 18446744073709551615, 407, 411, 407, 411, 78, 79, true, "Text", "Text"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104161846359995, 4541367182255891997, 18446744073709551615, 18446744073709551615, 413, 418, 413, 418, 80, 81, true, "Table", "Table"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206514091025767, 138645421409679550, 18446744073709551615, 18446744073709551615, 420, 426, 420, 426, 82, 83, true, "Figure", "Figure"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625527096807, 2600425877064879199, 18446744073709551615, 18446744073709551615, 428, 432, 428, 432, 84, 85, true, "List", "List"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14652257787682118593, 2235344648887433985, 18446744073709551615, 18446744073709551615, 434, 444, 434, 444, 86, 87, true, "etc^{10}", "etc$^{10}$"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206562405951200, 14915916748921937907, 18446744073709551615, 18446744073709551615, 525, 531, 525, 531, 102, 103, true, "colour", "colour"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206562405951200, 14915916748921935127, 18446744073709551615, 18446744073709551615, 548, 554, 548, 554, 107, 108, true, "colour", "colour"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625631210899, 2869741906537984803, 18446744073709551615, 18446744073709551615, 583, 587, 583, 587, 114, 115, true, "task", "task"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15274658437030291237, 927761389620237631, 18446744073709551615, 18446744073709551615, 632, 646, 632, 646, 122, 123, true, "colouring-task", "colouring-task"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206514091025767, 138645421409695848, 18446744073709551615, 18446744073709551615, 666, 672, 666, 672, 129, 130, true, "Figure", "Figure"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206562125478786, 14844805221869100354, 18446744073709551615, 18446744073709551615, 682, 688, 682, 688, 133, 134, true, "humans", "humans"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625631210899, 2869741906537807335, 18446744073709551615, 18446744073709551615, 736, 740, 736, 740, 142, 143, true, "task", "task"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 8106478708554912027, 15651220002486280438, 18446744073709551615, 18446744073709551615, 877, 884, 877, 884, 169, 170, true, "seconds", "seconds"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 17379018868500585066, 7887869988328938557, 18446744073709551615, 18446744073709551615, 496, 519, 496, 519, 97, 100, true, "is visually represented", "is visually represented"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 12222886291842911511, 14164925570223305777, 18446744073709551615, 18446744073709551615, 611, 624, 611, 624, 118, 120, true, "is translated", "is translated"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14892762836247367071, 8594133885493293021, 18446744073709551615, 18446744073709551615, 651, 662, 651, 662, 125, 128, true, "can be seen", "can be seen"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14650447943360509516, 9369867654851497889, 18446744073709551615, 18446744073709551615, 689, 697, 689, 697, 134, 136, true, "are very", "are very"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2702989145586421307, 16288047702767501090, 18446744073709551615, 18446744073709551615, 741, 751, 741, 751, 143, 145, true, "comes very", "comes very"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 12716975807947326132, 18394114003906370195, 18446744073709551615, 18446744073709551615, 785, 802, 785, 802, 152, 155, true, "spent to annotate", "spent to annotate"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 7762624089155089838, 8059148629484155796, 18446744073709551615, 18446744073709551615, 850, 870, 850, 870, 163, 167, true, "has shown to average", "has shown to average"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104159171192019, 4339284832956404466, 18446744073709551615, 18446744073709551615, 51, 56, 51, 56, 9, 10, true, "allow", "allow"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206562264646932, 14865734342971057341, 18446744073709551615, 18446744073709551615, 63, 69, 63, 69, 12, 13, true, "gather", "gather"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104159157798023, 4075838060624060091, 18446744073709551615, 18446744073709551615, 92, 97, 92, 97, 16, 17, true, "using", "using"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14634109585341561832, 11176599631065673692, 18446744073709551615, 18446744073709551615, 153, 161, 153, 161, 27, 28, true, "retrieve", "retrieve"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5615021626537608757, 17448469512561363698, 18446744073709551615, 18446744073709551615, 192, 202, 192, 202, 34, 35, true, "associated", "associated"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2703017932388060560, 7117070694088785565, 18446744073709551615, 18446744073709551615, 222, 232, 222, 232, 38, 39, true, "containing", "containing"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 12178341415895638617, 2314208399419917922, 18446744073709551615, 18446744073709551615, 244, 247, 244, 247, 42, 43, true, "see", "see"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 12178341415895564320, 2314153029143332235, 18446744073709551615, 18446744073709551615, 267, 270, 267, 270, 49, 50, true, "ask", "ask"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206568833706309, 76439633065696099, 18446744073709551615, 18446744073709551615, 296, 302, 296, 302, 56, 57, true, "assign", "assign"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 12178341415895564896, 2314153075758350109, 18446744073709551615, 18446744073709551615, 366, 369, 366, 369, 68, 69, true, "are", "are"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5950066704821957614, 2855124977157713060, 18446744073709551615, 18446744073709551615, 536, 545, 536, 545, 105, 106, true, "assigning", "assigning"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14635107449930294178, 18245738378700485826, 18446744073709551615, 18446744073709551615, 817, 825, 817, 825, 158, 159, true, "starting", "starting"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 13874442862817243077, 5577683104197750470, 18446744073709551615, 18446744073709551615, 698, 710, 698, 710, 136, 138, true, "efficient in", "efficient in"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206566339127348, 14821225559023260074, 18446744073709551615, 18446744073709551615, 35, 41, 35, 41, 6, 8, true, "on the", "on the"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541487054, 14395248044803713066, 18446744073709551615, 18446744073709551615, 83, 85, 83, 85, 14, 15, true, "at", "at"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 8106396862068141297, 11468319027671071218, 18446744073709551615, 18446744073709551615, 125, 132, 125, 132, 21, 23, true, "In each", "In each"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485670, 14395248211619962859, 18446744073709551615, 18446744073709551615, 347, 349, 347, 349, 65, 66, true, "of", "of"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16380809977974811061, 16784953016435174527, 18446744073709551615, 18446744073709551615, 446, 452, 446, 452, 88, 90, true, "In the", "In the"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625686288966, 2869870603595820956, 18446744073709551615, 18446744073709551615, 520, 524, 520, 524, 100, 102, true, "by a", "by a"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541480853, 14395253412858146951, 18446744073709551615, 18446744073709551615, 533, 535, 533, 535, 104, 105, true, "By", "By"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485670, 14395248211619800045, 18446744073709551615, 18446744073709551615, 588, 590, 588, 590, 115, 116, true, "of", "of"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206560517276114, 14905918454097336053, 18446744073709551615, 18446744073709551615, 625, 631, 625, 631, 120, 122, true, "into a", "into a"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541487053, 14395248044754679793, 18446744073709551615, 18446744073709551615, 648, 650, 648, 650, 124, 125, true, "as", "as"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541486538, 14395247967935705607, 18446744073709551615, 18446744073709551615, 663, 665, 663, 665, 128, 129, true, "in", "in"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104162323265917, 4542723028457504319, 18446744073709551615, 18446744073709551615, 676, 681, 676, 681, 132, 133, true, "Since", "Since"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14637917359887717745, 9802156572988376526, 18446744073709551615, 18446744073709551615, 826, 834, 826, 834, 159, 161, true, "from the", "from the"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541487054, 14395248044803775593, 18446744073709551615, 18446744073709551615, 871, 873, 871, 873, 167, 168, true, "at", "at"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625618865305, 2626455388543067312, 18446744073709551615, 18446744073709551615, 885, 889, 885, 889, 170, 171, true, "over", "over"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485865, 14395248221468713743, 18446744073709551615, 18446744073709551615, 60, 62, 60, 62, 11, 12, true, "to", "to"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485865, 14395248221468711840, 18446744073709551615, 18446744073709551615, 293, 295, 293, 295, 55, 56, true, "to", "to"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 8106351192289801590, 15640680202175036929, 18446744073709551615, 18446744073709551615, 555, 562, 555, 562, 108, 110, true, "to each", "to each"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485865, 14395248221468734066, 18446744073709551615, 18446744073709551615, 760, 762, 760, 762, 146, 147, true, "to", "to"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485865, 14395248221468732296, 18446744073709551615, 18446744073709551615, 791, 793, 791, 793, 153, 154, true, "to", "to"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485865, 14395248221468253143, 18446744073709551615, 18446744073709551615, 860, 862, 860, 862, 165, 166, true, "to", "to"], ["expression", "common", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486545, 5648627953971592423, 18446744073709551615, 18446744073709551615, 170, 174, 170, 174, 32, 33, true, "ie", "i.e."], ["expression", "common", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486545, 5648627953971505641, 18446744073709551615, 18446744073709551615, 565, 569, 565, 569, 105, 106, true, "ie", "i.e."], ["expression", "word-concatenation", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 3753411203337468488, 2887415461401166935, 18446744073709551615, 18446744073709551615, 988, 1000, 988, 1000, 178, 179, true, "ground-truth", "ground-truth"], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8811629219803841493, 764401587528891604, 18446744073709551615, 18446744073709551615, 0, 102, 0, 102, 0, 18, true, "The second purpose of the annotators is to visually inspect the quality of our machine learned models.", "The second purpose of the annotators is to visually inspect the quality of our machine learned models."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 17074610120338710338, 17138558258330657336, 18446744073709551615, 18446744073709551615, 103, 222, 103, 222, 18, 43, true, "The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell.", "The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 13952947067572963915, 14804888992560412829, 18446744073709551615, 18446744073709551615, 223, 332, 223, 332, 43, 65, true, "Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page.", "Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 3886548767560778611, 15171206665642280195, 18446744073709551615, 18446744073709551615, 333, 417, 333, 417, 65, 81, true, "This allows the users to directly inspect the results of the models on unseen pages.", "This allows the users to directly inspect the results of the models on unseen pages."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 7265618967841439802, 9171259371202189029, 18446744073709551615, 18446744073709551615, 418, 645, 418, 645, 81, 118, true, "A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels.", "A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 13719400799583725468, 8197048490410940553, 18446744073709551615, 18446744073709551615, 646, 761, 646, 761, 118, 142, true, "Of course, as the models become better over time, the number of corrections needed to be made become less and less.", "Of course, as the models become better over time, the number of corrections needed to be made become less and less."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 11311841238554173343, 1992585178053613626, 18446744073709551615, 18446744073709551615, 762, 834, 762, 834, 142, 154, true, "This allows us to significantly reduce the annotation time per document.", "This allows us to significantly reduce the annotation time per document."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 11029085865310589014, 140652675393977829, 18446744073709551615, 18446744073709551615, 835, 1011, 835, 1011, 154, 181, true, "Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering.", "Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering."], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1265616008757051900, 16887628514756530891, 18446744073709551615, 18446744073709551615, 4, 18, 4, 18, 1, 3, true, "second purpose", "second purpose"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 2317020437411802284, 18255726684800034363, 18446744073709551615, 18446744073709551615, 187, 208, 187, 208, 36, 39, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 6406713431887338480, 7916422400711570126, 18446744073709551615, 18446744073709551615, 317, 331, 317, 331, 62, 64, true, "annotated page", "annotated page"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 2348752337749164990, 14543923397071441531, 18446744073709551615, 18446744073709551615, 404, 416, 404, 416, 78, 80, true, "unseen pages", "unseen pages"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15605685416779062553, 8687993225293924380, 18446744073709551615, 18446744073709551615, 420, 438, 420, 438, 82, 84, true, "direct consequence", "direct consequence"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 5762122585570374444, 14528211899176051482, 18446744073709551615, 18446744073709551615, 447, 468, 447, 468, 86, 88, true, "inspection capability", "inspection capability"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 4147688156856812386, 6490015423550437879, 18446744073709551615, 18446744073709551615, 499, 514, 499, 514, 94, 96, true, "annotation task", "annotation task"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1447735027206064326, 4580102222737649898, 18446744073709551615, 18446744073709551615, 548, 563, 548, 563, 102, 104, true, "correction task", "correction task"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1385949438436713657, 10394950804393440811, 18446744073709551615, 18446744073709551615, 574, 590, 574, 590, 107, 109, true, "human annotators", "human annotators"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 4147688156856784008, 6490015274061652606, 18446744073709551615, 18446744073709551615, 805, 820, 805, 820, 149, 151, true, "annotation time", "annotation time"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 12538595938735813332, 3845550124263106024, 18446744073709551615, 18446744073709551615, 899, 915, 899, 915, 163, 166, true, "high hourly rate", "high hourly rate"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1591019414094504294, 10764400280857424150, 18446744073709551615, 18446744073709551615, 988, 1010, 988, 1010, 178, 180, true, "ground-truth gathering", "ground-truth gathering"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15359807916847569012, 2773765720706928008, 18446744073709551615, 18446744073709551615, 26, 36, 26, 36, 5, 6, true, "annotators", "annotators"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106477781724488761, 877124515430637025, 18446744073709551615, 18446744073709551615, 64, 71, 64, 71, 11, 12, true, "quality", "quality"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106464587473865376, 17528532861678646004, 18446744073709551615, 18446744073709551615, 79, 86, 79, 86, 14, 15, true, "machine", "machine"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206567230470443, 7370067777486058567, 18446744073709551615, 18446744073709551615, 95, 101, 95, 101, 16, 17, true, "models", "models"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625699055241, 14717516376643823052, 18446744073709551615, 18446744073709551615, 107, 111, 107, 111, 19, 20, true, "goal", "goal"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206567230470443, 7370067777486035497, 18446744073709551615, 18446744073709551615, 119, 125, 119, 125, 22, 23, true, "models", "models"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206569104268492, 6068740663996960621, 18446744073709551615, 18446744073709551615, 144, 150, 144, 150, 27, 28, true, "action", "action"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15359807916847569012, 2773765720706953011, 18446744073709551615, 18446744073709551615, 158, 168, 158, 168, 30, 31, true, "annotators", "annotators"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625696024605, 14717680184571495356, 18446744073709551615, 18446744073709551615, 217, 221, 217, 221, 41, 42, true, "cell", "cell"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206521509536706, 8483052701360678843, 18446744073709551615, 18446744073709551615, 236, 242, 236, 242, 46, 47, true, "result", "result"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 14103651237077221583, 4734852607792192719, 18446744073709551615, 18446744073709551615, 248, 258, 248, 258, 49, 50, true, "prediction", "prediction"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625632301461, 14734624151323869703, 18446744073709551615, 18446744073709551615, 268, 272, 268, 272, 52, 53, true, "page", "page"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 329104159157820437, 5473671238311420391, 18446744073709551615, 18446744073709551615, 349, 354, 349, 354, 68, 69, true, "users", "users"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106478445190161533, 12197236091761950611, 18446744073709551615, 18446744073709551615, 379, 386, 379, 386, 73, 74, true, "results", "results"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206567230470443, 7370067777485849666, 18446744073709551615, 18446744073709551615, 394, 400, 394, 400, 76, 77, true, "models", "models"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15359807916847569012, 2773765720706781412, 18446744073709551615, 18446744073709551615, 476, 486, 476, 486, 90, 91, true, "annotators", "annotators"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206590740615814, 4654449512501997811, 18446744073709551615, 18446744073709551615, 638, 644, 638, 644, 116, 117, true, "labels", "labels"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206562412792821, 7448141254981419885, 18446744073709551615, 18446744073709551615, 649, 655, 649, 655, 119, 120, true, "course", "course"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206567230470443, 7370067777485806355, 18446744073709551615, 18446744073709551615, 664, 670, 664, 670, 123, 124, true, "models", "models"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625631241985, 14734405111978592850, 18446744073709551615, 18446744073709551615, 690, 694, 690, 694, 127, 128, true, "time", "time"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206574973295053, 5687305569601072199, 18446744073709551615, 18446744073709551615, 700, 706, 700, 706, 130, 131, true, "number", "number"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 2993400436190652919, 10582887119126517774, 18446744073709551615, 18446744073709551615, 710, 721, 710, 721, 132, 133, true, "corrections", "corrections"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 14650401089286948001, 1771869589109122009, 18446744073709551615, 18446744073709551615, 825, 833, 825, 833, 152, 153, true, "document", "document"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1037258523789473353, 10472772837010545151, 18446744073709551615, 18446744073709551615, 841, 852, 841, 852, 155, 156, true, "annotations", "annotations"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 9754205718486487036, 4571850225111332849, 18446744073709551615, 18446744073709551615, 878, 891, 878, 891, 160, 161, true, "professionals", "professionals"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 3503953938541970428, 5150439481258416324, 18446744073709551615, 18446744073709551615, 931, 940, 931, 940, 169, 170, true, "technique", "technique"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625695918775, 14717671066676122635, 18446744073709551615, 18446744073709551615, 980, 984, 980, 984, 176, 177, true, "cost", "cost"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1733018218539366667, 11085627304462353310, 18446744073709551615, 18446744073709551615, 37, 59, 37, 59, 6, 10, true, "is to visually inspect", "is to visually inspect"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 13692217046483889459, 9755057425888296816, 18446744073709551615, 18446744073709551615, 126, 139, 126, 139, 23, 26, true, "is to emulate", "is to emulate"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8336379008036663285, 2252560075242747349, 18446744073709551615, 18446744073709551615, 170, 184, 170, 184, 32, 35, true, "ie to assign", "i.e. to assign"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 14444118778847914736, 5678933160321683165, 18446744073709551615, 18446744073709551615, 287, 299, 287, 299, 55, 57, true, "be displayed", "be displayed"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 13727039347256328143, 14963124457720431052, 18446744073709551615, 18446744073709551615, 515, 540, 515, 540, 96, 100, true, "can be transformed easily", "can be transformed easily"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 10428062617433318222, 17753611624855826478, 18446744073709551615, 18446744073709551615, 596, 611, 596, 611, 110, 113, true, "need to correct", "need to correct"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 17328653366627729324, 9156057289599737447, 18446744073709551615, 18446744073709551615, 722, 746, 722, 746, 133, 138, true, "needed to be made become", "needed to be made become"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1919368618428862817, 2645599353882990224, 18446744073709551615, 18446744073709551615, 853, 874, 853, 874, 156, 159, true, "are typically created", "are typically created"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106342444693204894, 15282897598607271867, 18446744073709551615, 18446744073709551615, 87, 94, 87, 94, 15, 16, true, "learned", "learned"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625633616262, 14734510420580904856, 18446744073709551615, 18446744073709551615, 309, 313, 309, 313, 60, 61, true, "were", "were"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206569317834029, 6192068890556919984, 18446744073709551615, 18446744073709551615, 338, 344, 338, 344, 66, 67, true, "allows", "allows"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106398347643660299, 15243737699270700259, 18446744073709551615, 18446744073709551615, 367, 374, 367, 374, 71, 72, true, "inspect", "inspect"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486535, 5648627928025204004, 18446744073709551615, 18446744073709551615, 487, 489, 487, 489, 91, 92, true, "is", "is"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486545, 5648627953971505641, 18446744073709551615, 18446744073709551615, 565, 569, 565, 569, 105, 106, true, "ie", "i.e."], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 6184954633443293966, 6201163133929267929, 18446744073709551615, 18446744073709551615, 628, 637, 628, 637, 115, 116, true, "predicted", "predicted"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206574366219255, 6162855425004985556, 18446744073709551615, 18446744073709551615, 671, 677, 671, 677, 124, 125, true, "become", "become"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206569317834029, 6192068890556892683, 18446744073709551615, 18446744073709551615, 767, 773, 767, 773, 143, 144, true, "allows", "allows"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206521531524134, 8487629705353323144, 18446744073709551615, 18446744073709551615, 794, 800, 794, 800, 147, 148, true, "reduce", "reduce"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 5947874710666698111, 7433459475979555695, 18446744073709551615, 18446744073709551615, 921, 930, 921, 930, 168, 169, true, "colouring", "colouring"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106397564189393266, 10189283544961513918, 18446744073709551615, 18446744073709551615, 941, 948, 941, 948, 170, 171, true, "allowed", "allowed"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206521531524134, 8487629705353346362, 18446744073709551615, 18446744073709551615, 969, 975, 969, 975, 174, 175, true, "reduce", "reduce"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206565712212855, 5007444638099411779, 18446744073709551615, 18446744073709551615, 19, 25, 19, 25, 3, 5, true, "of the", "of the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485670, 5648627965386078792, 18446744073709551615, 18446744073709551615, 72, 74, 72, 74, 12, 13, true, "of", "of"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206565712212855, 5007444638099311142, 18446744073709551615, 18446744073709551615, 112, 118, 112, 118, 20, 22, true, "of the", "of the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206565712212855, 5007444638099321250, 18446744073709551615, 18446744073709551615, 151, 157, 151, 157, 28, 30, true, "of the", "of the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625620237736, 14734126707883533061, 18446744073709551615, 18446744073709551615, 243, 247, 243, 247, 47, 49, true, "of a", "of a"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 14637917333167503367, 798190401077603715, 18446744073709551615, 18446744073709551615, 259, 267, 259, 267, 50, 52, true, "for each", "for each"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541487053, 5648627952979693361, 18446744073709551615, 18446744073709551615, 300, 302, 300, 302, 57, 58, true, "as", "as"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486546, 5648627954249524212, 18446744073709551615, 18446744073709551615, 303, 305, 303, 305, 58, 59, true, "if", "if"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206565712212855, 5007444638099430551, 18446744073709551615, 18446744073709551615, 387, 393, 387, 393, 74, 76, true, "of the", "of the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485678, 5648627990571398259, 18446744073709551615, 18446744073709551615, 401, 403, 401, 403, 77, 78, true, "on", "on"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106342927224204628, 9808316297265099098, 18446744073709551615, 18446744073709551615, 439, 446, 439, 446, 84, 86, true, "of this", "of this"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206560518651853, 4762915631712600090, 18446744073709551615, 18446744073709551615, 469, 475, 469, 475, 88, 90, true, "in the", "in the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 14634130761162415388, 81576451573710762, 18446744073709551615, 18446744073709551615, 490, 498, 490, 498, 92, 94, true, "that the", "that the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206560517276114, 4764682161644402103, 18446744073709551615, 18446744073709551615, 541, 547, 541, 547, 100, 102, true, "into a", "into a"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541487694, 5648628413298466486, 18446744073709551615, 18446744073709551615, 646, 648, 646, 648, 118, 119, true, "Of", "Of"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206568455155979, 7352846837303360973, 18446744073709551615, 18446744073709551615, 657, 663, 657, 663, 121, 123, true, "as the", "as the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625618865305, 14762415042042723791, 18446744073709551615, 18446744073709551615, 685, 689, 685, 689, 126, 127, true, "over", "over"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485670, 5648627965385789683, 18446744073709551615, 18446744073709551615, 707, 709, 707, 709, 131, 132, true, "of", "of"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 12178341415895635383, 522863227394400205, 18446744073709551615, 18446744073709551615, 821, 824, 821, 824, 151, 152, true, "per", "per"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 329104162323265917, 703542606745908864, 18446744073709551615, 18446744073709551615, 835, 840, 835, 840, 154, 155, true, "Since", "Since"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486989, 5648627977530603539, 18446744073709551615, 18446744073709551615, 875, 877, 875, 877, 159, 160, true, "by", "by"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206557726458966, 4826134415899722071, 18446744073709551615, 18446744073709551615, 892, 898, 892, 898, 161, 163, true, "with a", "with a"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485670, 5648627965385872694, 18446744073709551615, 18446744073709551615, 985, 987, 985, 987, 177, 178, true, "of", "of"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167410249, 18446744073709551615, 18446744073709551615, 40, 42, 40, 42, 7, 8, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167418813, 18446744073709551615, 18446744073709551615, 129, 131, 129, 131, 24, 25, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167419001, 18446744073709551615, 18446744073709551615, 175, 177, 175, 177, 33, 34, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106351192289801590, 3283401131731009010, 18446744073709551615, 18446744073709551615, 209, 216, 209, 216, 39, 41, true, "to each", "to each"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167626806, 18446744073709551615, 18446744073709551615, 355, 357, 355, 357, 69, 70, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167658087, 18446744073709551615, 18446744073709551615, 601, 603, 601, 603, 111, 112, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167600455, 18446744073709551615, 18446744073709551615, 729, 731, 729, 731, 134, 135, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167591235, 18446744073709551615, 18446744073709551615, 777, 779, 777, 779, 145, 146, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167603037, 18446744073709551615, 18446744073709551615, 952, 954, 952, 954, 172, 173, true, "to", "to"], ["numval", "ival", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 17767354399704235163, 8981450943146377597, 18446744073709551615, 18446744073709551615, 10, 11, 10, 11, 2, 3, true, "3", "3"], ["expression", "word-concatenation", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 4147688168886302397, 4316020704427245043, 18446744073709551615, 18446744073709551615, 25, 40, 25, 40, 7, 8, true, "annotation-rate", "annotation-rate"], ["expression", "word-concatenation", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 10203919108291344398, 6932446687114432211, 18446744073709551615, 18446744073709551615, 44, 68, 44, 68, 9, 10, true, "number-of-annotatedpages", "number-of-annotatedpages"], ["sentence", "", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 1209815263017833313, 7894248700334870994, 18446744073709551615, 18446744073709551615, 0, 80, 0, 80, 0, 13, true, "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute.", "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute."], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 16381206514091025767, 6269509740307391107, 18446744073709551615, 18446744073709551615, 3, 9, 3, 9, 1, 2, true, "Figure", "Figure"], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 4147688168886302397, 4316020704427245043, 18446744073709551615, 18446744073709551615, 25, 40, 25, 40, 7, 8, true, "annotation-rate", "annotation-rate"], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 10203919108291344398, 6932446687114432211, 18446744073709551615, 18446744073709551615, 44, 68, 44, 68, 9, 10, true, "number-of-annotatedpages", "number-of-annotatedpages"], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 16381206594557227155, 607213422089229276, 18446744073709551615, 18446744073709551615, 73, 79, 73, 79, 11, 12, true, "minute", "minute"], ["verb", "single-verb", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 389609625741152123, 14211761448341366960, 18446744073709551615, 18446744073709551615, 16, 20, 16, 20, 5, 6, true, "show", "show"], ["conn", "single-conn", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 15441160910541480354, 2695916118355464437, 18446744073709551615, 18446744073709551615, 0, 2, 0, 2, 0, 1, true, "In", "In"], ["conn", "single-conn", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 15441160910541486538, 2695916097612071945, 18446744073709551615, 18446744073709551615, 41, 43, 41, 43, 8, 9, true, "in", "in"], ["conn", "single-conn", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 12178341415895635383, 15900505804860594428, 18446744073709551615, 18446744073709551615, 69, 72, 69, 72, 10, 11, true, "per", "per"], ["numval", "ival", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541481982, 11951507606951917585, 18446744073709551615, 18446744073709551615, 508, 510, 508, 510, 92, 93, true, "10", "10"], ["parenthesis", "round brackets", 887751753527930563, "TEXT", "#/texts/38", 1.0, 8040598725736414260, 16009765337602090817, 18446744073709551615, 18446744073709551615, 185, 218, 185, 218, 35, 43, true, "(based on annotated ground-truth)", "(based on annotated ground-truth)"], ["parenthesis", "round brackets", 887751753527930563, "TEXT", "#/texts/38", 1.0, 5092507472812879080, 14553694282484972012, 18446744073709551615, 18446744073709551615, 296, 391, 296, 391, 55, 71, true, "(submitting page-annotations, training the model, applying the model for predicting the labels)", "(submitting page-annotations, training the model, applying the model for predicting the labels)"], ["expression", "word-concatenation", 887751753527930563, "TEXT", "#/texts/38", 1.0, 3005486399909847392, 14682678718727362625, 18446744073709551615, 18446744073709551615, 152, 165, 152, 165, 31, 32, true, "inter-leaving", "inter-leaving"], ["expression", "word-concatenation", 887751753527930563, "TEXT", "#/texts/38", 1.0, 12771775017586419952, 2802304582408659586, 18446744073709551615, 18446744073709551615, 308, 324, 308, 324, 57, 58, true, "page-annotations", "page-annotations"], ["expression", "word-concatenation", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14635107217315999975, 15420785844354915058, 18446744073709551615, 18446744073709551615, 484, 492, 484, 492, 87, 88, true, "speed-up", "speed-up"], ["expression", "word-concatenation", 887751753527930563, "TEXT", "#/texts/38", 1.0, 3753411203337468488, 12096051488974704464, 18446744073709551615, 18446744073709551615, 515, 527, 515, 527, 94, 95, true, "ground-truth", "ground-truth"], ["sentence", "", 887751753527930563, "TEXT", "#/texts/38", 1.0, 6272342027418177618, 7231857899686407766, 18446744073709551615, 18446744073709551615, 44, 119, 44, 119, 9, 24, true, "Since the corrections become less and less, the rate of annotation goes up.", "Since the corrections become less and less, the rate of annotation goes up."], ["sentence", "", 887751753527930563, "TEXT", "#/texts/38", 1.0, 10373341387652950101, 4296463374728196626, 18446744073709551615, 18446744073709551615, 120, 445, 120, 445, 24, 81, true, "It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice.", "It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice."], ["sentence", "", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14172981653758283269, 8881727103428706752, 18446744073709551615, 18446744073709551615, 446, 539, 446, 539, 81, 97, true, "The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", "The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection."], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 1075818440275369468, 17454316887235878242, 18446744073709551615, 18446744073709551615, 169, 184, 169, 184, 33, 35, true, "training models", "training models"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 9789767543968526861, 314805434658016224, 18446744073709551615, 18446744073709551615, 223, 242, 223, 242, 44, 46, true, "annotation benefits", "annotation benefits"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 17553665798218401963, 3726396549088745766, 18446744073709551615, 18446744073709551615, 261, 278, 261, 278, 49, 51, true, "platform approach", "platform approach"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 6824561449593020406, 2835024862878445208, 18446744073709551615, 18446744073709551615, 409, 426, 409, 426, 75, 77, true, "asynchronous call", "asynchronous call"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15545443248697733320, 10164958119644670876, 18446744073709551615, 18446744073709551615, 450, 472, 450, 472, 82, 84, true, "accelerated annotation", "accelerated annotation"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 6431940343880246726, 1708248768847169986, 18446744073709551615, 18446744073709551615, 515, 538, 515, 538, 94, 96, true, "ground-truth collection", "ground-truth collection"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 2993400436190652919, 13692484191386832624, 18446744073709551615, 18446744073709551615, 54, 65, 54, 65, 11, 12, true, "corrections", "corrections"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625632775616, 7525501055193045867, 18446744073709551615, 18446744073709551615, 92, 96, 92, 96, 18, 19, true, "rate", "rate"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15359807916847495711, 11270264014125951727, 18446744073709551615, 18446744073709551615, 100, 110, 100, 110, 20, 21, true, "annotation", "annotation"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 3005486399909847392, 14682678718727362625, 18446744073709551615, 18446744073709551615, 152, 165, 152, 165, 31, 32, true, "inter-leaving", "inter-leaving"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 16381206541509431009, 14577680045959261972, 18446744073709551615, 18446744073709551615, 205, 211, 205, 211, 39, 40, true, "ground", "ground"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104159241711235, 5593828058814821597, 18446744073709551615, 18446744073709551615, 212, 217, 212, 217, 41, 42, true, "truth", "truth"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625631210899, 7525474213247124703, 18446744073709551615, 18446744073709551615, 291, 295, 291, 295, 54, 55, true, "task", "task"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 12771775017586419952, 2802304582408659586, 18446744073709551615, 18446744073709551615, 308, 324, 308, 324, 57, 58, true, "page-annotations", "page-annotations"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104161610777240, 13417175623690781227, 18446744073709551615, 18446744073709551615, 339, 344, 339, 344, 61, 62, true, "model", "model"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104161610777240, 13417175623690783224, 18446744073709551615, 18446744073709551615, 359, 364, 359, 364, 65, 66, true, "model", "model"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 16381206590740615814, 5715153085670598580, 18446744073709551615, 18446744073709551615, 384, 390, 384, 390, 69, 70, true, "labels", "labels"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 16682817150367627875, 14157104143939096698, 18446744073709551615, 18446744073709551615, 432, 444, 432, 444, 79, 80, true, "microservice", "microservice"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14635107217315999975, 15420785844354915058, 18446744073709551615, 18446744073709551615, 484, 492, 484, 492, 87, 88, true, "speed-up", "speed-up"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 16381206548642682247, 14381169330489265135, 18446744073709551615, 18446744073709551615, 498, 504, 498, 504, 90, 91, true, "factor", "factor"], ["verb", "compound-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14639581096777419601, 11157428250198373143, 18446744073709551615, 18446744073709551615, 473, 481, 473, 481, 84, 86, true, "leads to", "leads to"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 16381206574366219255, 14308904875579874986, 18446744073709551615, 18446744073709551615, 66, 72, 66, 72, 12, 13, true, "become", "become"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625699055541, 7971072571524745502, 18446744073709551615, 18446744073709551615, 111, 115, 111, 115, 21, 22, true, "goes", "goes"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541486535, 11951507066520328741, 18446744073709551615, 18446744073709551615, 123, 125, 123, 125, 25, 26, true, "is", "is"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 12178341415895645562, 13102495715066849378, 18446744073709551615, 18446744073709551615, 138, 141, 138, 141, 28, 29, true, "say", "say"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104159219515955, 5597411499285869731, 18446744073709551615, 18446744073709551615, 186, 191, 186, 191, 36, 37, true, "based", "based"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 5946726816546568286, 1477520919743645775, 18446744073709551615, 18446744073709551615, 195, 204, 195, 204, 38, 39, true, "annotated", "annotated"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14109055745804186414, 4901536807348624447, 18446744073709551615, 18446744073709551615, 297, 307, 297, 307, 56, 57, true, "submitting", "submitting"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14634153919632515335, 4255451725012375348, 18446744073709551615, 18446744073709551615, 326, 334, 326, 334, 59, 60, true, "training", "training"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14650448030444381648, 2869960803513685796, 18446744073709551615, 18446744073709551615, 346, 354, 346, 354, 63, 64, true, "applying", "applying"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14103651237077222912, 3033995743627716102, 18446744073709551615, 18446744073709551615, 369, 379, 369, 379, 67, 68, true, "predicting", "predicting"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104161555284808, 5558370405433613923, 18446744073709551615, 18446744073709551615, 392, 397, 392, 397, 71, 72, true, "comes", "comes"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 5959619225047725157, 1767038672352581109, 18446744073709551615, 18446744073709551615, 44, 53, 44, 53, 9, 11, true, "Since the", "Since the"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541485670, 11951507671817644057, 18446744073709551615, 18446744073709551615, 97, 99, 97, 99, 19, 20, true, "of", "of"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 3504047303127782210, 16099388320343716523, 18446744073709551615, 18446744073709551615, 142, 151, 142, 151, 29, 31, true, "that this", "that this"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541485670, 11951507671817649898, 18446744073709551615, 18446744073709551615, 166, 168, 166, 168, 32, 33, true, "of", "of"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541485678, 11951507647334320637, 18446744073709551615, 18446744073709551615, 192, 194, 192, 194, 37, 38, true, "on", "on"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625697843734, 7970779865719157582, 18446744073709551615, 18446744073709551615, 252, 256, 252, 256, 47, 48, true, "from", "from"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14091433066300748251, 6432975356795058417, 18446744073709551615, 18446744073709551615, 280, 290, 280, 290, 52, 54, true, "since each", "since each"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 12178341415895625940, 13102496268306370799, 18446744073709551615, 18446744073709551615, 365, 368, 365, 368, 66, 67, true, "for", "for"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625620237736, 7524977021781763492, 18446744073709551615, 18446744073709551615, 493, 497, 493, 497, 88, 90, true, "of a", "of a"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541485670, 11951507671817651167, 18446744073709551615, 18446744073709551615, 505, 507, 505, 507, 91, 92, true, "of", "of"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 12178341415895625940, 13102496268307349332, 18446744073709551615, 18446744073709551615, 511, 514, 511, 514, 93, 94, true, "for", "for"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541485865, 11951507086803862758, 18446744073709551615, 18446744073709551615, 135, 137, 135, 137, 27, 28, true, "to", "to"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104159243175056, 5593839863116247665, 18446744073709551615, 18446744073709551615, 403, 408, 403, 408, 73, 75, true, "to an", "to an"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625631408052, 7525477393951992167, 18446744073709551615, 18446744073709551615, 427, 431, 427, 431, 77, 79, true, "to a", "to a"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625631408052, 7525477393952012376, 18446744073709551615, 18446744073709551615, 479, 483, 479, 483, 85, 87, true, "to a", "to a"], ["numval", "fval", 4695688617288377564, "TEXT", "#/texts/39", 1.0, 12178341415896435197, 3724559431380620971, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "3.4", "3.4"], ["expression", "common", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 12178341415895450733, 17146743324715295976, 18446744073709551615, 18446744073709551615, 268, 272, 268, 272, 54, 55, true, "etc", "etc."], ["expression", "word-concatenation", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 6307689511527468252, 11049638471994231619, 18446744073709551615, 18446744073709551615, 47, 63, 47, 63, 10, 11, true, "machine-learning", "machine-learning"], ["sentence", "", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 13517213129330321847, 18127661899519764164, 18446744073709551615, 18446744073709551615, 0, 71, 0, 71, 0, 13, true, "In the CCS, there are essentially two types of machine-learning models.", "In the CCS, there are essentially two types of machine-learning models."], ["sentence", "", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 12886030746334264259, 13723940064220454406, 18446744073709551615, 18446744073709551615, 72, 157, 72, 157, 13, 31, true, "On the one hand, we have default models, which are designed to be layout independent.", "On the one hand, we have default models, which are designed to be layout independent."], ["sentence", "", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 14013456187441881338, 17239504930887350611, 18446744073709551615, 18446744073709551615, 158, 520, 158, 520, 31, 100, true, "They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall.", "They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall."], ["sentence", "", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 17780227530484624695, 18273401801814716252, 18446744073709551615, 18446744073709551615, 521, 605, 521, 605, 100, 116, true, "They will classify each cell in the page with regard to their layout semantic label.", "They will classify each cell in the page with regard to their layout semantic label."], ["term", "enum-term-mark-2", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 767578358531619449, 16632720521972427975, 18446744073709551615, 18446744073709551615, 499, 519, 499, 519, 96, 99, true, "precision and recall", "precision and recall"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 7873664013415410219, 4430601135045369937, 18446744073709551615, 18446744073709551615, 47, 70, 47, 70, 10, 12, true, "machine-learning models", "machine-learning models"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 1915006193249717419, 1875271127477933001, 18446744073709551615, 18446744073709551615, 97, 111, 97, 111, 20, 22, true, "default models", "default models"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16239183518337686478, 11101556195438910420, 18446744073709551615, 18446744073709551615, 170, 182, 170, 182, 34, 36, true, "raster image", "raster image"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15816118739913983357, 4910490026828256135, 18446744073709551615, 18446744073709551615, 218, 231, 218, 231, 43, 45, true, "basic objects", "basic objects"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 14046205808324278415, 17119448556130976489, 18446744073709551615, 18446744073709551615, 280, 290, 280, 290, 57, 59, true, "other hand", "other hand"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 2583917242234592483, 374576428698011758, 18446744073709551615, 18446744073709551615, 332, 355, 332, 355, 68, 70, true, "templatespecific models", "templatespecific models"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 1409002602794608575, 12647829849056583196, 18446744073709551615, 18446744073709551615, 395, 421, 395, 421, 78, 81, true, "particular layout template", "particular layout template"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 334743981239923851, 9786444577548631916, 18446744073709551615, 18446744073709551615, 494, 508, 494, 508, 95, 97, true, "high precision", "high precision"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 2317020437411802284, 6661739231635438995, 18446744073709551615, 18446744073709551615, 583, 604, 583, 604, 112, 115, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 12178341415896221596, 17145631232582541696, 18446744073709551615, 18446744073709551615, 7, 10, 7, 10, 2, 3, true, "CCS", "CCS"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 329104159243796903, 9793326042416365976, 18446744073709551615, 18446744073709551615, 38, 43, 38, 43, 8, 9, true, "types", "types"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625695385072, 5116635824435316109, 18446744073709551615, 18446744073709551615, 83, 87, 83, 87, 16, 17, true, "hand", "hand"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625632301461, 5151166762899359442, 18446744073709551615, 18446744073709551615, 190, 194, 190, 194, 38, 39, true, "page", "page"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206513098478539, 13904355123067777732, 18446744073709551615, 18446744073709551615, 241, 247, 241, 247, 48, 49, true, "tables", "tables"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 8106397480533647371, 3439539142366206574, 18446744073709551615, 18446744073709551615, 249, 256, 249, 256, 50, 51, true, "figures", "figures"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 14637917332659859466, 11680350228874501365, 18446744073709551615, 18446744073709551615, 258, 266, 258, 266, 52, 53, true, "formulas", "formulas"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 14634153919632515335, 2062260761097997133, 18446744073709551615, 18446744073709551615, 312, 320, 312, 320, 64, 65, true, "training", "training"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206559341571450, 10891618514124079785, 18446744073709551615, 18446744073709551615, 324, 330, 324, 330, 66, 67, true, "custom", "custom"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625696431489, 5116475966440691304, 18446744073709551615, 18446744073709551615, 462, 466, 462, 466, 89, 90, true, "data", "data"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 6167933651658664291, 1969766390996936239, 18446744073709551615, 18446744073709551615, 474, 483, 474, 483, 92, 93, true, "documents", "documents"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206521531485437, 15754120034665078572, 18446744073709551615, 18446744073709551615, 513, 519, 513, 519, 98, 99, true, "recall", "recall"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625696024605, 5116487879327823312, 18446744073709551615, 18446744073709551615, 545, 549, 545, 549, 104, 105, true, "cell", "cell"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625632301461, 5151166762899537630, 18446744073709551615, 18446744073709551615, 557, 561, 557, 561, 107, 108, true, "page", "page"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206521526353544, 15751299097077186113, 18446744073709551615, 18446744073709551615, 567, 573, 567, 573, 109, 110, true, "regard", "regard"], ["verb", "compound-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 2291392503341416825, 11195031169916444600, 18446744073709551615, 18446744073709551615, 18, 33, 18, 33, 5, 7, true, "are essentially", "are essentially"], ["verb", "compound-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 12310758631134579429, 6123985748511190566, 18446744073709551615, 18446744073709551615, 119, 144, 119, 144, 24, 29, true, "are designed to be layout", "are designed to be layout"], ["verb", "compound-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 4898731964974610599, 9536147348813194873, 18446744073709551615, 18446744073709551615, 363, 389, 363, 389, 72, 76, true, "are designed to specialize", "are designed to specialize"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625695387621, 5116635804435289206, 18446744073709551615, 18446744073709551615, 92, 96, 92, 96, 19, 20, true, "have", "have"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625631208371, 5151807067501724158, 18446744073709551615, 18446744073709551615, 163, 167, 163, 167, 32, 33, true, "take", "take"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 14637940110145064744, 2106064499720351293, 18446744073709551615, 18446744073709551615, 198, 206, 198, 206, 40, 41, true, "identify", "identify"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206567157578886, 10902549664873100786, 18446744073709551615, 18446744073709551615, 211, 217, 211, 217, 42, 43, true, "locate", "locate"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 8106478689608778321, 3710394550719923132, 18446744073709551615, 18446744073709551615, 300, 307, 300, 307, 62, 63, true, "support", "support"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 329104159171192019, 9796554067934784618, 18446744073709551615, 18446744073709551615, 426, 431, 426, 431, 82, 83, true, "allow", "allow"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 8106398484416229602, 17326137764355875282, 18446744073709551615, 18446744073709551615, 438, 445, 438, 445, 85, 86, true, "convert", "convert"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 8106397496930289884, 10559828500844892971, 18446744073709551615, 18446744073709551615, 450, 457, 450, 457, 87, 88, true, "extract", "extract"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 4427431036151074662, 16906017590074365227, 18446744073709551615, 18446744073709551615, 526, 539, 526, 539, 101, 103, true, "will classify", "will classify"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 8106478685702231057, 2830627525930358532, 18446744073709551615, 18446744073709551615, 233, 240, 233, 240, 46, 48, true, "such as", "such as"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16380809977974811061, 4113701743493453883, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 2, true, "In the", "In the"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485670, 17273985970400205401, 18446744073709551615, 18446744073709551615, 44, 46, 44, 46, 9, 10, true, "of", "of"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206532384237233, 17053737365407359035, 18446744073709551615, 18446744073709551615, 72, 78, 72, 78, 13, 15, true, "On the", "On the"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206565712212855, 9404609303836786535, 18446744073709551615, 18446744073709551615, 183, 189, 183, 189, 36, 38, true, "of the", "of the"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206532384237233, 17053737365407379411, 18446744073709551615, 18446744073709551615, 273, 279, 273, 279, 55, 57, true, "On the", "On the"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485670, 17273985970400249568, 18446744073709551615, 18446744073709551615, 321, 323, 321, 323, 65, 66, true, "of", "of"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625618762887, 5116378122940476765, 18446744073709551615, 18446744073709551615, 390, 394, 390, 394, 76, 78, true, "on a", "on a"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 12178341415895623120, 17146120863816560875, 18446744073709551615, 18446744073709551615, 467, 470, 467, 470, 90, 91, true, "out", "out"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485670, 17273985970400259382, 18446744073709551615, 18446744073709551615, 471, 473, 471, 473, 91, 92, true, "of", "of"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625618037948, 5116501620067563988, 18446744073709551615, 18446744073709551615, 484, 488, 484, 488, 93, 94, true, "with", "with"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206560518651853, 9497734443008541647, 18446744073709551615, 18446744073709551615, 550, 556, 550, 556, 105, 107, true, "in the", "in the"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625618037948, 5116501620067789933, 18446744073709551615, 18446744073709551615, 562, 566, 562, 566, 108, 109, true, "with", "with"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485865, 17273985947860325671, 18446744073709551615, 18446744073709551615, 132, 134, 132, 134, 26, 27, true, "to", "to"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485865, 17273985947860327104, 18446744073709551615, 18446744073709551615, 195, 197, 195, 197, 39, 40, true, "to", "to"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485865, 17273985947860314144, 18446744073709551615, 18446744073709551615, 376, 378, 376, 378, 74, 75, true, "to", "to"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485865, 17273985947860311804, 18446744073709551615, 18446744073709551615, 435, 437, 435, 437, 84, 85, true, "to", "to"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485865, 17273985947860221241, 18446744073709551615, 18446744073709551615, 574, 576, 574, 576, 110, 111, true, "to", "to"], ["expression", "common", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541486545, 217027369193113293, 18446744073709551615, 18446744073709551615, 276, 280, 276, 280, 50, 51, true, "ie", "i.e."], ["expression", "wtoken-concatenation", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 329104147725158906, 15860090129804914924, 18446744073709551615, 18446744073709551615, 0, 5, 0, 5, 0, 1, true, "3.4.1", "3.4.1"], ["sentence", "", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 11192516552703949087, 2492940853653053327, 18446744073709551615, 18446744073709551615, 0, 14, 0, 14, 0, 3, true, "3.4.1 Metrics.", "3.4.1 Metrics."], ["sentence", "", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 5597992645106161612, 18212052448285647499, 18446744073709551615, 18446744073709551615, 15, 146, 15, 146, 3, 26, true, "Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results.", "Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results."], ["sentence", "", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 17501061903424888556, 2513848896063536453, 18446744073709551615, 18446744073709551615, 147, 325, 147, 325, 26, 61, true, "The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label.", "The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label."], ["sentence", "", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 11474071216785359264, 2522141362145198185, 18446744073709551615, 18446744073709551615, 326, 420, 326, 420, 61, 79, true, "The correctness of this label is what we aim to measure with the recall and precision metrics.", "The correctness of this label is what we aim to measure with the recall and precision metrics."], ["term", "enum-term-mark-2", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 767578358531619449, 4617639867407406559, 18446744073709551615, 18446744073709551615, 88, 108, 88, 108, 16, 19, true, "precision and recall", "precision and recall"], ["term", "enum-term-mark-2", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 11037453576911667853, 13989172036358784933, 18446744073709551615, 18446744073709551615, 391, 411, 391, 411, 74, 77, true, "recall and precision", "recall and precision"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16904814960714419182, 11528817755660542225, 18446744073709551615, 18446744073709551615, 102, 116, 102, 116, 18, 20, true, "recall metrics", "recall metrics"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 2298135982047686680, 2518908270667636880, 18446744073709551615, 18446744073709551615, 151, 168, 151, 168, 27, 29, true, "first observation", "first observation"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 13314981802876368811, 8098139644132232215, 18446744073709551615, 18446744073709551615, 245, 260, 245, 260, 45, 47, true, "human annotator", "human annotator"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 3503955255877193443, 16168589224721760141, 18446744073709551615, 18446744073709551615, 298, 307, 298, 307, 55, 57, true, "text cell", "text cell"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 11173100292227021015, 1800543351623843241, 18446744073709551615, 18446744073709551615, 310, 324, 310, 324, 58, 60, true, "semantic label", "semantic label"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 13620323371457554126, 458926269169904744, 18446744073709551615, 18446744073709551615, 402, 419, 402, 419, 76, 78, true, "precision metrics", "precision metrics"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106471246351785636, 976374676032317796, 18446744073709551615, 18446744073709551615, 6, 13, 6, 13, 1, 2, true, "Metrics", "Metrics"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 5731695876385560379, 811697740203155724, 18446744073709551615, 18446744073709551615, 37, 48, 37, 48, 6, 7, true, "performance", "performance"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206567230470443, 12418767995683661148, 18446744073709551615, 18446744073709551615, 56, 62, 56, 62, 9, 10, true, "models", "models"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 6184954595655792282, 14903161230079425690, 18446744073709551615, 18446744073709551615, 88, 97, 88, 97, 16, 17, true, "precision", "precision"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106478445190161533, 1918860944869567675, 18446744073709551615, 18446744073709551615, 138, 145, 138, 145, 24, 25, true, "results", "results"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206566212127622, 12760563398286649502, 18446744073709551615, 18446744073709551615, 181, 187, 181, 187, 32, 33, true, "output", "output"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106464587473865376, 11906315049643178602, 18446744073709551615, 18446744073709551615, 193, 200, 193, 200, 35, 36, true, "machine", "machine"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 329104161610777240, 17091366051023556973, 18446744073709551615, 18446744073709551615, 209, 214, 209, 214, 37, 38, true, "model", "model"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 2993400436143573854, 11462282328537020132, 18446744073709551615, 18446744073709551615, 330, 341, 330, 341, 62, 63, true, "correctness", "correctness"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 329104161624445793, 17107701638781466857, 18446744073709551615, 18446744073709551615, 350, 355, 350, 355, 65, 66, true, "label", "label"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206521531485437, 11959686269616181622, 18446744073709551615, 18446744073709551615, 391, 397, 391, 397, 74, 75, true, "recall", "recall"], ["verb", "compound-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 5912573161125631318, 18389017722629856917, 18446744073709551615, 18446744073709551615, 117, 133, 117, 133, 20, 23, true, "used to evaluate", "used to evaluate"], ["verb", "compound-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15603910262549214758, 16583885660288754437, 18446744073709551615, 18446744073709551615, 215, 225, 215, 225, 38, 40, true, "is exactly", "is exactly"], ["verb", "compound-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 4896725137073128692, 11944200149874396120, 18446744073709551615, 18446744073709551615, 367, 381, 367, 381, 69, 72, true, "aim to measure", "aim to measure"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 5314857828561765555, 16399137808174602665, 18446744073709551615, 18446744073709551615, 22, 32, 22, 32, 4, 5, true, "discussing", "discussing"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 12178341415895617983, 13147520311847756339, 18446744073709551615, 18446744073709551615, 64, 67, 64, 67, 11, 12, true, "let", "let"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206567815771749, 12306163915021708527, 18446744073709551615, 18446744073709551615, 77, 83, 77, 83, 14, 15, true, "define", "define"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541486535, 217027369425741123, 18446744073709551615, 18446744073709551615, 169, 171, 169, 171, 29, 30, true, "is", "is"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106342444693204894, 8432795443147196977, 18446744073709551615, 18446744073709551615, 201, 208, 201, 208, 36, 37, true, "learned", "learned"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 12860895623677311427, 17253192036505745074, 18446744073709551615, 18446744073709551615, 261, 274, 261, 274, 47, 49, true, "would produce", "would produce"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541486545, 217027369193113293, 18446744073709551615, 18446744073709551615, 276, 280, 276, 280, 50, 51, true, "ie", "i.e."], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 3080311951459850033, 533771465723720685, 18446744073709551615, 18446744073709551615, 284, 295, 284, 295, 52, 54, true, "will assign", "will assign"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541486535, 217027369425752515, 18446744073709551615, 18446744073709551615, 356, 358, 356, 358, 66, 67, true, "is", "is"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106475344576462148, 12913315537205632230, 18446744073709551615, 18446744073709551615, 230, 237, 230, 237, 41, 43, true, "same of", "same of"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206535679983326, 13147770262986613637, 18446744073709551615, 18446744073709551615, 15, 21, 15, 21, 3, 4, true, "Before", "Before"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206565712212855, 13758401303862020825, 18446744073709551615, 18446744073709551615, 49, 55, 49, 55, 7, 9, true, "of the", "of the"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 14634130761162415388, 14555384046902233320, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 30, 32, true, "that the", "that the"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 389609625620237736, 16645129036916663308, 18446744073709551615, 18446744073709551615, 188, 192, 188, 192, 33, 35, true, "of a", "of a"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106342927224204628, 1697921553677515464, 18446744073709551615, 18446744073709551615, 342, 349, 342, 349, 63, 65, true, "of this", "of this"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 14638857868319795209, 1013014462354929148, 18446744073709551615, 18446744073709551615, 382, 390, 382, 390, 72, 74, true, "with the", "with the"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541485865, 217027973200879925, 18446744073709551615, 18446744073709551615, 122, 124, 122, 124, 21, 22, true, "to", "to"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541485865, 217027973200576949, 18446744073709551615, 18446744073709551615, 371, 373, 371, 373, 70, 71, true, "to", "to"], ["expression", "common", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 15441160910541486545, 567598813997742384, 18446744073709551615, 18446744073709551615, 36, 40, 36, 40, 4, 5, true, "ie", "i.e."], ["expression", "apostrophe", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 389609625696231302, 15358368697525810593, 18446744073709551615, 18446744073709551615, 44, 49, 44, 49, 6, 7, true, "dont", "don't"], ["expression", "word-concatenation", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 1304401478471854224, 649631639388333518, 18446744073709551615, 18446744073709551615, 0, 11, 0, 11, 0, 1, true, "multi-class", "multi-class"], ["sentence", "", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 15490331838172880166, 6727043395560011683, 18446744073709551615, 18446744073709551615, 0, 199, 0, 199, 0, 35, true, "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label.", "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label."], ["term", "enum-term-mark-2", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 11037453576911667853, 6817453896806379289, 18446744073709551615, 18446744073709551615, 163, 183, 163, 183, 28, 31, true, "recall and precision", "recall and precision"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 8860580720669525337, 5603592342532464212, 18446744073709551615, 18446744073709551615, 0, 34, 0, 34, 0, 3, true, "multi-class classification problem", "multi-class classification problem"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 11769591311949881693, 6939363705971315498, 18446744073709551615, 18446744073709551615, 76, 105, 76, 105, 13, 17, true, "many possible semantic labels", "many possible semantic labels"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 2685619966605559755, 15479784634162924092, 18446744073709551615, 18446744073709551615, 117, 135, 117, 135, 20, 22, true, "performance result", "performance result"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 16381206590740615814, 15595426310372972978, 18446744073709551615, 18446744073709551615, 64, 70, 64, 70, 10, 11, true, "labels", "labels"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 8106397775114664992, 12360006154142338319, 18446744073709551615, 18446744073709551615, 148, 155, 148, 155, 25, 26, true, "average", "average"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 16381206521531485437, 2808816684408088237, 18446744073709551615, 18446744073709551615, 163, 169, 163, 169, 28, 29, true, "recall", "recall"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 6184954595655792282, 7514668760154149307, 18446744073709551615, 18446744073709551615, 174, 183, 174, 183, 30, 31, true, "precision", "precision"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 329104161624445793, 17860511514209003623, 18446744073709551615, 18446744073709551615, 193, 198, 193, 198, 33, 34, true, "label", "label"], ["verb", "compound-verb", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 6184629322009942858, 1696555540029622570, 18446744073709551615, 18446744073709551615, 50, 59, 50, 59, 7, 9, true, "have only", "have only"], ["verb", "single-verb", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 15441160910541486545, 567598813997742384, 18446744073709551615, 18446744073709551615, 36, 40, 36, 40, 4, 5, true, "ie", "i.e."], ["verb", "single-verb", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 8106477985499172124, 7535163702770855737, 18446744073709551615, 18446744073709551615, 136, 143, 136, 143, 22, 24, true, "will be", "will be"], ["conn", "single-conn", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 16381206565712212855, 15841842752973774978, 18446744073709551615, 18446744073709551615, 156, 162, 156, 162, 26, 28, true, "of the", "of the"], ["conn", "single-conn", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 14637917333167503367, 1995163399168108017, 18446744073709551615, 18446744073709551615, 184, 192, 184, 192, 31, 33, true, "for each", "for each"], ["parenthesis", "round brackets", 2249972239307071508, "TEXT", "#/texts/43", 1.0, 329104053186083887, 849114158930843840, 18446744073709551615, 18446744073709551615, 11, 18, 11, 16, 2, 6, true, "(= \u211b)", "(= \u211b)"], ["parenthesis", "round brackets", 2249972239307071508, "TEXT", "#/texts/43", 1.0, 16380808301981907129, 17658555346938479763, 18446744073709551615, 18446744073709551615, 33, 41, 31, 36, 8, 12, true, "(= \ud835\udcab)", "(= \ud835\udcab)"], ["numval", "ival", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 17767354399704235161, 6099910645632343085, 18446744073709551615, 18446744073709551615, 70, 71, 65, 66, 15, 16, true, "1", "1"], ["parenthesis", "reference", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 12178341415896395122, 16140836621700527287, 18446744073709551615, 18446744073709551615, 69, 72, 64, 67, 14, 17, true, "(1)", "(1)"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104159213418835, 12512684746394364905, 18446744073709551615, 18446744073709551615, 6, 13, 4, 11, 2, 3, true, "t_{p}", "t$_{p}$"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104159213418835, 12512684746394365415, 18446744073709551615, 18446744073709551615, 14, 21, 12, 19, 3, 4, true, "t_{p}", "t$_{p}$"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104161698390847, 17326553453012765569, 18446744073709551615, 18446744073709551615, 24, 31, 22, 29, 5, 6, true, "f_{p}", "f$_{p}$"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104159213418835, 12512684746394305740, 18446744073709551615, 18446744073709551615, 41, 48, 36, 43, 9, 10, true, "t_{p}", "t$_{p}$"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104159213418835, 12512684746394367710, 18446744073709551615, 18446744073709551615, 49, 56, 44, 51, 10, 11, true, "t_{p}", "t$_{p}$"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104161698393277, 17326553835857512588, 18446744073709551615, 18446744073709551615, 59, 66, 54, 61, 12, 13, true, "f_{n}", "f$_{n}$"], ["expression", "wtoken-concatenation", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104159213418835, 14787525895513303260, 18446744073709551615, 18446744073709551615, 6, 13, 6, 13, 1, 2, true, "t_{p}", "t$_{p}$"], ["expression", "wtoken-concatenation", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104161698390847, 13988743388207353010, 18446744073709551615, 18446744073709551615, 15, 22, 15, 22, 3, 4, true, "f_{p}", "f$_{p}$"], ["expression", "wtoken-concatenation", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104161698393277, 13988741372871105719, 18446744073709551615, 18446744073709551615, 27, 34, 27, 34, 5, 6, true, "f_{n}", "f$_{n}$"], ["sentence", "", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 7335149866647742481, 5389973564291704178, 18446744073709551615, 18446744073709551615, 6, 124, 6, 124, 1, 19, true, "t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels.", "t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels."], ["term", "enum-term-mark-2", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 9949181435334963834, 13863161906120910343, 18446744073709551615, 18446744073709551615, 6, 34, 6, 34, 1, 6, true, "t_{p}, f_{p} and f_{n}", "t$_{p}$, f$_{p}$ and f$_{n}$"], ["term", "single-term", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104159213418835, 14787525895513303260, 18446744073709551615, 18446744073709551615, 6, 13, 6, 13, 1, 2, true, "t_{p}", "t$_{p}$"], ["term", "single-term", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104161698390847, 13988743388207353010, 18446744073709551615, 18446744073709551615, 15, 22, 15, 22, 3, 4, true, "f_{p}", "f$_{p}$"], ["term", "single-term", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104161698393277, 13988741372871105719, 18446744073709551615, 18446744073709551615, 27, 34, 27, 34, 5, 6, true, "f_{n}", "f$_{n}$"], ["term", "single-term", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 16381206590740615814, 634376818699750368, 18446744073709551615, 18446744073709551615, 117, 123, 117, 123, 17, 18, true, "labels", "labels"], ["verb", "compound-verb", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 12161488772614107065, 10356018545425099123, 18446744073709551615, 18446744073709551615, 35, 57, 35, 57, 6, 8, true, "represent respectively", "represent respectively"], ["verb", "single-verb", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 6184954633443293966, 1327321667481246505, 18446744073709551615, 18446744073709551615, 107, 116, 107, 116, 16, 17, true, "predicted", "predicted"], ["numval", "ival", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17767354399704235157, 12704387693977461708, 18446744073709551615, 18446744073709551615, 501, 502, 501, 502, 92, 93, true, "5", "5"], ["numval", "ival", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17767354399704235158, 12704387694225961167, 18446744073709551615, 18446744073709551615, 504, 505, 504, 505, 94, 95, true, "6", "6"], ["numval", "ival", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541481982, 10124025090905978449, 18446744073709551615, 18446744073709551615, 507, 509, 507, 509, 96, 97, true, "10", "10"], ["numval", "ival", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17767354399704235152, 12704387694582788969, 18446744073709551615, 18446744073709551615, 535, 536, 535, 536, 103, 104, true, "8", "8"], ["numval", "ival", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17767354399704235153, 12704387694366902016, 18446744073709551615, 18446744073709551615, 538, 539, 538, 539, 105, 106, true, "9", "9"], ["parenthesis", "reference", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895577775, 10221660528452945315, 18446744073709551615, 18446744073709551615, 562, 565, 562, 565, 111, 112, true, "[7]", "[7]"], ["parenthesis", "reference", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 389609625697296215, 2696768862675468030, 18446744073709551615, 18446744073709551615, 609, 613, 609, 613, 121, 122, true, "[10]", "[10]"], ["parenthesis", "reference", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895577640, 10221660531532098661, 18446744073709551615, 18446744073709551615, 629, 632, 629, 632, 125, 126, true, "[9]", "[9]"], ["parenthesis", "round brackets", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 3545569348905905585, 4469200219510189414, 18446744073709551615, 18446744073709551615, 454, 499, 454, 499, 80, 91, true, "(and their derivatives Fast-and Faster-R-CNN)", "(and their derivatives Fast-and Faster-R-CNN)"], ["parenthesis", "square brackets", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15965486372532711702, 9304269592494963167, 18446744073709551615, 18446744073709551615, 500, 510, 500, 510, 91, 98, true, "[5, 6, 10]", "[5, 6, 10]"], ["parenthesis", "square brackets", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206575391579989, 5003543538146822835, 18446744073709551615, 18446744073709551615, 534, 540, 534, 540, 102, 107, true, "[8, 9]", "[8, 9]"], ["expression", "common", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895450733, 10221669895136519552, 18446744073709551615, 18446744073709551615, 199, 203, 199, 203, 35, 36, true, "etc", "etc."], ["expression", "word-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206484692763269, 3909475141979723724, 18446744073709551615, 18446744073709551615, 447, 453, 447, 453, 79, 80, true, "R-CNNs", "R-CNNs"], ["expression", "word-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14650433109252770301, 8107892129208874535, 18446744073709551615, 18446744073709551615, 477, 485, 477, 485, 84, 85, true, "Fast-and", "Fast-and"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 329104147725158907, 15783524442332654082, 18446744073709551615, 18446744073709551615, 0, 5, 0, 5, 0, 1, true, "3.4.2", "3.4.2"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895577775, 10221660528452945315, 18446744073709551615, 18446744073709551615, 562, 565, 562, 565, 111, 112, true, "[7]", "[7]"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 5328308949596420596, 16816517518101354522, 18446744073709551615, 18446744073709551615, 596, 608, 596, 608, 120, 121, true, "Faster-R-CNN", "Faster-R-CNN"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 389609625697296215, 2696768862675468030, 18446744073709551615, 18446744073709551615, 609, 613, 609, 613, 121, 122, true, "[10]", "[10]"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206533950151485, 7429344052728955394, 18446744073709551615, 18446744073709551615, 622, 628, 622, 628, 124, 125, true, "YOLOv2", "YOLOv2"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895577640, 10221660531532098661, 18446744073709551615, 18446744073709551615, 629, 632, 629, 632, 125, 126, true, "[9]", "[9]"], ["sentence", "", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 13862970203854964234, 1000664251196474029, 18446744073709551615, 18446744073709551615, 0, 21, 0, 21, 0, 4, true, "3.4.2 Default Models.", "3.4.2 Default Models."], ["sentence", "", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15397362661759974852, 12298157670836849129, 18446744073709551615, 18446744073709551615, 22, 109, 22, 109, 4, 20, true, "The aim of the default models is to identify specific, ubiquitous objects in documents.", "The aim of the default models is to identify specific, ubiquitous objects in documents."], ["sentence", "", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16425179928293029532, 1769621671439790257, 18446744073709551615, 18446744073709551615, 110, 356, 110, 356, 20, 64, true, "Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods.", "Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods."], ["sentence", "", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 9742780938207760057, 12486707672052556921, 18446744073709551615, 18446744073709551615, 357, 566, 357, 566, 64, 113, true, "Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7].", "Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]."], ["sentence", "", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16770266057504597285, 13963861550769241956, 18446744073709551615, 18446744073709551615, 567, 715, 567, 715, 113, 138, true, "On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", "On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions."], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 18025670271476196185, 4076525407823276511, 18446744073709551615, 18446744073709551615, 6, 20, 6, 20, 1, 3, true, "Default Models", "Default Models"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 1915006193249717419, 11522134204391288466, 18446744073709551615, 18446744073709551615, 37, 51, 37, 51, 8, 10, true, "default models", "default models"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 5609921068371406163, 9046228298730371624, 18446744073709551615, 18446744073709551615, 77, 95, 77, 95, 15, 17, true, "ubiquitous objects", "ubiquitous objects"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14475299778579205125, 8777768897758483305, 18446744073709551615, 18446744073709551615, 122, 134, 122, 134, 22, 24, true, "such objects", "such objects"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 1883638592780196390, 3583719480229507642, 18446744073709551615, 18446744073709551615, 176, 197, 176, 197, 32, 34, true, "mathematical formulas", "mathematical formulas"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 13067270324954730530, 3518300029937178041, 18446744073709551615, 18446744073709551615, 215, 231, 215, 231, 39, 41, true, "high variability", "high variability"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16647295474405589964, 3738165022613409525, 18446744073709551615, 18446744073709551615, 244, 259, 244, 259, 44, 46, true, "document layout", "document layout"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14113886516804568139, 16613561485635018399, 18446744073709551615, 18446744073709551615, 324, 355, 324, 355, 59, 63, true, "robust object detection methods", "robust object detection methods"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 1993812675333266626, 1904048806204470733, 18446744073709551615, 18446744073709551615, 377, 391, 377, 391, 68, 70, true, "robust methods", "robust methods"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17376821969571444655, 10906121291496328649, 18446744073709551615, 18446744073709551615, 418, 438, 418, 438, 74, 77, true, "deep neural networks", "deep neural networks"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 10062108462976606414, 4739790499633724912, 18446744073709551615, 18446744073709551615, 465, 492, 465, 492, 83, 86, true, "derivatives Fast-and Faster", "derivatives Fast-and Faster"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17384399425913386842, 17600753885763817486, 18446744073709551615, 18446744073709551615, 516, 535, 516, 535, 100, 103, true, "YOLO architecture [", "YOLO architecture ["], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 13690465142419804728, 5041912981728959292, 18446744073709551615, 18446744073709551615, 549, 561, 549, 561, 109, 111, true, "SSD networks", "SSD networks"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15318930646636167014, 16480554943289876127, 18446744073709551615, 18446744073709551615, 655, 679, 655, 679, 129, 131, true, "individual microservices", "individual microservices"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895571887, 10221689572436439446, 18446744073709551615, 18446744073709551615, 26, 29, 26, 29, 5, 6, true, "aim", "aim"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 6167933651658664291, 6518863807379418823, 18446744073709551615, 18446744073709551615, 99, 108, 99, 108, 18, 19, true, "documents", "documents"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14650277098690689540, 4238826485163265018, 18446744073709551615, 18446744073709551615, 110, 118, 110, 118, 20, 21, true, "Examples", "Examples"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206513098478539, 7158084506369383330, 18446744073709551615, 18446744073709551615, 139, 145, 139, 145, 25, 26, true, "tables", "tables"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 8106397480533647371, 6864720578269745032, 18446744073709551615, 18446744073709551615, 147, 154, 147, 154, 27, 28, true, "figures", "figures"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14652289689770638970, 18248014564023136762, 18446744073709551615, 18446744073709551615, 166, 174, 166, 174, 30, 31, true, "captions", "captions"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415896115241, 10214876492138438607, 18446744073709551615, 18446744073709551615, 204, 207, 204, 207, 36, 37, true, "Due", "Due"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 11387678566946341343, 17478644430819869889, 18446744073709551615, 18446744073709551615, 278, 292, 278, 292, 51, 52, true, "representation", "representation"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 8106342034010873556, 852442964076649594, 18446744073709551615, 18446744073709551615, 302, 309, 302, 309, 54, 55, true, "objects", "objects"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 8106342034010873556, 852442964076641960, 18446744073709551615, 18446744073709551615, 406, 413, 406, 413, 72, 73, true, "objects", "objects"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206484692763269, 3909475141979723724, 18446744073709551615, 18446744073709551615, 447, 453, 447, 453, 79, 80, true, "R-CNNs", "R-CNNs"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415896234584, 10214872764581833763, 18446744073709551615, 18446744073709551615, 495, 498, 495, 498, 89, 90, true, "CNN", "CNN"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14814125365076808131, 16189701360705085149, 18446744073709551615, 18446744073709551615, 574, 582, 574, 582, 115, 116, true, "platform", "platform"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 5328308949596420596, 16816517518101354522, 18446744073709551615, 18446744073709551615, 596, 608, 596, 608, 120, 121, true, "Faster-R-CNN", "Faster-R-CNN"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206533950151485, 7429344052728955394, 18446744073709551615, 18446744073709551615, 622, 628, 622, 628, 124, 125, true, "YOLOv2", "YOLOv2"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14814151113413570861, 12769628045261514081, 18446744073709551615, 18446744073709551615, 633, 641, 633, 641, 126, 127, true, "networks", "networks"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14634153919632515335, 18400654478213785826, 18446744073709551615, 18446744073709551615, 690, 698, 690, 698, 134, 135, true, "training", "training"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15175963360124346573, 3242350763380404235, 18446744073709551615, 18446744073709551615, 703, 714, 703, 714, 136, 137, true, "predictions", "predictions"], ["verb", "compound-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17731448345306221383, 8279129691342552404, 18446744073709551615, 18446744073709551615, 52, 66, 52, 66, 10, 13, true, "is to identify", "is to identify"], ["verb", "compound-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 6169327273053177421, 16918164282889950953, 18446744073709551615, 18446744073709551615, 314, 323, 314, 323, 57, 59, true, "need very", "need very"], ["verb", "single-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895564896, 10221689629778288871, 18446744073709551615, 18446744073709551615, 135, 138, 135, 138, 24, 25, true, "are", "are"], ["verb", "single-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 6180169258394806855, 12614592480509617382, 18446744073709551615, 18446744073709551615, 396, 405, 396, 405, 71, 72, true, "detecting", "detecting"], ["verb", "single-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895564896, 10221689629778338900, 18446744073709551615, 18446744073709551615, 414, 417, 414, 417, 73, 74, true, "are", "are"], ["verb", "single-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 389609625695387621, 2696595208771284580, 18446744073709551615, 18446744073709551615, 587, 591, 587, 591, 118, 119, true, "have", "have"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 8106478685702231057, 12497800819898333301, 18446744073709551615, 18446744073709551615, 439, 446, 439, 446, 77, 79, true, "such as", "such as"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15601168207941442599, 1060530269192402463, 18446744073709551615, 18446744073709551615, 642, 654, 642, 654, 127, 129, true, "available as", "available as"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206565712212855, 12447021742026355211, 18446744073709551615, 18446744073709551615, 30, 36, 30, 36, 6, 8, true, "of the", "of the"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541486538, 10124025012681555291, 18446744073709551615, 18446744073709551615, 96, 98, 96, 98, 17, 18, true, "in", "in"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541485670, 10124025032663008686, 18446744073709551615, 18446744073709551615, 119, 121, 119, 121, 21, 22, true, "of", "of"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 389609625618037948, 2698070654931797643, 18446744073709551615, 18446744073709551615, 155, 159, 155, 159, 28, 29, true, "with", "with"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15357604611893232445, 12706581294679878362, 18446744073709551615, 18446744073709551615, 232, 243, 232, 243, 41, 44, true, "in both the", "in both the"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541487053, 10124024937119636968, 18446744073709551615, 18446744073709551615, 268, 270, 268, 270, 48, 49, true, "as", "as"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206560518651853, 6740683993471155777, 18446744073709551615, 18446744073709551615, 271, 277, 271, 277, 49, 51, true, "in the", "in the"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14814148868025447689, 13785663703275405723, 18446744073709551615, 18446744073709551615, 293, 301, 293, 301, 52, 54, true, "of these", "of these"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895625940, 10221692560851504811, 18446744073709551615, 18446744073709551615, 392, 395, 392, 395, 70, 71, true, "for", "for"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541487702, 10124024927677174728, 18446744073709551615, 18446744073709551615, 567, 569, 567, 569, 113, 114, true, "On", "On"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14652253381802754387, 7964973155712271593, 18446744073709551615, 18446744073709551615, 681, 689, 681, 689, 132, 134, true, "both for", "both for"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541485865, 10124025024834353152, 18446744073709551615, 18446744073709551615, 55, 57, 55, 57, 11, 12, true, "to", "to"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206519425733256, 6892863425976124668, 18446744073709551615, 18446744073709551615, 208, 214, 208, 214, 37, 39, true, "to the", "to the"], ["sentence", "", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 13907813772802190178, 7003946510300018767, 18446744073709551615, 18446744073709551615, 0, 172, 0, 172, 0, 33, true, "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects.", "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects."], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 9383287618729376838, 4119329632487579731, 18446744073709551615, 18446744073709551615, 54, 67, 54, 67, 12, 14, true, "table objects", "table objects"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 19890884327632451, 8285658877583341965, 18446744073709551615, 18446744073709551615, 77, 92, 77, 92, 17, 19, true, "same principles", "same principles"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 16919282435020496358, 3695407906295049397, 18446744073709551615, 18446744073709551615, 110, 128, 110, 128, 22, 24, true, "following analysis", "following analysis"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 14046205808336657429, 12297163905961192123, 18446744073709551615, 18446744073709551615, 150, 160, 150, 160, 28, 30, true, "other type", "other type"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 329104161668023890, 15034731120442627912, 18446744073709551615, 18446744073709551615, 8, 13, 8, 13, 2, 3, true, "paper", "paper"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 6180169258394806776, 9258215019222553593, 18446744073709551615, 18446744073709551615, 41, 50, 41, 50, 10, 11, true, "detection", "detection"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 8106342034010873556, 5785162319445905699, 18446744073709551615, 18446744073709551615, 164, 171, 164, 171, 31, 32, true, "objects", "objects"], ["verb", "compound-verb", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 332064237701512405, 14438730281518682242, 18446744073709551615, 18446744073709551615, 18, 33, 18, 33, 5, 8, true, "will focus only", "will focus only"], ["verb", "compound-verb", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 15596128382219940825, 312116650770694261, 18446744073709551615, 18446744073709551615, 129, 145, 129, 145, 24, 27, true, "are also applied", "are also applied"], ["verb", "single-verb", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 6180169740129371114, 10720671592342542201, 18446744073709551615, 18446744073709551615, 93, 102, 93, 102, 19, 20, true, "described", "described"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 8106396862006371970, 7588613479518108209, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "In this", "In this"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 16381206566339127348, 9743263998949637888, 18446744073709551615, 18446744073709551615, 34, 40, 34, 40, 8, 10, true, "on the", "on the"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 15441160910541485670, 16075120093636142673, 18446744073709551615, 18446744073709551615, 51, 53, 51, 53, 11, 12, true, "of", "of"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 16381206560518651853, 10949484449484932256, 18446744073709551615, 18446744073709551615, 103, 109, 103, 109, 20, 22, true, "in the", "in the"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 12178341415895625940, 13938643598911286725, 18446744073709551615, 18446744073709551615, 146, 149, 146, 149, 27, 28, true, "for", "for"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 15441160910541485670, 16075120093636131136, 18446744073709551615, 18446744073709551615, 161, 163, 161, 163, 30, 31, true, "of", "of"], ["numval", "ival", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 329104147714235827, 16237474479634274377, 18446744073709551615, 18446744073709551615, 97, 102, 97, 102, 16, 17, true, "30000", "30000"], ["expression", "wtoken-concatenation", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 6165495739602921837, 9340964570503238985, 18446744073709551615, 18446744073709551615, 66, 77, 66, 77, 11, 12, true, "data^{11}", "data$^{11}$"], ["sentence", "", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 10610641527733228652, 13865291180638734476, 18446744073709551615, 18446744073709551615, 0, 78, 0, 78, 0, 13, true, "The networks available on our platform have been trained on arXiv data$^{11}$.", "The networks available on our platform have been trained on arXiv data$^{11}$."], ["term", "single-term", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 16274809754904284981, 14150942809734201297, 18446744073709551615, 18446744073709551615, 60, 77, 60, 77, 10, 12, true, "arXiv data^{11}", "arXiv data$^{11}$"], ["term", "single-term", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 14814151113413570861, 13612463695668145248, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 1, 2, true, "networks", "networks"], ["term", "single-term", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 14814125365076808131, 15422858865778697355, 18446744073709551615, 18446744073709551615, 30, 38, 30, 38, 5, 6, true, "platform", "platform"], ["verb", "compound-verb", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 15334498195961772498, 2558436440940418273, 18446744073709551615, 18446744073709551615, 39, 56, 39, 56, 6, 9, true, "have been trained", "have been trained"], ["conn", "single-conn", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 15601168207941439665, 9881833882850079373, 18446744073709551615, 18446744073709551615, 13, 25, 13, 25, 2, 4, true, "available on", "available on"], ["conn", "single-conn", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 15441160910541485678, 11845235562561279624, 18446744073709551615, 18446744073709551615, 57, 59, 57, 59, 9, 10, true, "on", "on"], ["numval", "ival", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 329104147714235827, 3899762460303934761, 18446744073709551615, 18446744073709551615, 56, 61, 56, 61, 12, 13, true, "30000", "30000"], ["numval", "ival", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 329104147765109382, 3508537341518270119, 18446744073709551615, 18446744073709551615, 82, 87, 82, 87, 18, 19, true, "25000", "25000"], ["numval", "ival", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 389609625655246800, 3852020413771069256, 18446744073709551615, 18446744073709551615, 130, 134, 130, 134, 27, 28, true, "5000", "5000"], ["expression", "word-concatenation", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 10569149591004722800, 16900271628482084512, 18446744073709551615, 18446744073709551615, 225, 242, 225, 242, 48, 49, true, "data-augmentation", "data-augmentation"], ["expression", "word-concatenation", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 3458523808570659318, 12440619335925777434, 18446744073709551615, 18446744073709551615, 285, 301, 285, 301, 56, 57, true, "object-detection", "object-detection"], ["expression", "word-concatenation", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 11170541192325073142, 933495499316463478, 18446744073709551615, 18446744073709551615, 305, 325, 305, 325, 58, 59, true, "image-classification", "image-classification"], ["sentence", "", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 11408825138316374504, 8478950504398234103, 18446744073709551615, 18446744073709551615, 45, 156, 45, 156, 10, 32, true, "From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation.", "From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation."], ["sentence", "", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 12423813338298982980, 11414690006404237207, 18446744073709551615, 18446744073709551615, 157, 337, 157, 337, 32, 61, true, "Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", "Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms."], ["term", "enum-term-mark-2", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 6721676886213735149, 343887467640033113, 18446744073709551615, 18446744073709551615, 285, 325, 285, 325, 56, 59, true, "object-detection or image-classification", "object-detection or image-classification"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 7520153641230220879, 15768111495623467099, 18446744073709551615, 18446744073709551615, 97, 110, 97, 110, 21, 23, true, "training data", "training data"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15907509424283598820, 1309701731943016996, 18446744073709551615, 18446744073709551615, 168, 178, 168, 178, 35, 37, true, "large size", "large size"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 1001363875463467083, 8251004718776725960, 18446744073709551615, 18446744073709551615, 225, 252, 225, 252, 48, 50, true, "data-augmentation technique", "data-augmentation technique"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 16323431510197362598, 15799839381285791422, 18446744073709551615, 18446744073709551615, 305, 336, 305, 336, 58, 60, true, "image-classification algorithms", "image-classification algorithms"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 329104161667992688, 3729850945376531663, 18446744073709551615, 18446744073709551615, 62, 67, 62, 67, 13, 14, true, "pages", "pages"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 329104161667992688, 3729850945376431434, 18446744073709551615, 18446744073709551615, 88, 93, 88, 93, 19, 20, true, "pages", "pages"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 329104161667992688, 3729850945376541076, 18446744073709551615, 18446744073709551615, 135, 140, 135, 140, 28, 29, true, "pages", "pages"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 5456363662501675139, 15220816455592297792, 18446744073709551615, 18446744073709551615, 145, 155, 145, 155, 30, 31, true, "evaluation", "evaluation"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 8106396676716241904, 14648059457390189617, 18446744073709551615, 18446744073709551615, 186, 193, 186, 193, 39, 40, true, "dataset", "dataset"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 3458523808570659318, 12440619335925777434, 18446744073709551615, 18446744073709551615, 285, 301, 285, 301, 56, 57, true, "object-detection", "object-detection"], ["verb", "compound-verb", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 6184629322227841334, 13137096673988516295, 18446744073709551615, 18446744073709551615, 72, 81, 72, 81, 16, 18, true, "have used", "have used"], ["verb", "compound-verb", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 6850430008328108593, 5399883786399020567, 18446744073709551615, 18446744073709551615, 198, 220, 198, 220, 42, 47, true, "did not need to employ", "did not need to employ"], ["verb", "compound-verb", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15603860103506328238, 5837976326483269424, 18446744073709551615, 18446744073709551615, 260, 270, 260, 270, 52, 54, true, "is usually", "is usually"], ["verb", "single-verb", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 389609625632409820, 3852621341384686815, 18446744073709551615, 18446744073709551615, 115, 119, 115, 119, 24, 25, true, "kept", "kept"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 6514618750987489747, 8397355782128073403, 18446744073709551615, 18446744073709551615, 271, 284, 271, 284, 54, 56, true, "necessary for", "necessary for"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15033932706303876615, 13736182680378928256, 18446744073709551615, 18446744073709551615, 45, 55, 45, 55, 10, 12, true, "From these", "From these"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15441160910541487053, 288595830704355564, 18446744073709551615, 18446744073709551615, 94, 96, 94, 96, 20, 21, true, "as", "as"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 12178341415895625940, 2076970598603837899, 18446744073709551615, 18446744073709551615, 141, 144, 141, 144, 29, 30, true, "for", "for"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 16381206565712212855, 7145463495673152577, 18446744073709551615, 18446744073709551615, 179, 185, 179, 185, 37, 39, true, "of the", "of the"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 16381206519425733256, 5276673569012284605, 18446744073709551615, 18446744073709551615, 161, 167, 161, 167, 33, 35, true, "to the", "to the"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15441160910541485865, 288606447891747161, 18446744073709551615, 18446744073709551615, 211, 213, 211, 213, 45, 46, true, "to", "to"], ["numval", "ival", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 17767354399704235157, 12945931063214660534, 18446744073709551615, 18446744073709551615, 370, 371, 370, 371, 70, 71, true, "5", "5"], ["numval", "ival", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 17767354399704235157, 12945931063214542735, 18446744073709551615, 18446744073709551615, 579, 580, 579, 580, 110, 111, true, "5", "5"], ["expression", "wtoken-concatenation", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206533950151485, 9489822756853105574, 18446744073709551615, 18446744073709551615, 451, 457, 451, 457, 86, 87, true, "YOLOv2", "YOLOv2"], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 4061378656158338373, 14465990865377588223, 18446744073709551615, 18446744073709551615, 0, 151, 0, 151, 0, 30, true, "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes.", "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 7230305818063790009, 5970193631513633625, 18446744073709551615, 18446744073709551615, 152, 319, 152, 319, 30, 59, true, "The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks.", "The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 17664701293225181585, 4146648270552361528, 18446744073709551615, 18446744073709551615, 320, 372, 320, 372, 59, 72, true, "An example of such an image can be seen in Figure 5.", "An example of such an image can be seen in Figure 5."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 5465327582624645132, 9551200007057793983, 18446744073709551615, 18446744073709551615, 373, 514, 373, 514, 72, 99, true, "The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts.", "The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 7667365579028617388, 5332282821916987263, 18446744073709551615, 18446744073709551615, 515, 669, 515, 669, 99, 127, true, "Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition.", "Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 10642870011809358022, 16157254256485996574, 18446744073709551615, 18446744073709551615, 670, 828, 670, 828, 127, 152, true, "This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet.", "This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 986317930482970848, 7349070355092674215, 18446744073709551615, 18446744073709551615, 829, 955, 829, 955, 152, 179, true, "We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page.", "We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page."], ["term", "enum-term-mark-1", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 10919039264205031078, 14544387719036714097, 18446744073709551615, 18446744073709551615, 739, 777, 739, 777, 138, 144, true, "Japanese, Chinese or Korean characters", "Japanese, Chinese or Korean characters"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 11734732391183296006, 2587291782923000164, 18446744073709551615, 18446744073709551615, 56, 73, 56, 73, 12, 15, true, "original PDF page", "original PDF page"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15093911763430035033, 3694205018907190846, 18446744073709551615, 18446744073709551615, 91, 111, 91, 111, 19, 21, true, "image representation", "image representation"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14650937348812924036, 5539748062603626443, 18446744073709551615, 18446744073709551615, 126, 134, 126, 134, 24, 26, true, "PDF page", "PDF page"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 2707481668736505496, 7198690923753170302, 18446744073709551615, 18446744073709551615, 140, 150, 140, 150, 27, 29, true, "cell boxes", "cell boxes"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 13885091818292120189, 16117092993028309, 18446744073709551615, 18446744073709551615, 219, 234, 219, 234, 41, 44, true, "input PDF pages", "input PDF pages"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 17376821969571444655, 15750834930012037640, 18446744073709551615, 18446744073709551615, 298, 318, 298, 318, 55, 58, true, "deep neural networks", "deep neural networks"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 17072853444362853273, 16262656258750397787, 18446744073709551615, 18446744073709551615, 377, 395, 377, 395, 73, 76, true, "red bounding boxes", "red bounding boxes"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 5748925367544727060, 8711755993137804135, 18446744073709551615, 18446744073709551615, 550, 560, 550, 560, 105, 107, true, "text cells", "text cells"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 11738704476441755021, 599225668707656883, 18446744073709551615, 18446744073709551615, 614, 631, 614, 631, 118, 120, true, "original document", "original document"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 10286550524249249390, 16039764385894237573, 18446744073709551615, 18446744073709551615, 646, 668, 646, 668, 124, 126, true, "geometrical definition", "geometrical definition"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 9867266577668942964, 9457468283515724221, 18446744073709551615, 18446744073709551615, 710, 733, 710, 733, 134, 137, true, "example Asian documents", "example Asian documents"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 10390141656432228254, 10147168339704319013, 18446744073709551615, 18446744073709551615, 760, 777, 760, 777, 142, 144, true, "Korean characters", "Korean characters"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 12210549037008273093, 7960050686572576759, 18446744073709551615, 18446744073709551615, 785, 803, 785, 803, 145, 147, true, "European languages", "European languages"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 9514268965649159808, 10277151526855785007, 18446744073709551615, 18446744073709551615, 813, 827, 813, 827, 149, 151, true, "roman alphabet", "roman alphabet"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 692814466722212625, 12089462010086874276, 18446744073709551615, 18446744073709551615, 848, 867, 848, 867, 157, 160, true, "deep neural network", "deep neural network"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 13643956046017087081, 16700714172171101819, 18446744073709551615, 18446744073709551615, 884, 903, 884, 903, 164, 166, true, "specific characters", "specific characters"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104159216638303, 14652104812982082451, 18446744073709551615, 18446744073709551615, 21, 26, 21, 26, 5, 6, true, "table", "table"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161828335551, 9390410964741037597, 18446744073709551615, 18446744073709551615, 43, 48, 43, 48, 9, 10, true, "image", "image"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 6165973192311129130, 13673636908541982518, 18446744073709551615, 18446744073709551615, 156, 165, 156, 165, 31, 32, true, "reasoning", "reasoning"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 11600564911974996302, 11742311844381609874, 18446744073709551615, 18446744073709551615, 195, 206, 195, 206, 38, 39, true, "variability", "variability"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 10219753174915530122, 1883418763479216285, 18446744073709551615, 18446744073709551615, 277, 290, 277, 290, 52, 53, true, "effectiveness", "effectiveness"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 8106397496085150773, 4842581059053952340, 18446744073709551615, 18446744073709551615, 323, 330, 323, 330, 60, 61, true, "example", "example"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161828335551, 9390410964741015440, 18446744073709551615, 18446744073709551615, 342, 347, 342, 347, 64, 65, true, "image", "image"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206514091025767, 14972195604838550183, 18446744073709551615, 18446744073709551615, 363, 369, 363, 369, 69, 70, true, "Figure", "Figure"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206513098478539, 9453181643734708057, 18446744073709551615, 18446744073709551615, 407, 413, 407, 413, 78, 79, true, "tables", "tables"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206521509536706, 9304998689309080594, 18446744073709551615, 18446744073709551615, 420, 426, 420, 426, 81, 82, true, "result", "result"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14103651237077221583, 692779663451117535, 18446744073709551615, 18446744073709551615, 434, 444, 434, 444, 84, 85, true, "prediction", "prediction"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206533950151485, 9489822756853105574, 18446744073709551615, 18446744073709551615, 451, 457, 451, 457, 86, 87, true, "YOLOv2", "YOLOv2"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161828335551, 9390410964741025354, 18446744073709551615, 18446744073709551615, 480, 485, 480, 485, 92, 93, true, "image", "image"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161610777240, 9390567938245318871, 18446744073709551615, 18446744073709551615, 499, 504, 499, 504, 96, 97, true, "model", "model"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 5688100034928622351, 14997538649878118765, 18446744073709551615, 18446744073709551615, 529, 542, 529, 542, 102, 103, true, "visualisation", "visualisation"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206514091025767, 14972195604838595929, 18446744073709551615, 18446744073709551615, 572, 578, 572, 578, 109, 110, true, "Figure", "Figure"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 389609625631325904, 11992223016262407561, 18446744073709551615, 18446744073709551615, 602, 606, 602, 606, 115, 116, true, "text", "text"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 12178341415895621781, 15944948045218351689, 18446744073709551615, 18446744073709551615, 693, 696, 693, 696, 131, 132, true, "one", "one"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206590620761857, 12848968517137340397, 18446744073709551615, 18446744073709551615, 923, 929, 923, 929, 171, 172, true, "layout", "layout"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161531686411, 9388401245790561328, 18446744073709551615, 18446744073709551615, 937, 942, 937, 942, 174, 175, true, "cells", "cells"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 389609625632301461, 11992203554903389909, 18446744073709551615, 18446744073709551615, 950, 954, 950, 954, 177, 178, true, "page", "page"], ["verb", "compound-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 9093868816922383778, 10658048985165887467, 18446744073709551615, 18446744073709551615, 3, 16, 3, 16, 1, 4, true, "do not locate", "do not locate"], ["verb", "compound-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 6623117478668938130, 11887640081287291311, 18446744073709551615, 18446744073709551615, 178, 190, 178, 190, 34, 37, true, "is to reduce", "is to reduce"], ["verb", "compound-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14892762836247367071, 16988368498155692642, 18446744073709551615, 18446744073709551615, 348, 359, 348, 359, 65, 68, true, "can be seen", "can be seen"], ["verb", "compound-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 13754841728266722569, 5786336844458416623, 18446744073709551615, 18446744073709551615, 581, 597, 581, 597, 111, 114, true, "does not include", "does not include"], ["verb", "compound-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 2266019913458277733, 12639741498410603017, 18446744073709551615, 18446744073709551615, 832, 843, 832, 843, 153, 156, true, "do not want", "do not want"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206517379850387, 14955464588357672805, 18446744073709551615, 18446744073709551615, 119, 125, 119, 125, 23, 24, true, "parsed", "parsed"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14637951609302951605, 17658725300768464798, 18446744073709551615, 18446744073709551615, 264, 272, 264, 272, 50, 51, true, "increase", "increase"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 12178341415895564896, 15959393995125623708, 18446744073709551615, 18446744073709551615, 414, 417, 414, 417, 79, 80, true, "are", "are"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104159157798023, 14628335270365657217, 18446744073709551615, 18446744073709551615, 445, 450, 445, 450, 85, 86, true, "using", "using"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 12178341415895564896, 15959393995125627936, 18446744073709551615, 18446744073709551615, 462, 465, 462, 465, 88, 89, true, "are", "are"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14814125862003597070, 852547638956416530, 18446744073709551615, 18446744073709551615, 505, 513, 505, 513, 97, 98, true, "predicts", "predicts"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 389609625695584167, 11991047750942547462, 18446744073709551615, 18446744073709551615, 515, 519, 515, 519, 99, 100, true, "Note", "Note"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541486535, 16635202711086327418, 18446744073709551615, 18446744073709551615, 675, 677, 675, 677, 128, 129, true, "is", "is"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14652282388581682239, 13937110036866613314, 18446744073709551615, 18446744073709551615, 697, 705, 697, 705, 132, 133, true, "compares", "compares"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161710991423, 9388791396474858101, 18446744073709551615, 18446744073709551615, 871, 876, 871, 876, 161, 162, true, "focus", "focus"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 8106464529736241562, 11507614992345422349, 18446744073709551615, 18446744073709551615, 238, 245, 238, 245, 45, 47, true, "much as", "much as"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 5950055285115702077, 1625228371522495478, 18446744073709551615, 18446744073709551615, 466, 475, 466, 475, 89, 91, true, "absent in", "absent in"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 4711116093400357657, 13061186436309055298, 18446744073709551615, 18446744073709551615, 561, 571, 561, 571, 107, 109, true, "visible in", "visible in"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206566339127348, 18133865400670459150, 18446744073709551615, 18446744073709551615, 36, 42, 36, 42, 7, 9, true, "on the", "on the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169631358, 18446744073709551615, 18446744073709551615, 49, 55, 49, 55, 10, 12, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161572724641, 9391892882871994485, 18446744073709551615, 18446744073709551615, 85, 90, 85, 90, 17, 19, true, "on an", "on an"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169619016, 18446744073709551615, 18446744073709551615, 112, 118, 112, 118, 21, 23, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 389609625618037948, 11995818646377557360, 18446744073709551615, 18446744073709551615, 135, 139, 135, 139, 26, 27, true, "with", "with"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 13689038747610583945, 10114373553176097977, 18446744073709551615, 18446744073709551615, 166, 177, 166, 177, 32, 34, true, "behind this", "behind this"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 2011002864324373888, 658421949741848157, 18446744073709551615, 18446744073709551615, 207, 218, 207, 218, 39, 41, true, "between all", "between all"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169625865, 18446744073709551615, 18446744073709551615, 291, 297, 291, 297, 53, 55, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541485670, 16635202722366690068, 18446744073709551615, 18446744073709551615, 331, 333, 331, 333, 61, 62, true, "of", "of"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541486538, 16635202713083707741, 18446744073709551615, 18446744073709551615, 360, 362, 360, 362, 68, 69, true, "in", "in"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15388840276945242407, 16541083529619122756, 18446744073709551615, 18446744073709551615, 396, 406, 396, 406, 76, 78, true, "around the", "around the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169622547, 18446744073709551615, 18446744073709551615, 427, 433, 427, 433, 82, 84, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541485678, 16635202722513330465, 18446744073709551615, 18446744073709551615, 486, 488, 486, 488, 93, 94, true, "on", "on"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14634130761162415388, 3116502908862951252, 18446744073709551615, 18446744073709551615, 520, 528, 520, 528, 100, 102, true, "that the", "that the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169642174, 18446744073709551615, 18446744073709551615, 543, 549, 543, 549, 103, 105, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169466222, 18446744073709551615, 18446744073709551615, 607, 613, 607, 613, 116, 118, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 12178341415895625940, 15944948137295589714, 18446744073709551615, 18446744073709551615, 706, 709, 706, 709, 133, 134, true, "for", "for"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 389609625618037948, 11995818646377545406, 18446744073709551615, 18446744073709551615, 734, 738, 734, 738, 137, 138, true, "with", "with"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206519567123880, 14860984398233775563, 18446744073709551615, 18446744073709551615, 778, 784, 778, 784, 144, 145, true, "versus", "versus"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14638857868319795209, 10777935728644887730, 18446744073709551615, 18446744073709551615, 804, 812, 804, 812, 147, 149, true, "with the", "with the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206566339127348, 18133865400670791260, 18446744073709551615, 18446744073709551615, 877, 883, 877, 883, 162, 164, true, "on the", "on the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206566339127348, 18133865400670794962, 18446744073709551615, 18446744073709551615, 916, 922, 916, 922, 169, 171, true, "on the", "on the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169470569, 18446744073709551615, 18446744073709551615, 930, 936, 930, 936, 172, 174, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206560518651853, 2434079979099920259, 18446744073709551615, 18446744073709551615, 943, 949, 943, 949, 175, 177, true, "in the", "in the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541485865, 16635202727199936440, 18446744073709551615, 18446744073709551615, 181, 183, 181, 183, 35, 36, true, "to", "to"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541485865, 16635202727200087940, 18446744073709551615, 18446744073709551615, 868, 870, 868, 870, 160, 161, true, "to", "to"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 17767354399704235161, 12733743888687180225, 18446744073709551615, 18446744073709551615, 93, 94, 93, 94, 16, 17, true, "1", "1"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415896426714, 4652804192217870476, 18446744073709551615, 18446744073709551615, 291, 294, 291, 294, 53, 54, true, "100", "100"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104147765109382, 8033726402022826926, 18446744073709551615, 18446744073709551615, 312, 317, 312, 317, 58, 59, true, "25000", "25000"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415896426714, 4652804192217923506, 18446744073709551615, 18446744073709551615, 354, 357, 354, 357, 66, 67, true, "100", "100"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 17767354399704235152, 12733743887789901018, 18446744073709551615, 18446744073709551615, 508, 509, 508, 509, 91, 92, true, "8", "8"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541481982, 2599356122854466353, 18446744073709551615, 18446744073709551615, 894, 896, 892, 894, 164, 165, true, "10", "10"], ["parenthesis", "round brackets", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 10542145200230278619, 11521937276206628775, 18446744073709551615, 18446744073709551615, 889, 912, 889, 910, 162, 171, true, "(\u2248 10 pages/sec/node)", "(\u2248 10 pages/sec/node)"], ["expression", "common", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541486545, 2599358878961543341, 18446744073709551615, 18446744073709551615, 303, 307, 303, 307, 56, 57, true, "ie", "i.e."], ["expression", "word-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 6285955549867796622, 12901492066051428715, 18446744073709551615, 18446744073709551615, 108, 124, 108, 124, 21, 22, true, "time-to-solution", "time-to-solution"], ["expression", "word-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15656590191683919916, 3502038016915722737, 18446744073709551615, 18446744073709551615, 385, 398, 385, 398, 73, 74, true, "out-ofthe-box", "out-ofthe-box"], ["expression", "word-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104162326555074, 12378649640990487310, 18446744073709551615, 18446744073709551615, 406, 411, 406, 411, 75, 76, true, "R-CNN", "R-CNN"], ["expression", "word-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 6285955549867796622, 12901492066051459793, 18446744073709551615, 18446744073709551615, 651, 667, 651, 667, 119, 120, true, "time-to-solution", "time-to-solution"], ["expression", "wtoken-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206533950151485, 7463375822213972642, 18446744073709551615, 18446744073709551615, 493, 499, 493, 499, 89, 90, true, "YOLOv2", "YOLOv2"], ["expression", "wtoken-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206533950151485, 7463375822214056128, 18446744073709551615, 18446744073709551615, 787, 793, 787, 793, 145, 146, true, "YOLOv2", "YOLOv2"], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 11214795667451364706, 15381220353542038442, 18446744073709551615, 18446744073709551615, 0, 83, 0, 83, 0, 14, true, "Let us now discuss both deep neural network training microservices on the platform.", "Let us now discuss both deep neural network training microservices on the platform."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 17449560956934989976, 12526021364899620960, 18446744073709551615, 18446744073709551615, 84, 227, 84, 227, 14, 41, true, "In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision.", "In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 13058222401901188325, 14090621328054154871, 18446744073709551615, 18446744073709551615, 228, 364, 228, 364, 41, 69, true, "In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times.", "In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16675190523738339061, 7202929718160933759, 18446744073709551615, 18446744073709551615, 365, 587, 365, 587, 69, 107, true, "We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied.", "We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 10235041227958384786, 9628423971346406996, 18446744073709551615, 18446744073709551615, 588, 691, 588, 691, 107, 125, true, "We believe that this is the main origin for the discrepancy of time-to-solution for the training phase.", "We believe that this is the main origin for the discrepancy of time-to-solution for the training phase."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 11909429825414533491, 7916582600131240808, 18446744073709551615, 18446744073709551615, 692, 731, 692, 731, 125, 133, true, "The same holds true for the prediction.", "The same holds true for the prediction."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 7447987213947934224, 363147361352019607, 18446744073709551615, 18446744073709551615, 732, 913, 732, 911, 133, 172, true, "Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", "Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node)."], ["term", "enum-term-mark-2", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 11037453576911667853, 14703723871622436608, 18446744073709551615, 18446744073709551615, 206, 226, 206, 226, 37, 40, true, "recall and precision", "recall and precision"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 13848731310568719727, 15095939915134652393, 18446744073709551615, 18446744073709551615, 24, 66, 24, 66, 5, 10, true, "deep neural network training microservices", "deep neural network training microservices"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 1353284443403550494, 17158735888603064564, 18446744073709551615, 18446744073709551615, 155, 166, 155, 166, 27, 29, true, "single page", "single page"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12141441254112579393, 8271858979549873106, 18446744073709551615, 18446744073709551615, 235, 249, 235, 249, 43, 45, true, "training phase", "training phase"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 18169256434676190331, 11634553033353850813, 18446744073709551615, 18446744073709551615, 318, 329, 318, 329, 59, 61, true, "page images", "page images"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 1151653930094198889, 6279210758650536115, 18446744073709551615, 18446744073709551615, 385, 411, 385, 411, 73, 76, true, "out-ofthe-box Faster R-CNN", "out-ofthe-box Faster R-CNN"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12141441254112579393, 8271858979549955993, 18446744073709551615, 18446744073709551615, 471, 485, 471, 485, 85, 87, true, "training phase", "training phase"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 2503288761659507641, 9743919505994936922, 18446744073709551615, 18446744073709551615, 493, 507, 493, 507, 89, 91, true, "YOLOv2 batches", "YOLOv2 batches"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16269569307198368878, 14888617347479270783, 18446744073709551615, 18446744073709551615, 616, 627, 616, 627, 113, 115, true, "main origin", "main origin"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12141441254112579393, 8271858979549787104, 18446744073709551615, 18446744073709551615, 676, 690, 676, 690, 122, 124, true, "training phase", "training phase"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 4237078182846444452, 7428907322213125011, 18446744073709551615, 18446744073709551615, 787, 806, 787, 806, 145, 147, true, "YOLOv2 architecture", "YOLOv2 architecture"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14814125365076808131, 10453527503990612347, 18446744073709551615, 18446744073709551615, 74, 82, 74, 82, 12, 13, true, "platform", "platform"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 6285955549867796622, 12901492066051428715, 18446744073709551615, 18446744073709551615, 108, 124, 108, 124, 21, 22, true, "time-to-solution", "time-to-solution"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14634153919632515335, 365322755488345032, 18446744073709551615, 18446744073709551615, 129, 137, 129, 137, 23, 24, true, "training", "training"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 5731695876385560379, 1758035992340926235, 18446744073709551615, 18446744073709551615, 182, 193, 182, 193, 33, 34, true, "performance", "performance"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104159246284497, 8646809584775625185, 18446744073709551615, 18446744073709551615, 197, 202, 197, 202, 35, 36, true, "terms", "terms"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206521531485437, 11024740562177031234, 18446744073709551615, 18446744073709551615, 206, 212, 206, 212, 37, 38, true, "recall", "recall"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 6184954595655792282, 2740680839011190488, 18446744073709551615, 18446744073709551615, 217, 226, 217, 226, 39, 40, true, "precision", "precision"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15359670209433732834, 11505488180295702106, 18446744073709551615, 18446744073709551615, 271, 281, 271, 281, 50, 51, true, "algorithms", "algorithms"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206565270919865, 7578403846550666862, 18446744073709551615, 18446744073709551615, 295, 301, 295, 301, 54, 55, true, "epochs", "epochs"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106342689863369930, 11135817727321581998, 18446744073709551615, 18446744073709551615, 346, 353, 346, 353, 65, 66, true, "network", "network"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104159219994925, 8640251348534211245, 18446744073709551615, 18446744073709551615, 358, 363, 358, 363, 67, 68, true, "times", "times"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 2455254482033220466, 11766388440552122471, 18446744073709551615, 18446744073709551615, 417, 427, 417, 427, 77, 78, true, "Tensorflow", "Tensorflow"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14652257119591248677, 16033503133782517052, 18446744073709551615, 18446744073709551615, 451, 459, 451, 459, 82, 83, true, "batching", "batching"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206560620045048, 7774432132927566429, 18446744073709551615, 18446744073709551615, 510, 516, 510, 516, 92, 93, true, "images", "images"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 389609625631241985, 11701890325058806343, 18446744073709551615, 18446744073709551615, 522, 526, 522, 526, 95, 96, true, "time", "time"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206519429140242, 7379520217990130218, 18446744073709551615, 18446744073709551615, 528, 534, 528, 534, 97, 98, true, "thanks", "thanks"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104161828335551, 12350292282878253456, 18446744073709551615, 18446744073709551615, 541, 546, 541, 546, 100, 101, true, "image", "image"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 1478855739373258073, 16768663803468661998, 18446744073709551615, 18446744073709551615, 636, 647, 636, 647, 117, 118, true, "discrepancy", "discrepancy"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 6285955549867796622, 12901492066051459793, 18446744073709551615, 18446744073709551615, 651, 667, 651, 667, 119, 120, true, "time-to-solution", "time-to-solution"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14103651237077221583, 1262912962491166125, 18446744073709551615, 18446744073709551615, 720, 730, 720, 730, 131, 132, true, "prediction", "prediction"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104161594416377, 12352174572142722555, 18446744073709551615, 18446744073709551615, 752, 757, 752, 757, 137, 138, true, "point", "point"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 389609625619349298, 11674445135708463101, 18446744073709551615, 18446744073709551615, 761, 765, 761, 765, 139, 140, true, "view", "view"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14814125365076808131, 10453527503990666008, 18446744073709551615, 18446744073709551615, 773, 781, 773, 781, 142, 143, true, "platform", "platform"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 5300910362436626583, 8416985596353960814, 18446744073709551615, 18446744073709551615, 831, 841, 831, 841, 151, 152, true, "deployment", "deployment"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 5748881733723959229, 774140163382881369, 18446744073709551615, 18446744073709551615, 878, 888, 878, 888, 161, 162, true, "throughput", "throughput"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104161667992688, 11860182035845045407, 18446744073709551615, 18446744073709551615, 897, 902, 895, 900, 165, 166, true, "pages", "pages"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415895638619, 4652781528079557311, 18446744073709551615, 18446744073709551615, 903, 906, 901, 904, 167, 168, true, "sec", "sec"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8526860058636487735, 15955870111469140752, 18446744073709551615, 18446744073709551615, 330, 341, 330, 341, 61, 64, true, "were fed to", "were fed to"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 436128332273723128, 12647681645588449593, 18446744073709551615, 18446744073709551615, 428, 446, 428, 446, 78, 81, true, "does not implement", "does not implement"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 2778023241922598008, 5238034027547162597, 18446744073709551615, 18446744073709551615, 562, 586, 562, 586, 103, 106, true, "is automatically applied", "is automatically applied"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 18110906195041757747, 18325478196446152715, 18446744073709551615, 18446744073709551615, 807, 826, 807, 826, 147, 150, true, "seems better suited", "seems better suited"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 2039260297159993470, 11990526724975601040, 18446744073709551615, 18446744073709551615, 849, 863, 849, 863, 155, 158, true, "allows to have", "allows to have"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104147695762436, 8034268094721023599, 18446744073709551615, 18446744073709551615, 906, 911, 904, 909, 168, 170, true, "/node", "/node"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415896275389, 4652821010771256286, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "Let", "Let"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106397868479560363, 5980952610294528544, 18446744073709551615, 18446744073709551615, 11, 18, 11, 18, 3, 4, true, "discuss", "discuss"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 389609625741152123, 11698558665309690548, 18446744073709551615, 18446744073709551615, 99, 103, 99, 103, 19, 20, true, "show", "show"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14103651237077222912, 1262912573528208063, 18446744073709551615, 18446744073709551615, 142, 152, 142, 152, 25, 26, true, "predicting", "predicting"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206564578053366, 7676681725158730412, 18446744073709551615, 18446744073709551615, 254, 260, 254, 260, 47, 48, true, "ensure", "ensure"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415895649364, 4652781883350111182, 18446744073709551615, 18446744073709551615, 282, 285, 282, 285, 51, 52, true, "ran", "ran"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541486545, 2599358878961543341, 18446744073709551615, 18446744073709551615, 303, 307, 303, 307, 56, 57, true, "ie", "i.e."], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106342033696543838, 10720166011679309151, 18446744073709551615, 18446744073709551615, 368, 375, 368, 375, 70, 71, true, "observe", "observe"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14634109260174176887, 3059970276159290973, 18446744073709551615, 18446744073709551615, 547, 555, 547, 555, 101, 102, true, "resizing", "resizing"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106397860663428876, 2379893300042418437, 18446744073709551615, 18446744073709551615, 591, 598, 591, 598, 108, 109, true, "believe", "believe"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541486535, 2599358878751709903, 18446744073709551615, 18446744073709551615, 609, 611, 609, 611, 111, 112, true, "is", "is"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104161533598953, 11928511646589428500, 18446744073709551615, 18446744073709551615, 701, 706, 701, 706, 127, 128, true, "holds", "holds"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 17767354399704339168, 12733722225655458138, 18446744073709551615, 18446744073709551615, 890, 893, 890, 891, 163, 164, true, "\u2248", "\u2248"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14634153888224917429, 9004783391296823986, 18446744073709551615, 18446744073709551615, 707, 715, 707, 715, 128, 130, true, "true for", "true for"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206566339127348, 7523956295610612753, 18446744073709551615, 18446744073709551615, 67, 73, 67, 73, 10, 12, true, "on the", "on the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541480354, 2599356225275492892, 18446744073709551615, 18446744073709551615, 84, 86, 84, 86, 14, 15, true, "In", "In"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415895625940, 4653059449996398372, 18446744073709551615, 18446744073709551615, 125, 128, 125, 128, 22, 23, true, "for", "for"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206568455155979, 8062169836442615762, 18446744073709551615, 18446744073709551615, 175, 181, 175, 181, 31, 33, true, "as the", "as the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541486538, 2599358879133688732, 18446744073709551615, 18446744073709551615, 194, 196, 194, 196, 34, 35, true, "in", "in"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541485670, 2599358870315263905, 18446744073709551615, 18446744073709551615, 203, 205, 203, 205, 36, 37, true, "of", "of"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16380809977974811061, 11732651135400697626, 18446744073709551615, 18446744073709551615, 228, 234, 228, 234, 41, 43, true, "In the", "In the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 3504047303032829403, 14383519537824238604, 18446744073709551615, 18446744073709551615, 261, 270, 261, 270, 48, 50, true, "that both", "that both"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14634130761162415388, 10901511361886185107, 18446744073709551615, 18446744073709551615, 376, 384, 376, 384, 71, 73, true, "that the", "that the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 389609625697843734, 11702137981936100184, 18446744073709551615, 18446744073709551615, 412, 416, 412, 416, 76, 77, true, "from", "from"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 2511937742856062086, 2355253536228937084, 18446744073709551615, 18446744073709551615, 460, 470, 460, 470, 83, 85, true, "during the", "during the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104161580427521, 12357508218241612915, 18446744073709551615, 18446744073709551615, 487, 492, 487, 492, 88, 89, true, "while", "while"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 389609625700792947, 11701923673037716898, 18446744073709551615, 18446744073709551615, 517, 521, 517, 521, 93, 95, true, "at a", "at a"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 3504047303127782210, 14386938221778026486, 18446744073709551615, 18446744073709551615, 599, 608, 599, 608, 109, 111, true, "that this", "that this"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106397727991264470, 4625930078648415204, 18446744073709551615, 18446744073709551615, 628, 635, 628, 635, 115, 117, true, "for the", "for the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541485670, 2599358870315233503, 18446744073709551615, 18446744073709551615, 648, 650, 648, 650, 118, 119, true, "of", "of"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106397727991264470, 4625930078648412606, 18446744073709551615, 18446744073709551615, 668, 675, 668, 675, 120, 122, true, "for the", "for the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14637917359887717745, 11341143089950838331, 18446744073709551615, 18446744073709551615, 743, 751, 743, 751, 135, 137, true, "from the", "from the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541485670, 2599358870315209500, 18446744073709551615, 18446744073709551615, 758, 760, 758, 760, 138, 139, true, "of", "of"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206565712212855, 7825456364758516667, 18446744073709551615, 18446744073709551615, 766, 772, 766, 772, 140, 142, true, "of the", "of the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415895625940, 4653059449996278256, 18446744073709551615, 18446744073709551615, 827, 830, 827, 830, 150, 151, true, "for", "for"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541487053, 2599358845406797182, 18446744073709551615, 18446744073709551615, 843, 845, 843, 845, 153, 154, true, "as", "as"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206519425733256, 7379223398534589543, 18446744073709551615, 18446744073709551615, 339, 345, 339, 345, 63, 65, true, "to the", "to the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104159243175056, 8638673086732548345, 18446744073709551615, 18446744073709551615, 535, 540, 535, 540, 98, 100, true, "to an", "to an"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541485865, 2599358851656141726, 18446744073709551615, 18446744073709551615, 856, 858, 856, 858, 156, 157, true, "to", "to"], ["numval", "ival", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 17767354399704235160, 8675424045619207091, 18446744073709551615, 18446744073709551615, 231, 232, 231, 232, 35, 36, true, "0", "0"], ["numval", "ival", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 17767354399704235161, 8675424045602759514, 18446744073709551615, 18446744073709551615, 237, 238, 237, 238, 37, 38, true, "1", "1"], ["name", "name-concatenation", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 2906527799746313192, 7162268361091325108, 18446744073709551615, 18446744073709551615, 354, 363, 354, 363, 61, 64, true, "Not-Table", "Not-Table"], ["expression", "word-concatenation", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 3002943871017471876, 6314608314970297277, 18446744073709551615, 18446744073709551615, 49, 63, 49, 63, 9, 10, true, "pre-processing", "pre-processing"], ["expression", "word-concatenation", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 3458523808570659318, 9975991896240937817, 18446744073709551615, 18446744073709551615, 141, 157, 141, 157, 22, 23, true, "object-detection", "object-detection"], ["sentence", "", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 7429795002768371766, 12580216355924388710, 18446744073709551615, 18446744073709551615, 0, 136, 0, 136, 0, 21, true, "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously.", "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously."], ["sentence", "", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 16291040095568243120, 1594236025068685140, 18446744073709551615, 18446744073709551615, 137, 239, 137, 239, 21, 39, true, "The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1.", "The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1."], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 4471200074237295914, 1456466697102274833, 18446744073709551615, 18446744073709551615, 8, 28, 8, 28, 2, 4, true, "performance analysis", "performance analysis"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 4048925549312111393, 15542194947650577050, 18446744073709551615, 18446744073709551615, 49, 69, 49, 69, 9, 11, true, "pre-processing stage", "pre-processing stage"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 15479850329146856745, 787461524154987429, 18446744073709551615, 18446744073709551615, 141, 166, 141, 166, 22, 24, true, "object-detection networks", "object-detection networks"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 4874473477449861741, 3504061852580538950, 18446744073709551615, 18446744073709551615, 206, 222, 206, 222, 32, 34, true, "confidence level", "confidence level"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106464574171450434, 15318495777273702751, 18446744073709551615, 18446744073709551615, 107, 114, 107, 114, 17, 18, true, "metrics", "metrics"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 12178341415895638602, 6222934568051327791, 18446744073709551615, 18446744073709551615, 177, 180, 177, 180, 26, 27, true, "set", "set"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 329104159325617355, 15838640579331060931, 18446744073709551615, 18446744073709551615, 193, 198, 193, 198, 29, 30, true, "boxes", "boxes"], ["verb", "compound-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 6181919773618307675, 13087072183397009947, 18446744073709551615, 18446744073709551615, 76, 85, 76, 85, 12, 14, true, "is needed", "is needed"], ["verb", "compound-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 3312537848285575572, 3682069485478563076, 18446744073709551615, 18446744073709551615, 115, 135, 115, 135, 18, 20, true, "described previously", "described previously"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 12178341415895617983, 6222927924466837926, 18446744073709551615, 18446744073709551615, 30, 33, 30, 33, 5, 6, true, "let", "let"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106342536055423396, 1623603363237275433, 18446744073709551615, 18446744073709551615, 37, 44, 37, 44, 7, 8, true, "outline", "outline"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 5947879507992892292, 3137884750946432419, 18446744073709551615, 18446744073709551615, 93, 102, 93, 102, 15, 16, true, "computing", "computing"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106476016678293182, 8897474810961070939, 18446744073709551615, 18446744073709551615, 167, 174, 167, 174, 24, 25, true, "predict", "predict"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 14652253380850532610, 15688350870772298580, 18446744073709551615, 18446744073709551615, 184, 192, 184, 192, 28, 29, true, "bounding", "bounding"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106351438779293396, 7036921387199751321, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "For the", "For the"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 16381206569837301772, 829894264837423586, 18446744073709551615, 18446744073709551615, 86, 92, 86, 92, 14, 15, true, "before", "before"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 15441160910541485670, 15053982237527373603, 18446744073709551615, 18446744073709551615, 181, 183, 181, 183, 27, 28, true, "of", "of"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 16381206557726458966, 4275353707798328089, 18446744073709551615, 18446744073709551615, 199, 205, 199, 205, 30, 32, true, "with a", "with a"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106397860038858133, 2367955007216749470, 18446744073709551615, 18446744073709551615, 223, 230, 223, 230, 34, 35, true, "between", "between"], ["numval", "ival", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 17767354399704235162, 15759397524433803932, 18446744073709551615, 18446744073709551615, 6, 7, 6, 7, 1, 2, true, "2", "2"], ["sentence", "", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 13412490586202463721, 17653988074073433733, 18446744073709551615, 18446744073709551615, 0, 95, 0, 95, 0, 17, true, "Table 2: Performance results for the template specific model of the Physical Review B journals.", "Table 2: Performance results for the template specific model of the Physical Review B journals."], ["sentence", "", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 2713668199866952841, 4447940936101437620, 18446744073709551615, 18446744073709551615, 96, 202, 96, 202, 17, 34, true, "The confusion matrix highlights the huge imbalance between the number of text cells with different labels.", "The confusion matrix highlights the huge imbalance between the number of text cells with different labels."], ["sentence", "", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 12325075441819606052, 4798224535047183092, 18446744073709551615, 18446744073709551615, 203, 310, 203, 310, 34, 53, true, "The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", "The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types."], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 8087581502811400566, 7573439973442034769, 18446744073709551615, 18446744073709551615, 9, 28, 9, 28, 3, 5, true, "Performance results", "Performance results"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 13356790934987174038, 18420992769499992239, 18446744073709551615, 18446744073709551615, 37, 60, 37, 60, 7, 10, true, "template specific model", "template specific model"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 9872729223299515659, 7908640068811257205, 18446744073709551615, 18446744073709551615, 68, 94, 68, 94, 12, 16, true, "Physical Review B journals", "Physical Review B journals"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 5497358094214601811, 7433163521566214246, 18446744073709551615, 18446744073709551615, 100, 116, 100, 116, 18, 20, true, "confusion matrix", "confusion matrix"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 1488936167715046380, 16637143750883657942, 18446744073709551615, 18446744073709551615, 132, 146, 132, 146, 22, 24, true, "huge imbalance", "huge imbalance"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 5748925367544727060, 15357132638157717228, 18446744073709551615, 18446744073709551615, 169, 179, 169, 179, 28, 30, true, "text cells", "text cells"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 220880076010336098, 14991640362132342656, 18446744073709551615, 18446744073709551615, 185, 201, 185, 201, 31, 33, true, "different labels", "different labels"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 4360412890788129778, 6086964040649348468, 18446744073709551615, 18446744073709551615, 216, 232, 216, 232, 37, 39, true, "ensemble machine", "ensemble machine"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 9628232334734286437, 15559530413649010038, 18446744073709551615, 18446744073709551615, 275, 288, 275, 288, 46, 48, true, "high accuracy", "high accuracy"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 5579859536360440221, 12384760726355576022, 18446744073709551615, 18446744073709551615, 298, 309, 298, 309, 50, 52, true, "label types", "label types"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 16381206574973295053, 15664074499384566316, 18446744073709551615, 18446744073709551615, 159, 165, 159, 165, 26, 27, true, "number", "number"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 329104159157898666, 7979932887321468479, 18446744073709551615, 18446744073709551615, 207, 212, 207, 212, 35, 36, true, "usage", "usage"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 8106464574531629743, 13092511743146000891, 18446744073709551615, 18446744073709551615, 242, 249, 242, 249, 40, 41, true, "methods", "methods"], ["verb", "compound-verb", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 12736124800502880399, 3048726189598552717, 18446744073709551615, 18446744073709551615, 250, 267, 250, 267, 41, 44, true, "allows to achieve", "allows to achieve"], ["verb", "single-verb", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 15927123199600624159, 11830974991863511971, 18446744073709551615, 18446744073709551615, 117, 127, 117, 127, 20, 21, true, "highlights", "highlights"], ["verb", "single-verb", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 14639581097006750428, 17977442740486581742, 18446744073709551615, 18446744073709551615, 233, 241, 233, 241, 39, 40, true, "learning", "learning"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 8106397727991264470, 13939727220022896426, 18446744073709551615, 18446744073709551615, 29, 36, 29, 36, 5, 7, true, "for the", "for the"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 16381206565712212855, 15527423972997370423, 18446744073709551615, 18446744073709551615, 61, 67, 61, 67, 10, 12, true, "of the", "of the"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 2011002864325523456, 16665978214615422828, 18446744073709551615, 18446744073709551615, 147, 158, 147, 158, 24, 26, true, "between the", "between the"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 15441160910541485670, 10632466984953712528, 18446744073709551615, 18446744073709551615, 166, 168, 166, 168, 27, 28, true, "of", "of"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 389609625618037948, 18050712937266565062, 18446744073709551615, 18446744073709551615, 180, 184, 180, 184, 30, 31, true, "with", "with"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 15441160910541485670, 10632466984953723750, 18446744073709551615, 18446744073709551615, 213, 215, 213, 215, 36, 37, true, "of", "of"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 14814149446809805987, 2376885852812773633, 18446744073709551615, 18446744073709551615, 289, 297, 289, 297, 48, 50, true, "over all", "over all"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 15441160910541485865, 10632466981388317765, 18446744073709551615, 18446744073709551615, 257, 259, 257, 259, 42, 43, true, "to", "to"], ["numval", "ival", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 17767354399704235160, 13994996428325642210, 18446744073709551615, 18446744073709551615, 443, 444, 443, 444, 78, 79, true, "0", "0"], ["numval", "ival", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 17767354399704235157, 13994996428928834278, 18446744073709551615, 18446744073709551615, 446, 447, 446, 447, 80, 81, true, "5", "5"], ["parenthesis", "round brackets", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 5763721985249138201, 11333613653010201493, 18446744073709551615, 18446744073709551615, 726, 746, 726, 746, 129, 135, true, "(made with a camera)", "(made with a camera)"], ["expression", "word-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 2772095701715059387, 18429532044600751065, 18446744073709551615, 18446744073709551615, 99, 109, 99, 109, 16, 17, true, "dual-class", "dual-class"], ["expression", "word-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 329104162326555074, 15570664097727008132, 18446744073709551615, 18446744073709551615, 460, 465, 460, 465, 84, 85, true, "R-CNN", "R-CNN"], ["expression", "word-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 329104162326555074, 15570664097727047898, 18446744073709551615, 18446744073709551615, 815, 820, 815, 820, 145, 146, true, "R-CNN", "R-CNN"], ["expression", "wtoken-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206533950151485, 198566132787583629, 18446744073709551615, 18446744073709551615, 278, 284, 278, 284, 47, 48, true, "YOLOv2", "YOLOv2"], ["expression", "wtoken-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541480158, 10477275210029982213, 18446744073709551615, 18446744073709551615, 400, 402, 400, 402, 69, 70, true, "F1", "F1"], ["expression", "wtoken-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 329104147618556708, 15461264859114081015, 18446744073709551615, 18446744073709551615, 412, 417, 412, 417, 72, 73, true, "98.7%", "98.7%"], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 784428348664963687, 2735229758044296436, 18446744073709551615, 18446744073709551615, 33, 133, 33, 133, 6, 20, true, "The corresponding recall and precision are then computed for this dual-class classification problem.", "The corresponding recall and precision are then computed for this dual-class classification problem."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 3927917834152176938, 12569591881522562313, 18446744073709551615, 18446744073709551615, 134, 273, 134, 273, 20, 46, true, "In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence.", "In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 3956872905292683881, 2752157999599851583, 18446744073709551615, 18446744073709551615, 274, 445, 274, 445, 46, 80, true, "For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0.", "For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 17055744903410885404, 12761534484507818149, 18446744073709551615, 18446744073709551615, 449, 556, 449, 556, 82, 101, true, "The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers.", "The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14420414998277701657, 3037581738866623003, 18446744073709551615, 18446744073709551615, 557, 667, 557, 667, 101, 119, true, "We believe this originates from the selective search algorithm which is used to determine regions of interest.", "We believe this originates from the selective search algorithm which is used to determine regions of interest."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14678097696923692160, 11491609575789433741, 18446744073709551615, 18446744073709551615, 668, 773, 668, 773, 119, 139, true, "The images we feed it are not typical photographic images (made with a camera) but layout visualisations.", "The images we feed it are not typical photographic images (made with a camera) but layout visualisations."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 1336288703622935510, 15435580690586079242, 18446744073709551615, 18446744073709551615, 774, 867, 774, 867, 139, 156, true, "The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", "The selective search algorithm in Faster R-CNN might not be optimal for such type of objects."], ["term", "enum-term-mark-2", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 11037453576911667853, 12443097430245333421, 18446744073709551615, 18446744073709551615, 51, 71, 51, 71, 8, 11, true, "recall and precision", "recall and precision"], ["term", "enum-term-mark-2", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 767578358531619449, 1472685584560725507, 18446744073709551615, 18446744073709551615, 204, 224, 204, 224, 35, 38, true, "precision and recall", "precision and recall"], ["term", "enum-term-mark-2", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 767578358531619449, 1472685584560746355, 18446744073709551615, 18446744073709551615, 527, 547, 527, 547, 96, 99, true, "precision and recall", "precision and recall"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 7737036869804521677, 431221867393766623, 18446744073709551615, 18446744073709551615, 37, 57, 37, 57, 7, 9, true, "corresponding recall", "corresponding recall"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 11075783049363921732, 14381818982688268241, 18446744073709551615, 18446744073709551615, 99, 132, 99, 132, 16, 19, true, "dual-class classification problem", "dual-class classification problem"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8581372359543855162, 10333944193716453687, 18446744073709551615, 18446744073709551615, 151, 166, 151, 166, 25, 27, true, "fair comparison", "fair comparison"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16904814960714419182, 7305130667909903014, 18446744073709551615, 18446744073709551615, 218, 232, 218, 232, 37, 39, true, "recall metrics", "recall metrics"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 5859613489047657680, 4575208165015881094, 18446744073709551615, 18446744073709551615, 392, 408, 392, 408, 68, 71, true, "maximum F1 score", "maximum F1 score"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 4874473477449861741, 7312361899298084317, 18446744073709551615, 18446744073709551615, 423, 439, 423, 439, 75, 77, true, "confidence level", "confidence level"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6927970521128218953, 6482828839300817669, 18446744073709551615, 18446744073709551615, 453, 472, 453, 472, 83, 86, true, "Faster R-CNN method", "Faster R-CNN method"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16904814894749305757, 5737021334745277149, 18446744073709551615, 18446744073709551615, 541, 555, 541, 555, 98, 100, true, "recall numbers", "recall numbers"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 4349380732135272089, 16458298459980248480, 18446744073709551615, 18446744073709551615, 593, 619, 593, 619, 107, 110, true, "selective search algorithm", "selective search algorithm"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 2351536754407393176, 12969141846351017301, 18446744073709551615, 18446744073709551615, 698, 725, 698, 725, 126, 129, true, "typical photographic images", "typical photographic images"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 18245848170103364623, 3851473044777784430, 18446744073709551615, 18446744073709551615, 751, 772, 751, 772, 136, 138, true, "layout visualisations", "layout visualisations"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 4349380732135272089, 16458298459980260537, 18446744073709551615, 18446744073709551615, 778, 804, 778, 804, 140, 143, true, "selective search algorithm", "selective search algorithm"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 5327781098613689502, 14889487484335627658, 18446744073709551615, 18446744073709551615, 808, 820, 808, 820, 144, 146, true, "Faster R-CNN", "Faster R-CNN"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6165459236568103333, 2812369711373771464, 18446744073709551615, 18446744073709551615, 846, 855, 846, 855, 151, 153, true, "such type", "such type"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6184954595655792282, 18387321712019319773, 18446744073709551615, 18446744073709551615, 62, 71, 62, 71, 10, 11, true, "precision", "precision"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 329104161571401725, 15575423851065642052, 18446744073709551615, 18446744073709551615, 137, 142, 137, 142, 21, 22, true, "order", "order"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14814151113413570861, 12729204908894192489, 18446744073709551615, 18446744073709551615, 178, 186, 178, 186, 30, 31, true, "networks", "networks"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6184954595655792282, 18387321712019245881, 18446744073709551615, 18446744073709551615, 204, 213, 204, 213, 35, 36, true, "precision", "precision"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206521526353544, 16408450721845756506, 18446744073709551615, 18446744073709551615, 238, 244, 238, 244, 40, 41, true, "regard", "regard"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 2702871111219879214, 2512541272008941381, 18446744073709551615, 18446744073709551615, 262, 272, 262, 272, 44, 45, true, "confidence", "confidence"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206533950151485, 198566132787583629, 18446744073709551615, 18446744073709551615, 278, 284, 278, 284, 47, 48, true, "YOLOv2", "YOLOv2"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206521531485437, 16408606466535231414, 18446744073709551615, 18446744073709551615, 305, 311, 305, 311, 52, 53, true, "recall", "recall"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6184954595655792282, 18387321712019270016, 18446744073709551615, 18446744073709551615, 330, 339, 330, 339, 57, 58, true, "precision", "precision"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 2702871111219879214, 2512541272008894019, 18446744073709551615, 18446744073709551615, 355, 365, 355, 365, 62, 63, true, "confidence", "confidence"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6184954595655792282, 18387321712019273929, 18446744073709551615, 18446744073709551615, 527, 536, 527, 536, 96, 97, true, "precision", "precision"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8106478448964548679, 12701825139671272799, 18446744073709551615, 18446744073709551615, 647, 654, 647, 654, 115, 116, true, "regions", "regions"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14637953883246475850, 7956817731702541219, 18446744073709551615, 18446744073709551615, 658, 666, 658, 666, 117, 118, true, "interest", "interest"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206560620045048, 3784940468244328560, 18446744073709551615, 18446744073709551615, 672, 678, 672, 678, 120, 121, true, "images", "images"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206563351041630, 1952046848832586628, 18446744073709551615, 18446744073709551615, 739, 745, 739, 745, 133, 134, true, "camera", "camera"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8106342034010873556, 18238380662499221230, 18446744073709551615, 18446744073709551615, 859, 866, 859, 866, 154, 155, true, "objects", "objects"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 11891944663675020942, 13358251629780069780, 18446744073709551615, 18446744073709551615, 72, 89, 72, 89, 11, 14, true, "are then computed", "are then computed"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6183880245133195430, 11375315636474919011, 18446744073709551615, 18446744073709551615, 312, 321, 312, 321, 53, 55, true, "goes down", "goes down"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 2694830089385977061, 235012322887490211, 18446744073709551615, 18446744073709551615, 366, 378, 366, 378, 63, 65, true, "is increased", "is increased"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 7743689594175537908, 4826765463732452457, 18446744073709551615, 18446744073709551615, 473, 502, 473, 502, 86, 91, true, "is also performing quite well", "is also performing quite well"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14568989124066371477, 1068965357575472568, 18446744073709551615, 18446744073709551615, 508, 520, 508, 520, 93, 95, true, "has slightly", "has slightly"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16534452113033443144, 7065494418204761025, 18446744073709551615, 18446744073709551615, 626, 646, 626, 646, 111, 115, true, "is used to determine", "is used to determine"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8106397797831668975, 18220343756781523026, 18446744073709551615, 18446744073709551615, 690, 697, 690, 697, 124, 126, true, "are not", "are not"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541486853, 10477289391110259759, 18446744073709551615, 18446744073709551615, 146, 148, 146, 148, 23, 24, true, "do", "do"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14814150880980441564, 5851167619774412175, 18446744073709551615, 18446744073709551615, 191, 199, 191, 199, 33, 34, true, "optimise", "optimise"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6184954633443293966, 15964917443528178420, 18446744073709551615, 18446744073709551615, 252, 261, 252, 261, 43, 44, true, "predicted", "predicted"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8106342033696543838, 18232753974273180210, 18446744073709551615, 18446744073709551615, 288, 295, 288, 295, 49, 50, true, "observe", "observe"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 389609625699055541, 1239396878369861980, 18446744073709551615, 18446744073709551615, 340, 344, 340, 344, 58, 59, true, "goes", "goes"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6168826060228989821, 9992741985777267919, 18446744073709551615, 18446744073709551615, 380, 389, 380, 389, 66, 67, true, "obtaining", "obtaining"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8106397860663428876, 16964248131253901291, 18446744073709551615, 18446744073709551615, 560, 567, 560, 567, 102, 103, true, "believe", "believe"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 13983620007877845674, 12955352785275452378, 18446744073709551615, 18446744073709551615, 573, 583, 573, 583, 104, 105, true, "originates", "originates"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 389609625697838276, 1239402610955961201, 18446744073709551615, 18446744073709551615, 682, 686, 682, 686, 122, 123, true, "feed", "feed"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 389609625618411791, 1242783662433971802, 18446744073709551615, 18446744073709551615, 727, 731, 727, 731, 130, 131, true, "made", "made"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541487001, 10477275230049640367, 18446744073709551615, 18446744073709551615, 831, 833, 831, 833, 148, 149, true, "be", "be"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16553501753141503400, 15045481503517904124, 18446744073709551615, 18446744073709551615, 834, 845, 834, 845, 149, 151, true, "optimal for", "optimal for"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14637917333165224513, 10908983268505451281, 18446744073709551615, 18446744073709551615, 90, 98, 90, 98, 14, 16, true, "for this", "for this"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541480354, 10477275240531848205, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 20, 21, true, "In", "In"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206565712212855, 1966173897978141572, 18446744073709551615, 18446744073709551615, 167, 173, 167, 173, 27, 29, true, "of the", "of the"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 389609625618037948, 1242787593333487218, 18446744073709551615, 18446744073709551615, 233, 237, 233, 237, 39, 40, true, "with", "with"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 12178341415896108722, 156309885604541418, 18446744073709551615, 18446744073709551615, 274, 277, 274, 277, 46, 47, true, "For", "For"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14634130761162415388, 14288776936577427060, 18446744073709551615, 18446744073709551615, 296, 304, 296, 304, 50, 52, true, "that the", "that the"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206568455155979, 1869095877123778211, 18446744073709551615, 18446744073709551615, 348, 354, 348, 354, 60, 62, true, "as the", "as the"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485670, 10477275256518274646, 18446744073709551615, 18446744073709551615, 409, 411, 409, 411, 71, 72, true, "of", "of"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 389609625700792947, 1238530397841875604, 18446744073709551615, 18446744073709551615, 418, 422, 418, 422, 73, 75, true, "at a", "at a"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485670, 10477275256518295884, 18446744073709551615, 18446744073709551615, 440, 442, 440, 442, 77, 78, true, "of", "of"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14637917359887717745, 1544745809668392834, 18446744073709551615, 18446744073709551615, 584, 592, 584, 592, 105, 107, true, "from the", "from the"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485670, 10477275256518301113, 18446744073709551615, 18446744073709551615, 655, 657, 655, 657, 116, 117, true, "of", "of"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206557726458966, 3788832551851477825, 18446744073709551615, 18446744073709551615, 732, 738, 732, 738, 131, 133, true, "with a", "with a"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541486538, 10477275205185242704, 18446744073709551615, 18446744073709551615, 805, 807, 805, 807, 143, 144, true, "in", "in"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485670, 10477275256518310244, 18446744073709551615, 18446744073709551615, 856, 858, 856, 858, 153, 154, true, "of", "of"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485865, 10477275215095288698, 18446744073709551615, 18446744073709551615, 143, 145, 143, 145, 22, 23, true, "to", "to"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206519425733256, 370344314517327407, 18446744073709551615, 18446744073709551615, 245, 251, 245, 251, 41, 43, true, "to the", "to the"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485865, 10477275215095322459, 18446744073709551615, 18446744073709551615, 634, 636, 634, 636, 113, 114, true, "to", "to"], ["expression", "wtoken-concatenation", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 329104147725158908, 18028372742913290156, 18446744073709551615, 18446744073709551615, 0, 5, 0, 5, 0, 1, true, "3.4.3", "3.4.3"], ["sentence", "", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 7718133462399744108, 17823198661305637266, 18446744073709551615, 18446744073709551615, 0, 31, 0, 31, 0, 5, true, "3.4.3 Template specific Models.", "3.4.3 Template specific Models."], ["sentence", "", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 10092485441396158590, 1921679794908306598, 18446744073709551615, 18446744073709551615, 32, 159, 32, 159, 5, 27, true, "The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template.", "The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template."], ["sentence", "", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15812734743858168044, 5104988671183900609, 18446744073709551615, 18446744073709551615, 160, 272, 160, 272, 27, 47, true, "This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance.", "This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance."], ["sentence", "", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 551135567978634707, 9805137836117614428, 18446744073709551615, 18446744073709551615, 273, 460, 273, 460, 47, 78, true, "Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", "Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality."], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 11907907877741579530, 940094317087021995, 18446744073709551615, 18446744073709551615, 6, 30, 6, 30, 1, 4, true, "Template specific Models", "Template specific Models"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 3663813169945470735, 17139564151051767194, 18446744073709551615, 18446744073709551615, 44, 68, 44, 68, 8, 11, true, "template specific models", "template specific models"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 16960645913427248555, 7662141651479474713, 18446744073709551615, 18446744073709551615, 91, 109, 91, 109, 16, 18, true, "extraction quality", "extraction quality"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 10137510760641589283, 15174113578041628274, 18446744073709551615, 18446744073709551615, 141, 158, 141, 158, 24, 26, true, "specific template", "specific template"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 7342862043108457350, 10866470711373289678, 18446744073709551615, 18446744073709551615, 181, 202, 181, 202, 31, 34, true, "many technical fields", "many technical fields"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 3376407656379762908, 17651500245932752692, 18446744073709551615, 18446744073709551615, 251, 271, 251, 271, 44, 46, true, "paramount importance", "paramount importance"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 879437392081459464, 10698589901478685905, 18446744073709551615, 18446744073709551615, 286, 310, 286, 310, 49, 52, true, "many technical documents", "many technical documents"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15130402050161305835, 1457144697725364176, 18446744073709551615, 18446744073709551615, 316, 330, 316, 330, 54, 56, true, "specific field", "specific field"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 5723400002059657755, 8384905200420629131, 18446744073709551615, 18446744073709551615, 353, 369, 353, 369, 60, 62, true, "certain template", "certain template"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 16960645913427248555, 7662141651479431440, 18446744073709551615, 18446744073709551615, 441, 459, 441, 459, 75, 77, true, "extraction quality", "extraction quality"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625699055241, 14883359024073212478, 18446744073709551615, 18446744073709551615, 36, 40, 36, 40, 6, 7, true, "goal", "goal"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 329104161610777240, 15370809836743986311, 18446744073709551615, 18446744073709551615, 130, 135, 130, 135, 21, 22, true, "model", "model"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 14650440612701450082, 10632661340355574917, 18446744073709551615, 18446744073709551615, 214, 222, 214, 222, 37, 38, true, "accuracy", "accuracy"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625696431489, 14876459829455684771, 18446744073709551615, 18446744073709551615, 240, 244, 240, 244, 41, 42, true, "data", "data"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 329104161787480235, 15382185116652927163, 18446744073709551615, 18446744073709551615, 389, 394, 389, 394, 66, 67, true, "sense", "sense"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 5946904284821171904, 7436968498862967568, 18446744073709551615, 18446744073709551615, 403, 412, 403, 412, 69, 70, true, "advantage", "advantage"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 14634130803848280536, 13102933406250746055, 18446744073709551615, 18446744073709551615, 421, 429, 421, 429, 72, 73, true, "template", "template"], ["verb", "compound-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 6623118764989562485, 9686528964214635468, 18446744073709551615, 18446744073709551615, 69, 81, 69, 81, 11, 14, true, "is to obtain", "is to obtain"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15180748647375949898, 15041926949817059678, 18446744073709551615, 18446744073709551615, 113, 125, 113, 125, 19, 20, true, "specializing", "specializing"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541486535, 1662040640859036333, 18446744073709551615, 18446744073709551615, 165, 167, 165, 167, 28, 29, true, "is", "is"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 6168374324562720592, 8408511475472730744, 18446744073709551615, 18446744073709551615, 230, 239, 230, 239, 40, 41, true, "extracted", "extracted"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541486535, 1662040640859038873, 18446744073709551615, 18446744073709551615, 245, 247, 245, 247, 42, 43, true, "is", "is"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 16381206574684919940, 8690278604869594595, 18446744073709551615, 18446744073709551615, 341, 347, 341, 347, 57, 58, true, "appear", "appear"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 329104161505838030, 15370325700124998836, 18446744073709551615, 18446744073709551615, 383, 388, 383, 388, 65, 66, true, "makes", "makes"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625631208371, 14878114134196888026, 18446744073709551615, 18446744073709551615, 398, 402, 398, 402, 68, 69, true, "take", "take"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 8106398106568099440, 4690670493670021785, 18446744073709551615, 18446744073709551615, 433, 440, 433, 440, 74, 75, true, "improve", "improve"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 3701312585595488544, 14499465500010376427, 18446744073709551615, 18446744073709551615, 168, 180, 168, 180, 29, 31, true, "necessary in", "necessary in"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541485670, 1662040798765251967, 18446744073709551615, 18446744073709551615, 41, 43, 41, 43, 7, 8, true, "of", "of"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541486989, 1662040951000079940, 18446744073709551615, 18446744073709551615, 110, 112, 110, 112, 18, 19, true, "by", "by"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625618762887, 14878547061345061059, 18446744073709551615, 18446744073709551615, 136, 140, 136, 140, 22, 24, true, "on a", "on a"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 16381206565712212855, 5026312373792128532, 18446744073709551615, 18446744073709551615, 223, 229, 223, 229, 38, 40, true, "of the", "of the"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541485670, 1662040798765106998, 18446744073709551615, 18446744073709551615, 248, 250, 248, 250, 43, 44, true, "of", "of"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625698530964, 14883385687690770855, 18446744073709551615, 18446744073709551615, 311, 315, 311, 315, 52, 54, true, "in a", "in a"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625698530964, 14883385687690756753, 18446744073709551615, 18446744073709551615, 348, 352, 348, 352, 58, 60, true, "in a", "in a"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 8106342927224204628, 15389357728224894046, 18446744073709551615, 18446744073709551615, 413, 420, 413, 420, 70, 72, true, "of this", "of this"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541485865, 1662040545925493605, 18446744073709551615, 18446744073709551615, 72, 74, 72, 74, 12, 13, true, "to", "to"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541485865, 1662040545925472456, 18446744073709551615, 18446744073709551615, 395, 397, 395, 397, 67, 68, true, "to", "to"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541485865, 1662040545925478963, 18446744073709551615, 18446744073709551615, 430, 432, 430, 432, 73, 74, true, "to", "to"], ["numval", "irng", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 8104408072639178553, 3436406430005144206, 18446744073709551615, 18446744073709551615, 251, 258, 251, 258, 47, 48, true, "100-400", "100-400"], ["expression", "word-concatenation", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 6179391750322252074, 7609163653836261740, 18446744073709551615, 18446744073709551615, 309, 318, 309, 318, 58, 59, true, "man-hours", "man-hours"], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15260123598736784563, 9203716626740230654, 18446744073709551615, 18446744073709551615, 0, 96, 0, 96, 0, 17, true, "For an algorithm to fit in the interactive platform design we identified a few key requirements.", "For an algorithm to fit in the interactive platform design we identified a few key requirements."], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 4991934942360344417, 464502954292559498, 18446744073709551615, 18446744073709551615, 97, 187, 97, 187, 17, 36, true, "First, it is crucial that the model can generate good results with a limited set of pages.", "First, it is crucial that the model can generate good results with a limited set of pages."], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5804013038180128821, 5640425092716649521, 18446744073709551615, 18446744073709551615, 188, 334, 188, 334, 36, 62, true, "In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation.", "In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation."], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 4985111903766472827, 3150108135738516677, 18446744073709551615, 18446744073709551615, 335, 406, 335, 406, 62, 75, true, "Second it must be robust against extreme imbalance of the labeled data.", "Second it must be robust against extreme imbalance of the labeled data."], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 785870778923519952, 737901236245424740, 18446744073709551615, 18446744073709551615, 407, 510, 407, 510, 75, 97, true, "It is clear that cells of the label Title will be much more uncommon than cells with the label of Text.", "It is clear that cells of the label Title will be much more uncommon than cells with the label of Text."], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 9332774394694438134, 16216949282398930168, 18446744073709551615, 18446744073709551615, 511, 635, 511, 635, 97, 120, true, "Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", "Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process."], ["term", "enum-term-mark-2", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 4382912746554659998, 10486697078971674734, 18446744073709551615, 18446744073709551615, 553, 576, 553, 576, 107, 110, true, "training and predicting", "training and predicting"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15648340612932904440, 3282829189538707082, 18446744073709551615, 18446744073709551615, 31, 58, 31, 58, 7, 10, true, "interactive platform design", "interactive platform design"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 6908529689048994003, 5714340384362777333, 18446744073709551615, 18446744073709551615, 75, 95, 75, 95, 13, 16, true, "few key requirements", "few key requirements"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 8407082861571023662, 15351385334075847588, 18446744073709551615, 18446744073709551615, 146, 158, 146, 158, 27, 29, true, "good results", "good results"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5934032006560084170, 4928254777620989731, 18446744073709551615, 18446744073709551615, 166, 177, 166, 177, 31, 33, true, "limited set", "limited set"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 4147505635383066832, 5727214689898978610, 18446744073709551615, 18446744073709551615, 259, 274, 259, 274, 48, 50, true, "annotated pages", "annotated pages"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 18385357359584472461, 13895653974217727096, 18446744073709551615, 18446744073709551615, 368, 385, 368, 385, 68, 70, true, "extreme imbalance", "extreme imbalance"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5579859539081108650, 13588256740689789301, 18446744073709551615, 18446744073709551615, 437, 448, 437, 448, 82, 84, true, "label Title", "label Title"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 11179896262039516860, 11055979193001627571, 18446744073709551615, 18446744073709551615, 604, 634, 604, 634, 116, 119, true, "interactive annotation process", "interactive annotation process"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5946733998943492893, 12428281114523894179, 18446744073709551615, 18446744073709551615, 7, 16, 7, 16, 2, 3, true, "algorithm", "algorithm"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161610777240, 12260173115935047807, 18446744073709551615, 18446744073709551615, 127, 132, 127, 132, 24, 25, true, "model", "model"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161667992688, 12263531108286392881, 18446744073709551615, 18446744073709551615, 181, 186, 181, 186, 34, 35, true, "pages", "pages"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14814125472896938138, 16750744787410262504, 18446744073709551615, 18446744073709551615, 191, 199, 191, 199, 37, 38, true, "practice", "practice"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5946733998943492893, 12428281114523913343, 18446744073709551615, 18446744073709551615, 215, 224, 215, 224, 41, 42, true, "algorithm", "algorithm"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5364746105625482840, 9265279288259411951, 18446744073709551615, 18446744073709551615, 283, 293, 283, 293, 53, 54, true, "equivalent", "equivalent"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206562413425049, 9436735506305030424, 18446744073709551615, 18446744073709551615, 299, 305, 299, 305, 56, 57, true, "couple", "couple"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 6179391750322252074, 7609163653836261740, 18446744073709551615, 18446744073709551615, 309, 318, 309, 318, 58, 59, true, "man-hours", "man-hours"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15359807916847495711, 1899733546879222276, 18446744073709551615, 18446744073709551615, 323, 333, 323, 333, 60, 61, true, "annotation", "annotation"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 389609625696431489, 14766578679289820558, 18446744073709551615, 18446744073709551615, 401, 405, 401, 405, 73, 74, true, "data", "data"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161531686411, 12388798607045074404, 18446744073709551615, 18446744073709551615, 424, 429, 424, 429, 79, 80, true, "cells", "cells"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161531686411, 12388798607045077590, 18446744073709551615, 18446744073709551615, 481, 486, 481, 486, 90, 91, true, "cells", "cells"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161624445793, 12260240665376036393, 18446744073709551615, 18446744073709551615, 496, 501, 496, 501, 93, 94, true, "label", "label"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 389609625541629035, 11509492960090225407, 18446744073709551615, 18446744073709551615, 505, 509, 505, 509, 95, 96, true, "Text", "Text"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161610777240, 12260173115935041441, 18446744073709551615, 18446744073709551615, 521, 526, 521, 526, 100, 101, true, "model", "model"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14634153919632515335, 18085216687124440147, 18446744073709551615, 18446744073709551615, 553, 561, 553, 561, 107, 108, true, "training", "training"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14103651237077222912, 68076985749864369, 18446744073709551615, 18446744073709551615, 566, 576, 566, 576, 109, 110, true, "predicting", "predicting"], ["verb", "compound-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 17533333750889623004, 9472769641599292929, 18446744073709551615, 18446744073709551615, 225, 246, 225, 246, 42, 46, true, "needs to perform well", "needs to perform well"], ["verb", "compound-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 11061360903444416284, 2129300334891083426, 18446744073709551615, 18446744073709551615, 449, 461, 449, 461, 84, 87, true, "will be much", "will be much"], ["verb", "compound-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 8076044115168679328, 14919913684743101171, 18446744073709551615, 18446744073709551615, 527, 543, 527, 543, 101, 105, true, "needs to be very", "needs to be very"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 12178341415895625823, 9936689573502689067, 18446744073709551615, 18446744073709551615, 20, 23, 20, 23, 4, 5, true, "fit", "fit"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15995920061809434499, 9928580205597280350, 18446744073709551615, 18446744073709551615, 62, 72, 62, 72, 11, 12, true, "identified", "identified"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541486535, 1453464821964574132, 18446744073709551615, 18446744073709551615, 107, 109, 107, 109, 20, 21, true, "is", "is"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 4017818373869155501, 4278801556520084204, 18446744073709551615, 18446744073709551615, 133, 145, 133, 145, 25, 27, true, "can generate", "can generate"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161618623456, 12265975418452186520, 18446744073709551615, 18446744073709551615, 205, 210, 205, 210, 39, 40, true, "means", "means"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206514640520764, 9808439134705831038, 18446744073709551615, 18446744073709551615, 335, 341, 335, 341, 62, 63, true, "Second", "Second"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 8106464533804387051, 8890708598446613982, 18446744073709551615, 18446744073709551615, 345, 352, 345, 352, 64, 66, true, "must be", "must be"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 8106342931007190203, 9172963578761412207, 18446744073709551615, 18446744073709551615, 393, 400, 393, 400, 72, 73, true, "labeled", "labeled"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541486535, 1453464821964494202, 18446744073709551615, 18446744073709551615, 410, 412, 410, 412, 76, 77, true, "is", "is"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 11040131848055511293, 9945998267769661183, 18446744073709551615, 18446744073709551615, 587, 599, 587, 599, 113, 115, true, "will support", "will support"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14228775800344759211, 1968197584824290171, 18446744073709551615, 18446744073709551615, 110, 122, 110, 122, 21, 23, true, "crucial that", "crucial that"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 1993790582685692910, 4432318201339565934, 18446744073709551615, 18446744073709551615, 353, 367, 353, 367, 66, 68, true, "robust against", "robust against"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 2617690495147367356, 14858078827112379350, 18446744073709551615, 18446744073709551615, 413, 423, 413, 423, 77, 79, true, "clear that", "clear that"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 11114365829612930466, 10473638374653243311, 18446744073709551615, 18446744073709551615, 467, 480, 467, 480, 88, 90, true, "uncommon than", "uncommon than"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14637920980696500808, 1921583426992803260, 18446744073709551615, 18446744073709551615, 544, 552, 544, 552, 105, 107, true, "quick in", "quick in"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206535218531925, 10766932565669116762, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 2, true, "For an", "For an"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206560518651853, 18004306290400611053, 18446744073709551615, 18446744073709551615, 24, 30, 24, 30, 5, 7, true, "in the", "in the"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206557726458966, 18019159896119976996, 18446744073709551615, 18446744073709551615, 159, 165, 159, 165, 29, 31, true, "with a", "with a"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485670, 1453464820859683358, 18446744073709551615, 18446744073709551615, 178, 180, 178, 180, 33, 34, true, "of", "of"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541480354, 1453453982064787832, 18446744073709551615, 18446744073709551615, 188, 190, 188, 190, 36, 37, true, "In", "In"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 12178341415895625940, 9936689583318236091, 18446744073709551615, 18446744073709551615, 247, 250, 247, 250, 46, 47, true, "for", "for"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 389609625620237736, 11496284453519477340, 18446744073709551615, 18446744073709551615, 294, 298, 294, 298, 54, 56, true, "of a", "of a"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485670, 1453464820859611646, 18446744073709551615, 18446744073709551615, 306, 308, 306, 308, 57, 58, true, "of", "of"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 12178341415895625940, 9936689583318206889, 18446744073709551615, 18446744073709551615, 319, 322, 319, 322, 59, 60, true, "for", "for"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206565712212855, 9482496689021090789, 18446744073709551615, 18446744073709551615, 386, 392, 386, 392, 70, 72, true, "of the", "of the"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206565712212855, 9482496689021060650, 18446744073709551615, 18446744073709551615, 430, 436, 430, 436, 80, 82, true, "of the", "of the"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14638857868319795209, 2580294575252347269, 18446744073709551615, 18446744073709551615, 487, 495, 487, 495, 91, 93, true, "with the", "with the"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485670, 1453464820859607533, 18446744073709551615, 18446744073709551615, 502, 504, 502, 504, 94, 95, true, "of", "of"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161786618045, 12383315557724444592, 18446744073709551615, 18446744073709551615, 578, 583, 578, 583, 111, 112, true, "since", "since"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485865, 1453464826493133085, 18446744073709551615, 18446744073709551615, 17, 19, 17, 19, 3, 4, true, "to", "to"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485865, 1453464826493115912, 18446744073709551615, 18446744073709551615, 231, 233, 231, 233, 43, 44, true, "to", "to"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485865, 1453464826493232036, 18446744073709551615, 18446744073709551615, 533, 535, 533, 535, 102, 103, true, "to", "to"], ["parenthesis", "reference", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 12178341415895551595, 3578610866005582188, 18446744073709551615, 18446744073709551615, 42, 45, 42, 45, 8, 9, true, "[2]", "[2]"], ["expression", "wtoken-concatenation", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 12178341415895551595, 3578610866005582188, 18446744073709551615, 18446744073709551615, 42, 45, 42, 45, 8, 9, true, "[2]", "[2]"], ["sentence", "", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 1321865598114444635, 2215181109629009596, 18446744073709551615, 18446744073709551615, 0, 107, 0, 107, 0, 19, true, "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models.", "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models."], ["sentence", "", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 5933656628218586373, 4615859734668438555, 18446744073709551615, 18446744073709551615, 108, 243, 108, 243, 19, 42, true, "Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data.", "Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data."], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 8479084307598384759, 13789581635532023596, 18446744073709551615, 18446744073709551615, 28, 41, 28, 41, 6, 8, true, "random forest", "random forest"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 3663813169945470735, 13771905067446488114, 18446744073709551615, 18446744073709551615, 82, 106, 82, 106, 15, 18, true, "template specific models", "template specific models"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 6407272496581372949, 9529106438636169186, 18446744073709551615, 18446744073709551615, 108, 132, 108, 132, 19, 22, true, "Random forest algorithms", "Random forest algorithms"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 363090472507169169, 8561180021799884651, 18446744073709551615, 18446744073709551615, 183, 199, 183, 199, 32, 34, true, "accurate results", "accurate results"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 4517874168209370779, 3521538572593201674, 18446744073709551615, 18446744073709551615, 227, 242, 227, 242, 39, 41, true, "structured data", "structured data"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 8106478449187889361, 13919224543462497012, 18446744073709551615, 18446744073709551615, 10, 17, 10, 17, 2, 3, true, "reasons", "reasons"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 8106464587473865376, 16361697669749387702, 18446744073709551615, 18446744073709551615, 51, 58, 51, 58, 11, 12, true, "machine", "machine"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 5946733998943492893, 15258259268393418233, 18446744073709551615, 18446744073709551615, 68, 77, 68, 77, 13, 14, true, "algorithm", "algorithm"], ["verb", "compound-verb", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 3672237414008980378, 8168753950103855601, 18446744073709551615, 18446744073709551615, 133, 161, 133, 161, 22, 28, true, "are known to be trained fast", "are known to be trained fast"], ["verb", "compound-verb", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 15705911622867996458, 18088967880959376572, 18446744073709551615, 18446744073709551615, 166, 182, 166, 182, 29, 32, true, "can produce very", "can produce very"], ["verb", "single-verb", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 329104161556620669, 14781203896201770352, 18446744073709551615, 18446744073709551615, 22, 27, 22, 27, 5, 6, true, "chose", "chose"], ["verb", "single-verb", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 14639581097006750428, 4008136393337002779, 18446744073709551615, 18446744073709551615, 59, 67, 59, 67, 12, 13, true, "learning", "learning"], ["conn", "single-conn", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 6560705639796409909, 14533538587347702670, 18446744073709551615, 18446744073709551615, 0, 9, 0, 9, 0, 2, true, "For these", "For these"], ["conn", "single-conn", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 389609625700764258, 6136167252535921243, 18446744073709551615, 18446744073709551615, 46, 50, 46, 50, 9, 11, true, "as a", "as a"], ["conn", "single-conn", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 12178341415895625940, 3578619344503340053, 18446744073709551615, 18446744073709551615, 78, 81, 78, 81, 14, 15, true, "for", "for"], ["conn", "single-conn", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 15441160910541485678, 2424995179865918878, 18446744073709551615, 18446744073709551615, 200, 202, 200, 202, 34, 35, true, "on", "on"], ["conn", "single-conn", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 15441160910541485865, 2424995192349443979, 18446744073709551615, 18446744073709551615, 143, 145, 143, 145, 24, 25, true, "to", "to"], ["sentence", "", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 3849526231748074223, 318347409270939671, 18446744073709551615, 18446744073709551615, 55, 207, 55, 207, 9, 35, true, "Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements.", "Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements."], ["sentence", "", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 5442113846327811609, 17895930310474857340, 18446744073709551615, 18446744073709551615, 208, 346, 208, 346, 35, 58, true, "As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", "As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised."], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 8479084307598384759, 16847024660628644782, 18446744073709551615, 18446744073709551615, 68, 81, 68, 81, 11, 13, true, "random forest", "random forest"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 17604104298028087389, 5936050629867305383, 18446744073709551615, 18446744073709551615, 88, 103, 88, 103, 15, 17, true, "ensemble method", "ensemble method"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 2221030665390994181, 12248418922188426900, 18446744073709551615, 18446744073709551615, 136, 157, 136, 157, 24, 26, true, "distribution function", "distribution function"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 79538879438919706, 10884144148399035709, 18446744073709551615, 18446744073709551615, 183, 206, 183, 206, 32, 34, true, "individual dataelements", "individual dataelements"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 12791568251594841134, 3705185927483894330, 18446744073709551615, 18446744073709551615, 306, 328, 306, 328, 53, 55, true, "distribution functions", "distribution functions"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 14637918593917529467, 18355240018153108157, 18446744073709551615, 18446744073709551615, 165, 173, 165, 173, 28, 29, true, "features", "features"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 2343822922798056892, 6095175646980864634, 18446744073709551615, 18446744073709551615, 213, 224, 213, 224, 37, 38, true, "consequence", "consequence"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 6187814126721711351, 3003359478748945666, 18446744073709551615, 18446744073709551615, 265, 274, 265, 274, 45, 46, true, "imbalance", "imbalance"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 389609625696431489, 2843435508709525326, 18446744073709551615, 18446744073709551615, 290, 294, 290, 294, 49, 50, true, "data", "data"], ["verb", "compound-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 11444323110081493576, 8704817656229962117, 18446744073709551615, 18446744073709551615, 231, 244, 231, 244, 40, 42, true, "are typically", "are typically"], ["verb", "compound-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 1544956657377891563, 14235903438134168102, 18446744073709551615, 18446744073709551615, 329, 345, 329, 345, 55, 57, true, "are renormalised", "are renormalised"], ["verb", "single-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 15441160910541486535, 2182812634861815046, 18446744073709551615, 18446744073709551615, 82, 84, 82, 84, 13, 14, true, "is", "is"], ["verb", "single-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 8106464574621932200, 11319020816616427866, 18446744073709551615, 18446744073709551615, 105, 112, 105, 112, 18, 19, true, "meaning", "meaning"], ["verb", "single-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 329104161602730070, 3875356443080677100, 18446744073709551615, 18446744073709551615, 123, 128, 123, 128, 21, 22, true, "learn", "learn"], ["verb", "single-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 8106342931007190203, 4209020374123555528, 18446744073709551615, 18446744073709551615, 282, 289, 282, 289, 48, 49, true, "labeled", "labeled"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 1993790582685692910, 7138562820453798245, 18446744073709551615, 18446744073709551615, 250, 264, 250, 264, 43, 45, true, "robust against", "robust against"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 389609625631229034, 2814456960913688391, 18446744073709551615, 18446744073709551615, 113, 117, 113, 117, 19, 20, true, "that", "that"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 16381206566339127348, 5718424238614799049, 18446744073709551615, 18446744073709551615, 129, 135, 129, 135, 22, 24, true, "on the", "on the"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 16381206565712212855, 5705860195075376256, 18446744073709551615, 18446744073709551615, 158, 164, 158, 164, 26, 28, true, "of the", "of the"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 389609625539850184, 2844626104089028763, 18446744073709551615, 18446744073709551615, 208, 212, 208, 212, 35, 37, true, "As a", "As a"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 16381206565712212855, 5705860195075407861, 18446744073709551615, 18446744073709551615, 275, 281, 275, 281, 46, 48, true, "of the", "of the"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 6168057894310307081, 5201584897100688456, 18446744073709551615, 18446744073709551615, 296, 305, 296, 305, 51, 53, true, "since the", "since the"], ["parenthesis", "round brackets", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 18214073896143357061, 14194881149350742521, 18446744073709551615, 18446744073709551615, 361, 386, 361, 386, 66, 74, true, "(normal, italic, or bold)", "(normal, italic, or bold)"], ["sentence", "", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14954795956210947038, 8148012250980112641, 18446744073709551615, 18446744073709551615, 0, 122, 0, 122, 0, 23, true, "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties.", "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties."], ["sentence", "", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 1490071144008831449, 94755760377075300, 18446744073709551615, 18446744073709551615, 123, 289, 123, 289, 23, 56, true, "For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells.", "For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells."], ["sentence", "", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 11740301738178623667, 17550032900348134352, 18446744073709551615, 18446744073709551615, 290, 451, 290, 451, 56, 86, true, "Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters.", "Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters."], ["sentence", "", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 11705059311263698902, 5031191200048059765, 18446744073709551615, 18446744073709551615, 452, 689, 452, 689, 86, 122, true, "We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell.", "We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell."], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 7925527528304634469, 3204166286438856601, 18446744073709551615, 18446744073709551615, 4, 24, 4, 24, 1, 4, true, "random forest method", "random forest method"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 6742946212951218383, 17384086898033499364, 18446744073709551615, 18446744073709551615, 72, 86, 72, 86, 15, 17, true, "feature vector", "feature vector"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 6742946212951218383, 17384086898033529847, 18446744073709551615, 18446744073709551615, 140, 154, 140, 154, 27, 29, true, "feature vector", "feature vector"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 18169256436041200544, 976921865353389358, 18446744073709551615, 18446744073709551615, 183, 194, 183, 194, 33, 35, true, "page number", "page number"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 3503955255877193443, 7762227561921017193, 18446744073709551615, 18446744073709551615, 212, 221, 212, 221, 40, 42, true, "text cell", "text cell"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 273962029668891890, 13794602264048178557, 18446744073709551615, 18446744073709551615, 311, 334, 311, 334, 59, 61, true, "geometrical information", "geometrical information"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 5748925367277359212, 2937463379481336632, 18446744073709551615, 18446744073709551615, 350, 360, 350, 360, 64, 66, true, "text style", "text style"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8543478576579995429, 5058478595230737997, 18446744073709551615, 18446744073709551615, 396, 411, 396, 411, 76, 78, true, "text statistics", "text statistics"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 3609517846327801127, 8654423261512076457, 18446744073709551615, 18446744073709551615, 432, 450, 432, 450, 83, 85, true, "numeric characters", "numeric characters"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 2494731039743157046, 13423733116746670576, 18446744073709551615, 18446744073709551615, 503, 524, 503, 524, 94, 96, true, "subsequent iterations", "subsequent iterations"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 5742902273463386997, 4995768139577900128, 18446744073709551615, 18446744073709551615, 530, 557, 530, 557, 97, 101, true, "other random forest methods", "other random forest methods"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 15287023664061002798, 10516263643469914706, 18446744073709551615, 18446744073709551615, 579, 601, 579, 601, 106, 109, true, "enlarged feature space", "enlarged feature space"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 5679217233560191195, 14779368924185208427, 18446744073709551615, 18446744073709551615, 676, 688, 676, 688, 119, 121, true, "current cell", "current cell"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625696024605, 9253399486045178962, 18446744073709551615, 18446744073709551615, 44, 48, 44, 48, 8, 9, true, "cell", "cell"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625632301461, 9250410801167727201, 18446744073709551615, 18446744073709551615, 56, 60, 56, 60, 11, 12, true, "page", "page"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14088628410271132453, 13936532626365380428, 18446744073709551615, 18446744073709551615, 111, 121, 111, 121, 21, 22, true, "properties", "properties"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106397496085150773, 3178546719087309481, 18446744073709551615, 18446744073709551615, 127, 134, 127, 134, 24, 25, true, "example", "example"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14388065630035882329, 7808684742840080268, 18446744073709551615, 18446744073709551615, 164, 175, 164, 175, 30, 31, true, "information", "information"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625741058932, 9253648583843157057, 18446744073709551615, 18446744073709551615, 200, 204, 200, 204, 37, 38, true, "size", "size"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14814126611988074969, 9583758094086294160, 18446744073709551615, 18446744073709551615, 227, 235, 227, 235, 44, 45, true, "position", "position"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14652260393507470214, 16689376904140573845, 18446744073709551615, 18446744073709551615, 252, 260, 252, 260, 50, 51, true, "distance", "distance"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 329104161531686411, 6302060538749910757, 18446744073709551615, 18446744073709551615, 283, 288, 283, 288, 54, 55, true, "cells", "cells"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14637917407223052431, 12528486671610885230, 18446744073709551615, 18446744073709551615, 420, 428, 420, 428, 81, 82, true, "fraction", "fraction"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106478445190161533, 16793107486304810346, 18446744073709551615, 18446744073709551615, 481, 488, 481, 488, 91, 92, true, "results", "results"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206590740615814, 12391907012289685737, 18446744073709551615, 18446744073709551615, 637, 643, 637, 643, 113, 114, true, "labels", "labels"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 6287765270733427081, 3171255276160848390, 18446744073709551615, 18446744073709551615, 651, 664, 651, 664, 116, 117, true, "neighbourhood", "neighbourhood"], ["verb", "compound-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 17539759121844497705, 10937437759161684631, 18446744073709551615, 18446744073709551615, 25, 38, 25, 38, 4, 7, true, "is applied to", "is applied to"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 329104159219515955, 5948276622309129008, 18446744073709551615, 18446744073709551615, 61, 66, 61, 66, 12, 13, true, "based", "based"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 9321541094732601010, 1440697959529725234, 18446744073709551615, 18446744073709551615, 87, 99, 87, 99, 17, 18, true, "representing", "representing"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14652282307552191074, 8945592419970363175, 18446744073709551615, 18446744073709551615, 155, 163, 155, 163, 29, 30, true, "contains", "contains"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 819608854126323397, 13629426058076355470, 18446744073709551615, 18446744073709551615, 270, 282, 270, 282, 53, 54, true, "neighbouring", "neighbouring"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625632445688, 9250410036172686662, 18446744073709551615, 18446744073709551615, 306, 310, 306, 310, 58, 59, true, "pure", "pure"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106398345764800179, 11692781860860359624, 18446744073709551615, 18446744073709551615, 338, 345, 338, 345, 62, 63, true, "include", "include"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106398106568099440, 984708905024410978, 18446744073709551615, 18446744073709551615, 460, 467, 460, 467, 88, 89, true, "improve", "improve"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14814126654807168093, 9749940858670701320, 18446744073709551615, 18446744073709551615, 472, 480, 472, 480, 90, 91, true, "obtained", "obtained"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 13928988056851964444, 2695484601897645328, 18446744073709551615, 18446744073709551615, 492, 502, 492, 502, 93, 94, true, "performing", "performing"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106342542940968443, 11775695301087073544, 18446744073709551615, 18446744073709551615, 565, 572, 565, 572, 103, 104, true, "operate", "operate"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 6182652448619835769, 17369145016370015604, 18446744073709551615, 18446744073709551615, 602, 611, 602, 611, 109, 110, true, "including", "including"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 6184954633443293966, 11039243749541224362, 18446744073709551615, 18446744073709551615, 627, 636, 627, 636, 112, 113, true, "predicted", "predicted"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206565712212855, 12155489767757724071, 18446744073709551615, 18446744073709551615, 49, 55, 49, 55, 9, 11, true, "of the", "of the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625618762887, 9250465343613782814, 18446744073709551615, 18446744073709551615, 67, 71, 67, 71, 13, 15, true, "on a", "on a"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206569373536144, 16646083731239545289, 18446744073709551615, 18446744073709551615, 100, 106, 100, 106, 18, 20, true, "all of", "all of"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 12178341415896108722, 6936029374485055754, 18446744073709551615, 18446744073709551615, 123, 126, 123, 126, 23, 24, true, "For", "For"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206568455155979, 16651545914296571149, 18446744073709551615, 18446744073709551615, 176, 182, 176, 182, 31, 33, true, "as the", "as the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206565712212855, 12155489767757717168, 18446744073709551615, 18446744073709551615, 205, 211, 205, 211, 38, 40, true, "of the", "of the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206568455155979, 16651545914296575484, 18446744073709551615, 18446744073709551615, 245, 251, 245, 251, 48, 50, true, "as the", "as the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14637917359887717745, 6508784519982619302, 18446744073709551615, 18446744073709551615, 261, 269, 261, 269, 51, 53, true, "from the", "from the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206568455155979, 16651545914296653810, 18446744073709551615, 18446744073709551615, 413, 419, 413, 419, 79, 81, true, "as the", "as the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 15441160910541485670, 2853352555985182029, 18446744073709551615, 18446744073709551615, 429, 431, 429, 431, 82, 83, true, "of", "of"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 15441160910541486989, 2853352954143451202, 18446744073709551615, 18446744073709551615, 489, 491, 489, 491, 92, 93, true, "by", "by"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625618037948, 9250626276987800313, 18446744073709551615, 18446744073709551615, 525, 529, 525, 529, 96, 97, true, "with", "with"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 329104161572724641, 6270983890392409045, 18446744073709551615, 18446744073709551615, 573, 578, 573, 578, 104, 106, true, "on an", "on an"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206565712212855, 12155489767757875831, 18446744073709551615, 18446744073709551615, 644, 650, 644, 650, 114, 116, true, "of the", "of the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 15388840276945242407, 564998623247738541, 18446744073709551615, 18446744073709551615, 665, 675, 665, 675, 117, 119, true, "around the", "around the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106351192289801590, 15045507645878621842, 18446744073709551615, 18446744073709551615, 36, 43, 36, 43, 6, 8, true, "to each", "to each"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 15441160910541485865, 2853352555402880464, 18446744073709551615, 18446744073709551615, 303, 305, 303, 305, 57, 58, true, "to", "to"], ["sentence", "", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 6373769167897665877, 13936013081570568770, 18446744073709551615, 18446744073709551615, 0, 84, 0, 84, 0, 15, true, "It is important to realize that almost all of these features are purely geometrical.", "It is important to realize that almost all of these features are purely geometrical."], ["sentence", "", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 13043292274507419925, 9292264259899558631, 18446744073709551615, 18446744073709551615, 85, 198, 85, 198, 15, 34, true, "This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents.", "This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents."], ["term", "single-term", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 4047408680480058129, 791889066255431706, 18446744073709551615, 18446744073709551615, 121, 133, 121, 133, 22, 24, true, "same machine", "same machine"], ["term", "single-term", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 1482873404926828774, 10226222577235274804, 18446744073709551615, 18446744073709551615, 171, 197, 171, 197, 30, 33, true, "programmatic PDF documents", "programmatic PDF documents"], ["term", "single-term", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 14637918593917529467, 8112106004831142076, 18446744073709551615, 18446744073709551615, 52, 60, 52, 60, 10, 11, true, "features", "features"], ["term", "single-term", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 8106464574531629743, 10962337505744550461, 18446744073709551615, 18446744073709551615, 143, 150, 143, 150, 25, 26, true, "methods", "methods"], ["verb", "compound-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 15388942013532414882, 7899202709814799236, 18446744073709551615, 18446744073709551615, 61, 71, 61, 71, 11, 13, true, "are purely", "are purely"], ["verb", "compound-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 3371194906505970753, 16420304959018069903, 18446744073709551615, 18446744073709551615, 103, 116, 103, 116, 19, 21, true, "apply exactly", "apply exactly"], ["verb", "single-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 15441160910541486535, 11324533273861188960, 18446744073709551615, 18446744073709551615, 3, 5, 3, 5, 1, 2, true, "is", "is"], ["verb", "single-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 8106478449187824165, 11112254232972886588, 18446744073709551615, 18446744073709551615, 19, 26, 19, 26, 4, 5, true, "realize", "realize"], ["verb", "single-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 16381206569317834029, 9402022932348997822, 18446744073709551615, 18446744073709551615, 90, 96, 90, 96, 16, 17, true, "allows", "allows"], ["verb", "single-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 14639581097006750428, 9268359803668835067, 18446744073709551615, 18446744073709551615, 134, 142, 134, 142, 24, 25, true, "learning", "learning"], ["verb", "single-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 8106478648743879659, 9110333708159415479, 18446744073709551615, 18446744073709551615, 159, 166, 159, 166, 28, 29, true, "scanned", "scanned"], ["conn", "single-conn", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 389609625631229034, 15586907952937505973, 18446744073709551615, 18446744073709551615, 27, 31, 27, 31, 5, 6, true, "that", "that"], ["conn", "single-conn", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 7969468038485950075, 10183770825831524298, 18446744073709551615, 18446744073709551615, 39, 51, 39, 51, 7, 10, true, "all of these", "all of these"], ["conn", "single-conn", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 8106342614190349012, 9383049340028722314, 18446744073709551615, 18446744073709551615, 151, 158, 151, 158, 26, 28, true, "on both", "on both"], ["conn", "single-conn", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 15441160910541485865, 11324533292921284119, 18446744073709551615, 18446744073709551615, 16, 18, 16, 18, 3, 4, true, "to", "to"], ["conn", "single-conn", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 15441160910541485865, 11324533292921287046, 18446744073709551615, 18446744073709551615, 100, 102, 100, 102, 18, 19, true, "to", "to"], ["numval", "ival", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17767354399704235162, 5756427125969625434, 18446744073709551615, 18446744073709551615, 9, 10, 9, 10, 2, 3, true, "2", "2"], ["numval", "ival", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 12178341415896426714, 2843618427898669081, 18446744073709551615, 18446744073709551615, 145, 148, 145, 148, 25, 26, true, "100", "100"], ["numval", "ival", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 12178341415896306457, 2843616608679996346, 18446744073709551615, 18446744073709551615, 182, 185, 182, 185, 30, 31, true, "400", "400"], ["numval", "ival", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17767354399704235158, 5756427126305291768, 18446744073709551615, 18446744073709551615, 205, 206, 205, 206, 35, 36, true, "6", "6"], ["numval", "ival", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17767354399704235162, 5756427125969610002, 18446744073709551615, 18446744073709551615, 231, 232, 231, 232, 40, 41, true, "2", "2"], ["expression", "word-concatenation", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17169426656242239826, 9947433032897910154, 18446744073709551615, 18446744073709551615, 149, 160, 149, 160, 26, 27, true, "open-access", "open-access"], ["expression", "wtoken-concatenation", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206535642250146, 7407819848928651078, 18446744073709551615, 18446744073709551615, 117, 125, 117, 125, 20, 21, true, "B^{12}", "B$^{12}$"], ["expression", "wtoken-concatenation", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 12178341415896195376, 2843610961077514238, 18446744073709551615, 18446744073709551615, 458, 461, 458, 461, 81, 82, true, "99%", "99%"], ["sentence", "", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16985873230757285690, 4799033660507198173, 18446744073709551615, 18446744073709551615, 0, 126, 0, 126, 0, 22, true, "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$.", "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$."], ["sentence", "", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 9489295008065044229, 10701823520823813640, 18446744073709551615, 18446744073709551615, 127, 223, 127, 223, 22, 39, true, "We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels.", "We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels."], ["sentence", "", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 9055398559525017790, 8115071261852986143, 18446744073709551615, 18446744073709551615, 224, 369, 224, 369, 39, 65, true, "Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label.", "Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label."], ["sentence", "", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 9522204688480473546, 14969526070984716859, 18446744073709551615, 18446744073709551615, 370, 462, 370, 462, 65, 83, true, "We observe that the recall and precision numbers are excellent, with most of them above 99%.", "We observe that the recall and precision numbers are excellent, with most of them above 99%."], ["sentence", "", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 1074921700746209986, 10442039656480201719, 18446744073709551615, 18446744073709551615, 463, 558, 463, 558, 83, 100, true, "This is not surprising, since we are building models that specialise for a particular template.", "This is not surprising, since we are building models that specialise for a particular template."], ["term", "enum-term-mark-2", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 11037453576911667853, 1588379900983990272, 18446744073709551615, 18446744073709551615, 325, 345, 325, 345, 57, 60, true, "recall and precision", "recall and precision"], ["term", "enum-term-mark-2", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 11037453576911667853, 1588379900983994608, 18446744073709551615, 18446744073709551615, 390, 410, 390, 410, 69, 72, true, "recall and precision", "recall and precision"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 7309351122725453953, 337257349231081751, 18446744073709551615, 18446744073709551615, 30, 49, 30, 49, 7, 9, true, "performance results", "performance results"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16684308181841452846, 17463669784208672593, 18446744073709551615, 18446744073709551615, 70, 99, 70, 99, 14, 17, true, "particular scientific journal", "particular scientific journal"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16636937637740271145, 5405861143207605185, 18446744073709551615, 18446744073709551615, 101, 125, 101, 125, 18, 21, true, "Physical Review B^{12}", "Physical Review B$^{12}$"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 3970679774622312652, 3451308206383338191, 18446744073709551615, 18446744073709551615, 149, 167, 149, 167, 26, 28, true, "open-access papers", "open-access papers"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17144395416522725511, 2403796765467765884, 18446744073709551615, 18446744073709551615, 207, 222, 207, 222, 36, 38, true, "semantic labels", "semantic labels"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 5497358094214601811, 15347237438195368414, 18446744073709551615, 18446744073709551615, 243, 259, 243, 259, 43, 45, true, "confusion matrix", "confusion matrix"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17430011809584307966, 717352244476807441, 18446744073709551615, 18446744073709551615, 317, 331, 317, 331, 56, 58, true, "derived recall", "derived recall"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 13620323371457554126, 402277245215938637, 18446744073709551615, 18446744073709551615, 336, 353, 336, 353, 59, 61, true, "precision metrics", "precision metrics"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 13620323047369748942, 9949010308293253292, 18446744073709551615, 18446744073709551615, 401, 418, 401, 418, 71, 73, true, "precision numbers", "precision numbers"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 6423268802182337214, 8832367853516231633, 18446744073709551615, 18446744073709551615, 538, 557, 538, 557, 97, 99, true, "particular template", "particular template"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206567230470443, 6809318760293640371, 18446744073709551615, 18446744073709551615, 57, 63, 57, 63, 11, 12, true, "models", "models"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161667992688, 12417150832156993513, 18446744073709551615, 18446744073709551615, 186, 191, 186, 191, 31, 32, true, "pages", "pages"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206490439671949, 6475152685903048525, 18446744073709551615, 18446744073709551615, 224, 230, 224, 230, 39, 40, true, "Tables", "Tables"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161786359270, 12407069593524672865, 18446744073709551615, 18446744073709551615, 233, 238, 233, 238, 41, 42, true, "shows", "shows"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206590740615814, 3989685327840662497, 18446744073709551615, 18446744073709551615, 295, 301, 295, 301, 51, 52, true, "labels", "labels"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161624445793, 12403043577860159502, 18446744073709551615, 18446744073709551615, 363, 368, 363, 368, 63, 64, true, "label", "label"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206521531485437, 5908878450625800036, 18446744073709551615, 18446744073709551615, 390, 396, 390, 396, 69, 70, true, "recall", "recall"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206567230470443, 6809318760293617748, 18446744073709551615, 18446744073709551615, 509, 515, 509, 515, 92, 93, true, "models", "models"], ["verb", "compound-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206478391039341, 6631544301857728246, 18446744073709551615, 18446744073709551615, 468, 474, 468, 474, 84, 86, true, "is not", "is not"], ["verb", "compound-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 12677514500900765355, 13122361205873164518, 18446744073709551615, 18446744073709551615, 496, 508, 496, 508, 90, 92, true, "are building", "are building"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 2512422596140069222, 889649029264302336, 18446744073709551615, 18446744073709551615, 15, 25, 15, 25, 5, 6, true, "illustrate", "illustrate"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161556620669, 12424395445817823224, 18446744073709551615, 18446744073709551615, 139, 144, 139, 144, 24, 25, true, "chose", "chose"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 5946726816546568286, 13670733632578799336, 18446744073709551615, 18446744073709551615, 172, 181, 172, 181, 29, 30, true, "annotated", "annotated"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 6184954633443293966, 6350969766554330414, 18446744073709551615, 18446744073709551615, 285, 294, 285, 294, 50, 51, true, "predicted", "predicted"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 8106342033696543838, 3620688114961400856, 18446744073709551615, 18446744073709551615, 373, 380, 373, 380, 66, 67, true, "observe", "observe"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 12178341415895564896, 2843664315664361569, 18446744073709551615, 18446744073709551615, 419, 422, 419, 422, 73, 74, true, "are", "are"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 14105928813319051554, 14261583300981947149, 18446744073709551615, 18446744073709551615, 521, 531, 521, 531, 94, 95, true, "specialise", "specialise"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 15441160910541480354, 13052544442623061343, 18446744073709551615, 18446744073709551615, 0, 2, 0, 2, 0, 1, true, "In", "In"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206565712212855, 6848890042535190429, 18446744073709551615, 18446744073709551615, 50, 56, 50, 56, 9, 11, true, "of the", "of the"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161711024499, 12417932501529865029, 18446744073709551615, 18446744073709551615, 64, 69, 64, 69, 12, 14, true, "for a", "for a"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 15441160910541485670, 13052544660078805351, 18446744073709551615, 18446744073709551615, 192, 194, 192, 194, 32, 33, true, "of", "of"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 389609625618037948, 2187892054443163370, 18446744073709551615, 18446744073709551615, 200, 204, 200, 204, 34, 35, true, "with", "with"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 2011002864325523456, 4381318699110906503, 18446744073709551615, 18446744073709551615, 260, 271, 260, 271, 45, 47, true, "between the", "between the"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206568455155979, 7803994972436827427, 18446744073709551615, 18446744073709551615, 310, 316, 310, 316, 54, 56, true, "as the", "as the"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 14637917333167503367, 3527732148100938158, 18446744073709551615, 18446744073709551615, 354, 362, 354, 362, 61, 63, true, "for each", "for each"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 14634130761162415388, 6412837279061811049, 18446744073709551615, 18446744073709551615, 381, 389, 381, 389, 67, 69, true, "that the", "that the"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 389609625618037948, 2187892054439452539, 18446744073709551615, 18446744073709551615, 434, 438, 434, 438, 76, 77, true, "with", "with"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 15441160910541485670, 13052544660078698144, 18446744073709551615, 18446744073709551615, 444, 446, 444, 446, 78, 79, true, "of", "of"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104159273688418, 11826923278090038152, 18446744073709551615, 18446744073709551615, 452, 457, 452, 457, 80, 81, true, "above", "above"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161786618045, 12406057958801194093, 18446744073709551615, 18446744073709551615, 487, 492, 487, 492, 88, 89, true, "since", "since"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161711024499, 12417932501529436504, 18446744073709551615, 18446744073709551615, 532, 537, 532, 537, 95, 97, true, "for a", "for a"], ["numval", "ival", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 17767354399704235163, 3904423922360684838, 18446744073709551615, 18446744073709551615, 139, 140, 139, 140, 25, 26, true, "3", "3"], ["expression", "word-concatenation", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16381206521481257058, 14297813622105226054, 18446744073709551615, 18446744073709551615, 250, 256, 250, 256, 49, 50, true, "re-use", "re-use"], ["expression", "word-concatenation", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 6307689511527468252, 1214499694592820290, 18446744073709551615, 18446744073709551615, 266, 282, 266, 282, 52, 53, true, "machine-learning", "machine-learning"], ["sentence", "", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 13329249844625854201, 11647195949968361009, 18446744073709551615, 18446744073709551615, 0, 201, 0, 201, 0, 38, true, "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on.", "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on."], ["sentence", "", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14866244958236799246, 1582241290890638298, 18446744073709551615, 18446744073709551615, 202, 390, 202, 390, 38, 71, true, "The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform.", "The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform."], ["sentence", "", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 7056640689505297457, 15898107189486909355, 18446744073709551615, 18446744073709551615, 391, 497, 391, 497, 71, 93, true, "We do not need to define rules and heuristics or update code in order to deal with new types of documents.", "We do not need to define rules and heuristics or update code in order to deal with new types of documents."], ["sentence", "", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 10417108364281472724, 12592678685937578318, 18446744073709551615, 18446744073709551615, 498, 531, 498, 531, 93, 101, true, "We only need to gather more data.", "We only need to gather more data."], ["term", "enum-term-mark-3", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14516628572457889571, 3134557747501443101, 18446744073709551615, 18446744073709551615, 416, 436, 416, 436, 77, 80, true, "rules and heuristics", "rules and heuristics"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14797292965502676681, 16865966694447962424, 18446744073709551615, 18446744073709551615, 14, 31, 14, 31, 3, 6, true, "same ML algorithm", "same ML algorithm"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14139930886739072217, 17176244257802466831, 18446744073709551615, 18446744073709551615, 63, 91, 63, 91, 12, 15, true, "different document templates", "different document templates"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 5449238095598290102, 787778668063090810, 18446744073709551615, 18446744073709551615, 170, 188, 170, 188, 32, 34, true, "different datasets", "different datasets"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 3259939013257966773, 11632558026542658713, 18446744073709551615, 18446744073709551615, 261, 292, 261, 292, 51, 54, true, "same machine-learning algorithm", "same machine-learning algorithm"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 220880082258103430, 3861022815522000004, 18446744073709551615, 18446744073709551615, 305, 321, 305, 321, 56, 58, true, "different models", "different models"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 6774260833957264902, 18407524815623595945, 18446744073709551615, 18446744073709551615, 440, 451, 440, 451, 81, 83, true, "update code", "update code"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 6172031744431621751, 3978488892788269949, 18446744073709551615, 18446744073709551615, 474, 483, 474, 483, 88, 90, true, "new types", "new types"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 8106352625329644634, 17757554404763683613, 18446744073709551615, 18446744073709551615, 116, 123, 116, 123, 21, 22, true, "numbers", "numbers"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16381206590630461421, 3492035996256028050, 18446744073709551615, 18446744073709551615, 206, 212, 206, 212, 39, 40, true, "latter", "latter"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104161594617373, 7515263185749906232, 18446744073709551615, 18446744073709551615, 220, 225, 220, 225, 42, 43, true, "power", "power"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14814125365076808131, 3729402058695751624, 18446744073709551615, 18446744073709551615, 233, 241, 233, 241, 45, 46, true, "platform", "platform"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 389609625696431489, 7894812289861145312, 18446744073709551615, 18446744073709551615, 342, 346, 342, 346, 62, 63, true, "data", "data"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15359807916847495711, 4163871681010445755, 18446744073709551615, 18446744073709551615, 363, 373, 363, 373, 66, 67, true, "annotation", "annotation"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14814125365076808131, 3729402058695741659, 18446744073709551615, 18446744073709551615, 381, 389, 381, 389, 69, 70, true, "platform", "platform"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104161825278214, 8012832663492087338, 18446744073709551615, 18446744073709551615, 416, 421, 416, 421, 77, 78, true, "rules", "rules"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15990705612308896517, 3857150851002098021, 18446744073709551615, 18446744073709551615, 426, 436, 426, 436, 79, 80, true, "heuristics", "heuristics"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104161571401725, 7516826412201686197, 18446744073709551615, 18446744073709551615, 455, 460, 455, 460, 84, 85, true, "order", "order"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 6167933651658664291, 13301884288216877389, 18446744073709551615, 18446744073709551615, 487, 496, 487, 496, 91, 92, true, "documents", "documents"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 389609625696431489, 7894812289861139762, 18446744073709551615, 18446744073709551615, 526, 530, 526, 530, 99, 100, true, "data", "data"], ["verb", "compound-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14747077774534403201, 7105556452324242867, 18446744073709551615, 18446744073709551615, 32, 59, 32, 59, 6, 11, true, "proves to perform very well", "proves to perform very well"], ["verb", "compound-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14239158811794973922, 10315187407549061159, 18446744073709551615, 18446744073709551615, 394, 415, 394, 415, 72, 77, true, "do not need to define", "do not need to define"], ["verb", "compound-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16971139356283959223, 11139264805031781559, 18446744073709551615, 18446744073709551615, 506, 520, 506, 520, 95, 98, true, "need to gather", "need to gather"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541486535, 15153334710149597966, 18446744073709551615, 18446744073709551615, 96, 98, 96, 98, 17, 18, true, "is", "is"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104161786359265, 8061425452439289620, 18446744073709551615, 18446744073709551615, 124, 129, 124, 129, 22, 23, true, "shown", "shown"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 6171748239210728040, 9366341145459489170, 18446744073709551615, 18446744073709551615, 152, 161, 152, 161, 29, 30, true, "providing", "providing"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104159241569908, 7610535763181481526, 18446744073709551615, 18446744073709551615, 192, 197, 192, 197, 35, 36, true, "train", "train"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541486535, 15153334710149605791, 18446744073709551615, 18446744073709551615, 213, 215, 213, 215, 40, 41, true, "is", "is"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 2873469186180050816, 16698193277509785614, 18446744073709551615, 18446744073709551615, 246, 256, 246, 256, 48, 50, true, "can re-use", "can re-use"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14639576584100401389, 15397542964574277795, 18446744073709551615, 18446744073709551615, 296, 304, 296, 304, 55, 56, true, "generate", "generate"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104159219515955, 7608650457402937501, 18446744073709551615, 18446744073709551615, 329, 334, 329, 334, 59, 60, true, "based", "based"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14639712996089786853, 8663919117501187161, 18446744073709551615, 18446744073709551615, 347, 355, 347, 355, 63, 64, true, "gathered", "gathered"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 389609625696287852, 7895900577300378447, 18446744073709551615, 18446744073709551615, 464, 468, 464, 468, 86, 87, true, "deal", "deal"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 3610960565523737115, 13561712172663392615, 18446744073709551615, 18446744073709551615, 99, 111, 99, 111, 18, 20, true, "evident from", "evident from"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485678, 15153334713242377590, 18446744073709551615, 18446744073709551615, 60, 62, 60, 62, 11, 12, true, "on", "on"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541487053, 15153334931560349328, 18446744073709551615, 18446744073709551615, 93, 95, 93, 95, 16, 17, true, "as", "as"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541486538, 15153334711652936703, 18446744073709551615, 18446744073709551615, 130, 132, 130, 132, 23, 24, true, "in", "in"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541486989, 15153334930388051925, 18446744073709551615, 18446744073709551615, 149, 151, 149, 151, 28, 29, true, "by", "by"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 389609625618037948, 7897898173905938343, 18446744073709551615, 18446744073709551615, 165, 169, 165, 169, 31, 32, true, "with", "with"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485670, 15153334714140199582, 18446744073709551615, 18446744073709551615, 226, 228, 226, 228, 43, 44, true, "of", "of"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16381206566339127348, 14669148860732540835, 18446744073709551615, 18446744073709551615, 335, 341, 335, 341, 60, 62, true, "on the", "on the"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16381206574363061705, 2766189990559691740, 18446744073709551615, 18446744073709551615, 356, 362, 356, 362, 64, 66, true, "by the", "by the"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16381206566339127348, 14669148860732601697, 18446744073709551615, 18446744073709551615, 374, 380, 374, 380, 67, 69, true, "on the", "on the"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541486538, 15153334711652948392, 18446744073709551615, 18446744073709551615, 452, 454, 452, 454, 83, 84, true, "in", "in"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 389609625618037948, 7897898173905972427, 18446744073709551615, 18446744073709551615, 469, 473, 469, 473, 87, 88, true, "with", "with"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485670, 15153334714140216286, 18446744073709551615, 18446744073709551615, 484, 486, 484, 486, 90, 91, true, "of", "of"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432013192, 18446744073709551615, 18446744073709551615, 39, 41, 39, 41, 7, 8, true, "to", "to"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432022097, 18446744073709551615, 18446744073709551615, 189, 191, 189, 191, 34, 35, true, "to", "to"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432033275, 18446744073709551615, 18446744073709551615, 293, 295, 293, 295, 54, 55, true, "to", "to"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432040505, 18446744073709551615, 18446744073709551615, 406, 408, 406, 408, 75, 76, true, "to", "to"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432019581, 18446744073709551615, 18446744073709551615, 461, 463, 461, 463, 85, 86, true, "to", "to"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432014978, 18446744073709551615, 18446744073709551615, 511, 513, 511, 513, 96, 97, true, "to", "to"], ["numval", "fval", 10082834006373808153, "TEXT", "#/texts/63", 1.0, 12178341415896435186, 12354888335591318213, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "3.5", "3.5"], ["parenthesis", "round brackets", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 1949057018516412029, 4731319093472217309, 18446744073709551615, 18446744073709551615, 114, 127, 114, 127, 23, 27, true, "(e.g. tables)", "(e.g. tables)"], ["expression", "common", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 15441160910541487324, 7536681162710661076, 18446744073709551615, 18446744073709551615, 115, 119, 115, 119, 24, 25, true, "eg", "e.g."], ["sentence", "", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 5437557385871984946, 3509044475987188634, 18446744073709551615, 18446744073709551615, 0, 187, 0, 187, 0, 37, true, "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics.", "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics."], ["term", "enum-term-mark-4", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 11674491770136657522, 974643053390547455, 18446744073709551615, 18446744073709551615, 54, 65, 54, 65, 11, 14, true, "JSON or XML", "JSON or XML"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 3435211303988053560, 11739782976917057217, 18446744073709551615, 18446744073709551615, 30, 50, 30, 50, 7, 10, true, "structured data file", "structured data file"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 15966067594173682327, 4212498403530896421, 18446744073709551615, 18446744073709551615, 62, 72, 62, 72, 13, 15, true, "XML format", "XML format"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 6167884912352185035, 12959445755126332733, 18446744073709551615, 18446744073709551615, 115, 126, 115, 126, 24, 26, true, "eg tables", "e.g. tables"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 11738704476441755021, 12646170625007623252, 18446744073709551615, 18446744073709551615, 137, 154, 137, 154, 29, 31, true, "original document", "original document"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 8089093614662532807, 16534350903274590994, 18446744073709551615, 18446744073709551615, 170, 186, 170, 186, 34, 36, true, "layout semantics", "layout semantics"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 5947879501615734370, 16723148886486918663, 18446744073709551615, 18446744073709551615, 8, 17, 8, 17, 2, 3, true, "component", "component"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 389609625541450799, 17523067649600904691, 18446744073709551615, 18446744073709551615, 54, 58, 54, 58, 11, 12, true, "JSON", "JSON"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 389609625631325904, 17516519717884761463, 18446744073709551615, 18446744073709551615, 97, 101, 97, 101, 20, 21, true, "text", "text"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 8106342034010873556, 2969114644928140757, 18446744073709551615, 18446744073709551615, 106, 113, 106, 113, 22, 23, true, "objects", "objects"], ["verb", "single-verb", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 329104159303279946, 1921849107251509286, 18446744073709551615, 18446744073709551615, 22, 27, 22, 27, 5, 6, true, "build", "build"], ["verb", "single-verb", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 14652282307552191074, 8217532366089348940, 18446744073709551615, 18446744073709551615, 80, 88, 80, 88, 17, 18, true, "contains", "contains"], ["verb", "single-verb", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 6168253838748177623, 17899337151654565740, 18446744073709551615, 18446744073709551615, 156, 165, 156, 165, 32, 33, true, "retaining", "retaining"], ["conn", "single-conn", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 8106396862006371970, 13010212846614719649, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "In this", "In this"], ["conn", "single-conn", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 15441160910541486538, 7536681282669100229, 18446744073709551615, 18446744073709551615, 51, 53, 51, 53, 10, 11, true, "in", "in"], ["conn", "single-conn", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 14637917359887717745, 313033658418924384, 18446744073709551615, 18446744073709551615, 128, 136, 128, 136, 27, 29, true, "from the", "from the"], ["numval", "ival", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 17767354399704235161, 5588649276010093912, 18446744073709551615, 18446744073709551615, 8, 9, 8, 9, 1, 2, true, "1", "1"], ["sentence", "", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 4079383948124449940, 3119139511864959531, 18446744073709551615, 18446744073709551615, 0, 104, 0, 104, 0, 19, true, "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper.", "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper."], ["term", "single-term", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 11674491770136880709, 5217965483515470753, 18446744073709551615, 18446744073709551615, 28, 39, 28, 39, 6, 8, true, "JSON output", "JSON output"], ["term", "single-term", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 12638008641667971393, 2522219315756212794, 18446744073709551615, 18446744073709551615, 47, 72, 47, 72, 10, 13, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 2703018679320364082, 2865230855669483164, 18446744073709551615, 18446744073709551615, 79, 89, 79, 89, 14, 15, true, "conversion", "conversion"], ["term", "single-term", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 329104161668023890, 9676341964876116743, 18446744073709551615, 18446744073709551615, 98, 103, 98, 103, 17, 18, true, "paper", "paper"], ["verb", "single-verb", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 8106471806274607440, 321004264845765781, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 1, true, "Listing", "Listing"], ["conn", "single-conn", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 14637917359887717745, 10704083857127895113, 18446744073709551615, 18446744073709551615, 19, 27, 19, 27, 4, 6, true, "from the", "from the"], ["conn", "single-conn", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 16381206565712212855, 12911931045271253420, 18446744073709551615, 18446744073709551615, 40, 46, 40, 46, 8, 10, true, "of the", "of the"], ["conn", "single-conn", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 329104159268432372, 9679527653046328826, 18446744073709551615, 18446744073709551615, 73, 78, 73, 78, 13, 14, true, "after", "after"], ["conn", "single-conn", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 8106342927224204628, 535620869506699266, 18446744073709551615, 18446744073709551615, 90, 97, 90, 97, 15, 17, true, "of this", "of this"], ["numval", "fval", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 16380809986263074798, 5937101393416439947, 18446744073709551615, 18446744073709551615, 395, 401, 389, 395, 102, 103, true, "52.304", "52.304"], ["numval", "fval", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8104408040055799238, 2943403399868708284, 18446744073709551615, 18446744073709551615, 403, 410, 397, 404, 104, 105, true, "509.750", "509.750"], ["numval", "fval", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8104408102331224046, 4490268864724477756, 18446744073709551615, 18446744073709551615, 412, 419, 406, 413, 106, 107, true, "168.099", "168.099"], ["numval", "fval", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8104407293157033488, 7192866711925861658, 18446744073709551615, 18446744073709551615, 421, 428, 415, 422, 108, 109, true, "523.980", "523.980"], ["numval", "fval", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 16380809986263074798, 5937101393416465084, 18446744073709551615, 18446744073709551615, 530, 536, 524, 530, 149, 150, true, "52.304", "52.304"], ["numval", "fval", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8104407374744654892, 16042562905739309700, 18446744073709551615, 18446744073709551615, 538, 545, 532, 539, 151, 152, true, "337.678", "337.678"], ["numval", "fval", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8104408552830081932, 18196868546913757514, 18446744073709551615, 18446744073709551615, 547, 554, 541, 548, 153, 154, true, "286.067", "286.067"], ["numval", "fval", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8104407369288224173, 13969194233940722473, 18446744073709551615, 18446744073709551615, 556, 563, 550, 557, 155, 156, true, "380.475", "380.475"], ["numval", "ival", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 17767354399704235161, 18295152010191180304, 18446744073709551615, 18446744073709551615, 440, 441, 434, 435, 115, 116, true, "1", "1"], ["numval", "ival", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 17767354399704235161, 18295152010191236684, 18446744073709551615, 18446744073709551615, 486, 487, 480, 481, 132, 133, true, "1", "1"], ["numval", "ival", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 17767354399704235161, 18295152010191238436, 18446744073709551615, 18446744073709551615, 575, 576, 569, 570, 162, 163, true, "1", "1"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 11736687447036233654, 17687067960907041877, 18446744073709551615, 18446744073709551615, 2, 16, 2, 16, 1, 4, true, "'description '", "'description '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 14654356831728624299, 18254855415209487570, 18446744073709551615, 18446744073709551615, 20, 28, 20, 28, 6, 9, true, "'title '", "'title '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 13795373644677474927, 18104819000623483187, 18446744073709551615, 18446744073709551615, 124, 135, 122, 133, 29, 32, true, "'abstract '", "'abstract '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 10719594227580180780, 14421532022078249720, 18446744073709551615, 18446744073709551615, 213, 228, 209, 224, 50, 53, true, "'affiliations '", "'affiliations '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 575770324487261834, 3605258785962640201, 18446744073709551615, 18446744073709551615, 272, 282, 268, 278, 62, 65, true, "'authors '", "'authors '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 7381608760917382202, 10883295277425239735, 18446744073709551615, 18446744073709551615, 356, 368, 350, 362, 85, 88, true, "'main-text '", "'main-text '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106340713288551065, 3107944983165031366, 18446744073709551615, 18446744073709551615, 373, 380, 367, 374, 91, 94, true, "'prov '", "'prov '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106340710974510622, 3199816550281400145, 18446744073709551615, 18446744073709551615, 385, 392, 379, 386, 97, 100, true, "'bbox '", "'bbox '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106340713016800444, 3157500034515162704, 18446744073709551615, 18446744073709551615, 431, 438, 425, 432, 111, 114, true, "'page '", "'page '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106340478591275654, 9077972039930584664, 18446744073709551615, 18446744073709551615, 446, 453, 440, 447, 119, 122, true, "'type '", "'type '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 15831486378781541, 6042498588803505873, 18446744073709551615, 18446744073709551615, 455, 474, 449, 468, 123, 126, true, "'subtitle-level-1 '", "'subtitle-level-1 '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106340479380661033, 510261449979103449, 18446744073709551615, 18446744073709551615, 476, 483, 470, 477, 127, 130, true, "'text '", "'text '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106340713288551065, 3107944983165120697, 18446744073709551615, 18446744073709551615, 508, 515, 502, 509, 138, 141, true, "'prov '", "'prov '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106340710974510622, 3199816550281359091, 18446744073709551615, 18446744073709551615, 520, 527, 514, 521, 144, 147, true, "'bbox '", "'bbox '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106340713016800444, 3157500034515154295, 18446744073709551615, 18446744073709551615, 566, 573, 560, 567, 158, 161, true, "'page '", "'page '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106340478591275654, 9077972039930591696, 18446744073709551615, 18446744073709551615, 581, 588, 575, 582, 166, 169, true, "'type '", "'type '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 10766821071270484704, 14839301776352559560, 18446744073709551615, 18446744073709551615, 590, 602, 584, 596, 170, 173, true, "'paragraph '", "'paragraph '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106340479380661033, 510261449979111737, 18446744073709551615, 18446744073709551615, 604, 611, 598, 605, 174, 177, true, "'text '", "'text '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 10272324097281288742, 10872858060505935110, 18446744073709551615, 18446744073709551615, 675, 684, 669, 678, 195, 198, true, "'tables '", "'tables '"], ["quote", "quote", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 10268666146106372529, 13850873794523955762, 18446744073709551615, 18446744073709551615, 699, 708, 693, 702, 201, 204, true, "'images '", "'images '"], ["parenthesis", "square brackets", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 329104147602500341, 9083963063919851072, 18446744073709551615, 18446744073709551615, 199, 204, 197, 202, 45, 46, true, "[...]", "[...]"], ["parenthesis", "square brackets", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 15056564897104309604, 11093721742743237784, 18446744073709551615, 18446744073709551615, 394, 429, 388, 423, 101, 110, true, "[52.304, 509.750, 168.099, 523.980]", "[52.304, 509.750, 168.099, 523.980]"], ["parenthesis", "square brackets", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 17333147805298038874, 15591114653425531874, 18446744073709551615, 18446744073709551615, 529, 564, 523, 558, 148, 157, true, "[52.304, 337.678, 286.067, 380.475]", "[52.304, 337.678, 286.067, 380.475]"], ["parenthesis", "square brackets", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 329104147602500341, 9083963063919593431, 18446744073709551615, 18446744073709551615, 635, 640, 629, 634, 183, 184, true, "[...]", "[...]"], ["parenthesis", "square brackets", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 12487634319941207155, 12405585016679207624, 18446744073709551615, 18446744073709551615, 686, 697, 680, 691, 199, 200, true, "[{...},...]", "[{...},...]"], ["parenthesis", "square brackets", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 12487634319941207155, 12405585016679253918, 18446744073709551615, 18446744073709551615, 710, 721, 704, 715, 205, 206, true, "[{...},...]", "[{...},...]"], ["parenthesis", "square brackets", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 17837765232122465801, 16060040637329888456, 18446744073709551615, 18446744073709551615, 382, 444, 376, 438, 95, 118, true, "[{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }]", "[{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }]"], ["parenthesis", "square brackets", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 621801424787223927, 15343265756874626607, 18446744073709551615, 18446744073709551615, 517, 579, 511, 573, 142, 165, true, "[{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }]", "[{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }]"], ["parenthesis", "square brackets", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 590157718694844200, 8757219046110088769, 18446744073709551615, 18446744073709551615, 370, 673, 364, 667, 89, 194, true, "[{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...]", "[{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...]"], ["expression", "common", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 12178341415895450733, 31485684207307624, 18446744073709551615, 18446744073709551615, 669, 672, 663, 666, 192, 193, true, "etc", "..."], ["expression", "common", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 12178341415895450733, 31485684207307624, 18446744073709551615, 18446744073709551615, 669, 672, 663, 666, 192, 193, true, "etc", "..."], ["expression", "wtoken-concatenation", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 329104147688259688, 9079636758847877802, 18446744073709551615, 18446744073709551615, 199, 204, 197, 202, 45, 46, true, "[etc]", "[etc]"], ["expression", "wtoken-concatenation", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 6179391932534576043, 11778382618801418834, 18446744073709551615, 18446744073709551615, 357, 366, 351, 360, 86, 87, true, "main-text", "main-text"], ["expression", "wtoken-concatenation", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 201465026221884313, 4446366458756758572, 18446744073709551615, 18446744073709551615, 456, 472, 450, 466, 124, 125, true, "subtitle-level-1", "subtitle-level-1"], ["expression", "wtoken-concatenation", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 329104147688259688, 9079636758847848121, 18446744073709551615, 18446744073709551615, 635, 640, 629, 634, 183, 184, true, "[etc]", "[etc]"], ["expression", "wtoken-concatenation", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 3719221080692272657, 10124958637147004614, 18446744073709551615, 18446744073709551615, 686, 697, 680, 691, 199, 200, true, "[{etc},etc]", "[{etc},etc]"], ["expression", "wtoken-concatenation", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 3719221080692272657, 10124958637146997952, 18446744073709551615, 18446744073709551615, 710, 721, 704, 715, 205, 206, true, "[{etc},etc]", "[{etc},etc]"], ["sentence", "", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 12747379470653179806, 13947797496153661718, 18446744073709551615, 18446744073709551615, 2, 115, 2, 115, 1, 25, true, "'description ': { 'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale.", "'description ': { 'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale."], ["term", "single-term", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 12638008641667971393, 9787328837340625402, 18446744073709551615, 18446744073709551615, 31, 56, 31, 56, 11, 14, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 1569117581610251126, 11518170912874304704, 18446744073709551615, 18446744073709551615, 3, 14, 3, 14, 2, 3, true, "description", "description"], ["term", "single-term", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 329104159220026466, 8933668979039903764, 18446744073709551615, 18446744073709551615, 21, 26, 21, 26, 7, 8, true, "title", "title"], ["term", "single-term", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106464587473865376, 11926086110051212520, 18446744073709551615, 18446744073709551615, 60, 67, 60, 67, 16, 17, true, "machine", "machine"], ["term", "single-term", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 14814125365076808131, 15564610995631240518, 18446744073709551615, 18446744073709551615, 77, 85, 77, 85, 18, 19, true, "platform", "platform"], ["term", "single-term", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 6167933651658664291, 14499880107689901366, 18446744073709551615, 18446744073709551615, 96, 105, 96, 105, 21, 22, true, "documents", "documents"], ["term", "single-term", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 329104161785194305, 10362495811380044883, 18446744073709551615, 18446744073709551615, 109, 114, 109, 114, 23, 24, true, "scale", "scale"], ["verb", "single-verb", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 14639581097006750428, 657388776835985868, 18446744073709551615, 18446744073709551615, 68, 76, 68, 76, 17, 18, true, "learning", "learning"], ["verb", "single-verb", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 16381206560503286032, 6900030997592563197, 18446744073709551615, 18446744073709551615, 89, 95, 89, 95, 20, 21, true, "ingest", "ingest"], ["conn", "single-conn", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 15441160910541487054, 13499006115035219017, 18446744073709551615, 18446744073709551615, 106, 108, 106, 108, 22, 23, true, "at", "at"], ["conn", "single-conn", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 15441160910541485865, 13499010106333859091, 18446744073709551615, 18446744073709551615, 86, 88, 86, 88, 19, 20, true, "to", "to"], ["geoloc", "country", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 2664439525053388608, 9116367361930621434, 18446744073709551615, 18446744073709551615, 257, 268, 253, 264, 59, 60, true, "Switzerland", "Switzerland"], ["parenthesis", "round brackets", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 11589998698201685701, 2108045663283293889, 18446744073709551615, 18446744073709551615, 47, 67, 47, 67, 6, 12, true, "(or human-annotated)", "(or human-annotated)"], ["sentence", "", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 3678194766815209883, 5187453010508481258, 18446744073709551615, 18446744073709551615, 92, 162, 92, 162, 16, 30, true, "It should be noted that no machine learning is used in this component.", "It should be noted that no machine learning is used in this component."], ["sentence", "", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 6629453114376390697, 8094247588633397965, 18446744073709551615, 18446744073709551615, 163, 226, 163, 226, 30, 40, true, "It is purely rule based and therefore completely deterministic.", "It is purely rule based and therefore completely deterministic."], ["term", "single-term", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 16568806906391567217, 17793791609084484746, 18446744073709551615, 18446744073709551615, 119, 135, 119, 135, 22, 24, true, "machine learning", "machine learning"], ["term", "single-term", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 5947879501615734370, 3445417967466009645, 18446744073709551615, 18446744073709551615, 152, 161, 152, 161, 28, 29, true, "component", "component"], ["term", "single-term", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 389609625633008101, 12949720961570224958, 18446744073709551615, 18446744073709551615, 176, 180, 176, 180, 33, 34, true, "rule", "rule"], ["verb", "compound-verb", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 10453859466047522884, 10239611338580811250, 18446744073709551615, 18446744073709551615, 95, 110, 95, 110, 17, 20, true, "should be noted", "should be noted"], ["verb", "compound-verb", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 8106398132977396513, 9427955354524457620, 18446744073709551615, 18446744073709551615, 136, 143, 136, 143, 24, 26, true, "is used", "is used"], ["verb", "compound-verb", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 6181919770894982462, 17141845908897276483, 18446744073709551615, 18446744073709551615, 166, 175, 166, 175, 31, 33, true, "is purely", "is purely"], ["verb", "single-verb", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 329104159219515955, 12104426966588498612, 18446744073709551615, 18446744073709551615, 181, 186, 181, 186, 34, 35, true, "based", "based"], ["conn", "single-conn", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 8106351186178321347, 18145291139271698703, 18446744073709551615, 18446744073709551615, 111, 118, 111, 118, 20, 22, true, "that no", "that no"], ["conn", "single-conn", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 8106398107541152403, 17574223839716805875, 18446744073709551615, 18446744073709551615, 144, 151, 144, 151, 26, 28, true, "in this", "in this"], ["numval", "ival", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 17767354399704235161, 5543555095985442958, 18446744073709551615, 18446744073709551615, 528, 529, 528, 529, 97, 98, true, "1", "1"], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 10627520535034650380, 17531239345629359200, 18446744073709551615, 18446744073709551615, 0, 41, 0, 41, 0, 9, true, "The assembly phase is a two step process.", "The assembly phase is a two step process."], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16727770016948924314, 14992801873823104190, 18446744073709551615, 18446744073709551615, 42, 161, 42, 161, 9, 30, true, "First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order.", "First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order."], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14844075509771675718, 14837722875086120924, 18446744073709551615, 18446744073709551615, 162, 263, 162, 263, 30, 50, true, "Then, the text of all cells that have the same label is contracted into a temporary document objects.", "Then, the text of all cells that have the same label is contracted into a temporary document objects."], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 13321150786190303145, 15098908199975296360, 18446744073709551615, 18446744073709551615, 264, 386, 264, 386, 50, 72, true, "Third, we build the internal structure of the temporary document objects, based on the information provided by the models.", "Third, we build the internal structure of the temporary document objects, based on the information provided by the models."], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 13420423012039011390, 1170303508595995079, 18446744073709551615, 18446744073709551615, 387, 467, 387, 467, 72, 86, true, "The latter is only applicable for internally structured objects, such as tables.", "The latter is only applicable for internally structured objects, such as tables."], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 12664594763449438938, 4431678268083815697, 18446744073709551615, 18446744073709551615, 468, 530, 468, 530, 86, 99, true, "An example of the generated JSON output is shown in Listing 1.", "An example of the generated JSON output is shown in Listing 1."], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14290303081280478932, 16534232039347859570, 18446744073709551615, 18446744073709551615, 4, 18, 4, 18, 1, 3, true, "assembly phase", "assembly phase"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 17347109100190648605, 3336910274907778664, 18446744073709551615, 18446744073709551615, 28, 40, 28, 40, 6, 8, true, "step process", "step process"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 2317020437411802284, 2943170210053648815, 18446744073709551615, 18446744073709551615, 97, 118, 97, 118, 19, 22, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 15944815540688621742, 12538918597954147758, 18446744073709551615, 18446744073709551615, 204, 214, 204, 214, 40, 42, true, "same label", "same label"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16002692145973620163, 4639543490213745049, 18446744073709551615, 18446744073709551615, 236, 262, 236, 262, 46, 49, true, "temporary document objects", "temporary document objects"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 10566132640081128, 4599927001618331381, 18446744073709551615, 18446744073709551615, 284, 302, 284, 302, 55, 57, true, "internal structure", "internal structure"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16002692145973620163, 4639543490213757407, 18446744073709551615, 18446744073709551615, 310, 336, 310, 336, 59, 62, true, "temporary document objects", "temporary document objects"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 7430002429723240008, 11164050789656870747, 18446744073709551615, 18446744073709551615, 486, 507, 486, 507, 90, 93, true, "generated JSON output", "generated JSON output"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106397416725855571, 6968734469406140499, 18446744073709551615, 18446744073709551615, 53, 60, 53, 60, 12, 13, true, "gathers", "gathers"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104161531686411, 14389223814653826808, 18446744073709551615, 18446744073709551615, 69, 74, 69, 74, 15, 16, true, "cells", "cells"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104161571401725, 14130794494432512208, 18446744073709551615, 18446744073709551615, 155, 160, 155, 160, 28, 29, true, "order", "order"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 389609625631325904, 11472131617453103029, 18446744073709551615, 18446744073709551615, 172, 176, 172, 176, 33, 34, true, "text", "text"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104161531686411, 14389223814653817543, 18446744073709551615, 18446744073709551615, 184, 189, 184, 189, 36, 37, true, "cells", "cells"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104161844229707, 14389961847775103244, 18446744073709551615, 18446744073709551615, 264, 269, 264, 269, 50, 51, true, "Third", "Third"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14388065630035882329, 10056850550847032004, 18446744073709551615, 18446744073709551615, 351, 362, 351, 362, 66, 67, true, "information", "information"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206567230470443, 12055872324404544162, 18446744073709551615, 18446744073709551615, 379, 385, 379, 385, 70, 71, true, "models", "models"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206590630461421, 13437572129232177666, 18446744073709551615, 18446744073709551615, 391, 397, 391, 397, 73, 74, true, "latter", "latter"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106342034010873556, 5767697418284233272, 18446744073709551615, 18446744073709551615, 443, 450, 443, 450, 80, 81, true, "objects", "objects"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206513098478539, 14656247513331790784, 18446744073709551615, 18446744073709551615, 460, 466, 460, 466, 84, 85, true, "tables", "tables"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106397496085150773, 7053203505372722327, 18446744073709551615, 18446744073709551615, 471, 478, 471, 478, 87, 88, true, "example", "example"], ["verb", "compound-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 17902514739826327922, 7529083656148566052, 18446744073709551615, 18446744073709551615, 134, 154, 134, 154, 25, 28, true, "according to reading", "according to reading"], ["verb", "compound-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 12000496086994902479, 13430903801362966440, 18446744073709551615, 18446744073709551615, 215, 228, 215, 228, 42, 44, true, "is contracted", "is contracted"], ["verb", "compound-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106398132970509785, 7717436398183375812, 18446744073709551615, 18446744073709551615, 398, 405, 398, 405, 74, 76, true, "is only", "is only"], ["verb", "compound-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14637951881518043285, 9028879385327672482, 18446744073709551615, 18446744073709551615, 508, 516, 508, 516, 93, 95, true, "is shown", "is shown"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 15441160910541486535, 10491326776662798407, 18446744073709551615, 18446744073709551615, 19, 21, 19, 21, 3, 4, true, "is", "is"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 5615021626537608757, 17464799388347342780, 18446744073709551615, 18446744073709551615, 86, 96, 86, 96, 18, 19, true, "associated", "associated"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104161786092648, 14156877157946491894, 18446744073709551615, 18446744073709551615, 123, 128, 123, 128, 23, 24, true, "sorts", "sorts"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 389609625695387621, 11482694222264222746, 18446744073709551615, 18446744073709551615, 195, 199, 195, 199, 38, 39, true, "have", "have"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104159303279946, 14234820330716235313, 18446744073709551615, 18446744073709551615, 274, 279, 274, 279, 53, 54, true, "build", "build"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104159219515955, 13988686724284707554, 18446744073709551615, 18446744073709551615, 338, 343, 338, 343, 63, 64, true, "based", "based"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14814125838089603136, 10841486009179486453, 18446744073709551615, 18446744073709551615, 363, 371, 363, 371, 67, 68, true, "provided", "provided"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14120356269929906423, 17410768018743515205, 18446744073709551615, 18446744073709551615, 432, 442, 432, 442, 79, 80, true, "structured", "structured"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106471806274607440, 1670327284070813530, 18446744073709551615, 18446744073709551615, 520, 527, 520, 527, 96, 97, true, "Listing", "Listing"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 901249285509952446, 4327457466414367608, 18446744073709551615, 18446744073709551615, 406, 420, 406, 420, 76, 78, true, "applicable for", "applicable for"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106478685702231057, 8652417891385661854, 18446744073709551615, 18446744073709551615, 452, 459, 452, 459, 82, 84, true, "such as", "such as"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 389609625618037948, 11467344892940421528, 18446744073709551615, 18446744073709551615, 75, 79, 75, 79, 16, 17, true, "with", "with"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206565712007226, 11723414488362140611, 18446744073709551615, 18446744073709551615, 177, 183, 177, 183, 34, 36, true, "of all", "of all"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206560517276114, 11772523745347271510, 18446744073709551615, 18446744073709551615, 229, 235, 229, 235, 44, 46, true, "into a", "into a"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206565712212855, 11847078438284722432, 18446744073709551615, 18446744073709551615, 303, 309, 303, 309, 57, 59, true, "of the", "of the"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206566339127348, 11698215495646926996, 18446744073709551615, 18446744073709551615, 344, 350, 344, 350, 64, 66, true, "on the", "on the"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206574363061705, 17340129670882622867, 18446744073709551615, 18446744073709551615, 372, 378, 372, 378, 68, 70, true, "by the", "by the"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206565712212855, 11847078438284787116, 18446744073709551615, 18446744073709551615, 479, 485, 479, 485, 88, 90, true, "of the", "of the"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 15441160910541486538, 10491326776470778829, 18446744073709551615, 18446744073709551615, 517, 519, 517, 519, 95, 96, true, "in", "in"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 15441160910541485865, 10491326711005526490, 18446744073709551615, 18446744073709551615, 144, 146, 144, 146, 26, 27, true, "to", "to"], ["numval", "ival", 2142320548375900929, "TEXT", "#/texts/69", 1.0, 17767354399704235156, 16458659285473085163, 18446744073709551615, 18446744073709551615, 0, 1, 0, 1, 0, 1, true, "4", "4"], ["expression", "word-concatenation", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6285955549867796622, 12192460564545960229, 18446744073709551615, 18446744073709551615, 618, 634, 618, 634, 111, 112, true, "time-to-solution", "time-to-solution"], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 11044655914692672378, 2888733359687006370, 18446744073709551615, 18446744073709551615, 0, 123, 0, 123, 0, 22, true, "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated.", "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated."], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 9774189456888168740, 4152543508246757256, 18446744073709551615, 18446744073709551615, 124, 246, 124, 246, 22, 43, true, "Before discussing the technical details, we would like to point out our requirements for the architecture of the platform.", "Before discussing the technical details, we would like to point out our requirements for the architecture of the platform."], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 12407957798033762804, 13470604212648561724, 18446744073709551615, 18446744073709551615, 247, 293, 247, 293, 43, 51, true, "These requirements are all related to scaling.", "These requirements are all related to scaling."], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 4653964671317425985, 17216044985232325101, 18446744073709551615, 18446744073709551615, 294, 461, 294, 461, 51, 83, true, "Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources.", "Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources."], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 17228622883758304054, 15113971675963977401, 18446744073709551615, 18446744073709551615, 462, 680, 462, 680, 83, 121, true, "In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation.", "In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation."], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6820290209528918513, 3633182920105543370, 18446744073709551615, 18446744073709551615, 681, 777, 681, 777, 121, 138, true, "It is clear that the architecture of such a service is heavily influenced by these requirements.", "It is clear that the architecture of such a service is heavily influenced by these requirements."], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 11289641670498948963, 4109634796027215399, 18446744073709551615, 18446744073709551615, 146, 163, 146, 163, 25, 27, true, "technical details", "technical details"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 4421383392096991748, 4820655472322214248, 18446744073709551615, 18446744073709551615, 443, 460, 443, 460, 80, 82, true, "compute resources", "compute resources"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16088126245064377604, 12842078242820415728, 18446744073709551615, 18446744073709551615, 465, 476, 465, 476, 84, 86, true, "other words", "other words"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 4421383392096991748, 4820655472321830361, 18446744073709551615, 18446744073709551615, 586, 603, 586, 603, 106, 108, true, "compute resources", "compute resources"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106478708629288965, 853306226471699405, 18446744073709551615, 18446744073709551615, 8, 15, 8, 15, 2, 3, true, "section", "section"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 990358581043194791, 2414189034056929402, 18446744073709551615, 18446744073709551615, 37, 50, 37, 50, 8, 9, true, "microservices", "microservices"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 2703018952916355661, 17317252314622786864, 18446744073709551615, 18446744073709551615, 66, 76, 66, 76, 13, 14, true, "components", "components"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14814125365076808131, 4170838424915628816, 18446744073709551615, 18446744073709551615, 84, 92, 84, 92, 16, 17, true, "platform", "platform"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 13240311013633905449, 11928407068432787608, 18446744073709551615, 18446744073709551615, 196, 208, 196, 208, 35, 36, true, "requirements", "requirements"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 11899564443746965611, 1669599917395635316, 18446744073709551615, 18446744073709551615, 217, 229, 217, 229, 38, 39, true, "architecture", "architecture"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14814125365076808131, 4170838424915634854, 18446744073709551615, 18446744073709551615, 237, 245, 237, 245, 41, 42, true, "platform", "platform"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 13240311013633905449, 11928407068432751416, 18446744073709551615, 18446744073709551615, 253, 265, 253, 265, 44, 45, true, "requirements", "requirements"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14814125365076808131, 4170838424915633248, 18446744073709551615, 18446744073709551615, 326, 334, 326, 334, 57, 58, true, "platform", "platform"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206574973295053, 579996873747921936, 18446744073709551615, 18446744073709551615, 353, 359, 353, 359, 62, 63, true, "number", "number"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6167933651658664291, 7440866408497827921, 18446744073709551615, 18446744073709551615, 363, 372, 363, 372, 64, 65, true, "documents", "documents"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206574973295053, 579996873747911416, 18446744073709551615, 18446744073709551615, 378, 384, 378, 384, 67, 68, true, "number", "number"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104159157820437, 15600004509778203866, 18446744073709551615, 18446744073709551615, 388, 393, 388, 393, 69, 70, true, "users", "users"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206574973295053, 579996873747912575, 18446744073709551615, 18446744073709551615, 421, 427, 421, 427, 76, 77, true, "number", "number"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104161517016668, 13957283097469922549, 18446744073709551615, 18446744073709551615, 431, 436, 431, 436, 78, 79, true, "cloud", "cloud"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106478708506632112, 1549478568074441550, 18446744073709551615, 18446744073709551615, 488, 495, 488, 495, 90, 91, true, "service", "service"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14638289822750178210, 16529051670404838156, 18446744073709551615, 18446744073709551615, 512, 520, 512, 520, 94, 95, true, "millions", "millions"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6167933651658664291, 7440866408497716574, 18446744073709551615, 18446744073709551615, 524, 533, 524, 533, 96, 97, true, "documents", "documents"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 3504070246238334482, 7971751554704088263, 18446744073709551615, 18446744073709551615, 553, 562, 553, 562, 100, 101, true, "thousands", "thousands"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104159157820437, 15600004509778174339, 18446744073709551615, 18446744073709551615, 566, 571, 566, 571, 102, 103, true, "users", "users"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6285955549867796622, 12192460564545960229, 18446744073709551615, 18446744073709551615, 618, 634, 618, 634, 111, 112, true, "time-to-solution", "time-to-solution"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104159219994925, 15605472043071850604, 18446744073709551615, 18446744073709551615, 656, 661, 656, 661, 116, 117, true, "times", "times"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6167836358624304835, 12533972813433648220, 18446744073709551615, 18446744073709551615, 670, 679, 670, 679, 119, 120, true, "operation", "operation"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 11899564443746965611, 1669599917395666812, 18446744073709551615, 18446744073709551615, 702, 714, 702, 714, 126, 127, true, "architecture", "architecture"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106478708506632112, 1549478568074460237, 18446744073709551615, 18446744073709551615, 725, 732, 725, 732, 130, 131, true, "service", "service"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 13240311013633905449, 11928407068432784250, 18446744073709551615, 18446744073709551615, 764, 776, 764, 776, 136, 137, true, "requirements", "requirements"], ["verb", "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 12669508327642496792, 11272358114773168348, 18446744073709551615, 18446744073709551615, 93, 105, 93, 105, 17, 19, true, "are deployed", "are deployed"], ["verb", "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 17737636265695672887, 8822130707725823076, 18446744073709551615, 18446744073709551615, 168, 187, 168, 187, 29, 33, true, "would like to point", "would like to point"], ["verb", "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 4717893903194484574, 13497868670598652853, 18446744073709551615, 18446744073709551615, 274, 292, 274, 292, 47, 50, true, "related to scaling", "related to scaling"], ["verb", "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 9576455331508001963, 2005151878314602116, 18446744073709551615, 18446744073709551615, 535, 552, 535, 552, 98, 100, true, "serve potentially", "serve potentially"], ["verb", "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6062403169006746003, 8883787506358796560, 18446744073709551615, 18446744073709551615, 733, 754, 733, 754, 131, 134, true, "is heavily influenced", "is heavily influenced"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14652261806242873016, 7890494648004461696, 18446744073709551615, 18446744073709551615, 20, 28, 20, 28, 5, 6, true, "describe", "describe"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 13632574162947055061, 147315883317329044, 18446744073709551615, 18446744073709551615, 110, 122, 110, 122, 20, 21, true, "orchestrated", "orchestrated"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 5314857828561765555, 11123792899717439144, 18446744073709551615, 18446744073709551615, 131, 141, 131, 141, 23, 24, true, "discussing", "discussing"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 12178341415895564896, 16193825294775180695, 18446744073709551615, 18446744073709551615, 266, 269, 266, 269, 45, 46, true, "are", "are"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8380894560351698162, 15803013507579142869, 18446744073709551615, 18446744073709551615, 311, 321, 311, 321, 54, 56, true, "would like", "would like"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104161785194305, 13942660614226268092, 18446744073709551615, 18446744073709551615, 338, 343, 338, 343, 59, 60, true, "scale", "scale"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104159219515955, 15594698497900091739, 18446744073709551615, 18446744073709551615, 437, 442, 437, 442, 79, 80, true, "based", "based"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 389609625633595931, 15688211806062539958, 18446744073709551615, 18446744073709551615, 481, 485, 481, 485, 88, 89, true, "want", "want"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 2873440693780286732, 10449764614793007239, 18446744073709551615, 18446744073709551615, 501, 511, 501, 511, 92, 94, true, "can ingest", "can ingest"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104161785194305, 13942660614225758865, 18446744073709551615, 18446744073709551615, 576, 581, 576, 581, 104, 105, true, "scale", "scale"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541486535, 2048505449065788699, 18446744073709551615, 18446744073709551615, 635, 637, 635, 637, 112, 113, true, "is", "is"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541486535, 2048505449065787833, 18446744073709551615, 18446744073709551615, 684, 686, 684, 686, 122, 123, true, "is", "is"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6165459236568015364, 497035845389833334, 18446744073709551615, 18446744073709551615, 604, 613, 604, 613, 108, 110, true, "such that", "such that"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16386233399945118620, 6139299107000348345, 18446744073709551615, 18446744073709551615, 638, 651, 638, 651, 113, 115, true, "reasonable at", "reasonable at"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 2617690495147367356, 5753489008096455564, 18446744073709551615, 18446744073709551615, 687, 697, 687, 697, 123, 125, true, "clear that", "clear that"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106396862006371970, 10149877881189646287, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "In this", "In this"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106398107541243064, 4725564592462762947, 18446744073709551615, 18446744073709551615, 51, 58, 51, 58, 9, 11, true, "in each", "in each"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206565712212855, 16630894630023874072, 18446744073709551615, 18446744073709551615, 59, 65, 59, 65, 11, 13, true, "of the", "of the"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206565712212855, 16630894630023888451, 18446744073709551615, 18446744073709551615, 77, 83, 77, 83, 14, 16, true, "of the", "of the"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206535679983326, 14828520614292756444, 18446744073709551615, 18446744073709551615, 124, 130, 124, 130, 22, 23, true, "Before", "Before"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106397727991264470, 15908125160341103167, 18446744073709551615, 18446744073709551615, 209, 216, 209, 216, 36, 38, true, "for the", "for the"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206565712212855, 16630894630015899877, 18446744073709551615, 18446744073709551615, 230, 236, 230, 236, 39, 41, true, "of the", "of the"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14638857868319795209, 4352025152199097228, 18446744073709551615, 18446744073709551615, 344, 352, 344, 352, 60, 62, true, "with the", "with the"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752346374, 18446744073709551615, 18446744073709551615, 360, 362, 360, 362, 63, 64, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752352577, 18446744073709551615, 18446744073709551615, 385, 387, 385, 387, 68, 69, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752367355, 18446744073709551615, 18446744073709551615, 428, 430, 428, 430, 77, 78, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541480354, 2048505281272838279, 18446744073709551615, 18446744073709551615, 462, 464, 462, 464, 83, 84, true, "In", "In"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752360531, 18446744073709551615, 18446744073709551615, 521, 523, 521, 523, 95, 96, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752440089, 18446744073709551615, 18446744073709551615, 563, 565, 563, 565, 101, 102, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106397728094825258, 15814643395009540075, 18446744073709551615, 18446744073709551615, 662, 669, 662, 669, 117, 119, true, "for any", "for any"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752438307, 18446744073709551615, 18446744073709551615, 715, 717, 715, 717, 127, 128, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14652255025526908904, 16607703748518201877, 18446744073709551615, 18446744073709551615, 755, 763, 755, 763, 134, 136, true, "by these", "by these"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485865, 2048505449664077172, 18446744073709551615, 18446744073709551615, 179, 181, 179, 181, 31, 32, true, "to", "to"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485865, 2048505449663964299, 18446744073709551615, 18446744073709551615, 282, 284, 282, 284, 48, 49, true, "to", "to"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485865, 2048505449663959877, 18446744073709551615, 18446744073709551615, 335, 337, 335, 337, 58, 59, true, "to", "to"], ["numval", "fval", 174789262945188010, "TEXT", "#/texts/71", 1.0, 12178341415896306585, 8581499132904184537, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "4.1", "4.1"], ["numval", "ival", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 17767354399704235161, 5235953771215622646, 18446744073709551615, 18446744073709551615, 10, 11, 10, 11, 2, 3, true, "1", "1"], ["numval", "ival", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 17767354399704235158, 5235953771432357895, 18446744073709551615, 18446744073709551615, 101, 102, 101, 102, 21, 22, true, "6", "6"], ["sentence", "", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 17007226042152908832, 18331404462945221276, 18446744073709551615, 18446744073709551615, 0, 90, 0, 90, 0, 19, true, "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents.", "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents."], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 16381206514091025767, 10265022769446664856, 18446744073709551615, 18446744073709551615, 3, 9, 3, 9, 1, 2, true, "Figure", "Figure"], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 8106396896178898697, 4219910857709835922, 18446744073709551615, 18446744073709551615, 29, 36, 29, 36, 8, 9, true, "diagram", "diagram"], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 14814125852840540191, 5403353526375880725, 18446744073709551615, 18446744073709551615, 44, 52, 44, 52, 11, 12, true, "pipeline", "pipeline"], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 14814125365076808131, 1502793658629529948, 18446744073709551615, 18446744073709551615, 60, 68, 60, 68, 14, 15, true, "platform", "platform"], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 6167933651658664291, 2252968926517446007, 18446744073709551615, 18446744073709551615, 80, 89, 80, 89, 17, 18, true, "documents", "documents"], ["verb", "compound-verb", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 5518720687765131523, 9686268265492720351, 18446744073709551615, 18446744073709551615, 16, 26, 16, 26, 5, 7, true, "have shown", "have shown"], ["verb", "single-verb", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 8106476000254393164, 1942641588307467677, 18446744073709551615, 18446744073709551615, 72, 79, 72, 79, 16, 17, true, "process", "process"], ["conn", "single-conn", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 15441160910541480354, 17121927414994045497, 18446744073709551615, 18446744073709551615, 0, 2, 0, 2, 0, 1, true, "In", "In"], ["conn", "single-conn", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 15441160910541485670, 17121926226924974742, 18446744073709551615, 18446744073709551615, 37, 39, 37, 39, 9, 10, true, "of", "of"], ["conn", "single-conn", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 16381206566339127348, 9987704510349709695, 18446744073709551615, 18446744073709551615, 53, 59, 53, 59, 12, 14, true, "on the", "on the"], ["conn", "single-conn", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 15441160910541485865, 17121926292459753487, 18446744073709551615, 18446744073709551615, 69, 71, 69, 71, 15, 16, true, "to", "to"], ["sentence", "", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 105368025718952442, 5450071664030950078, 18446744073709551615, 18446744073709551615, 14, 79, 14, 79, 2, 16, true, "As one can observe, we have grouped the service into four layers.", "As one can observe, we have grouped the service into four layers."], ["term", "single-term", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 8106478708506632112, 6233289218919425562, 18446744073709551615, 18446744073709551615, 54, 61, 54, 61, 11, 12, true, "service", "service"], ["term", "single-term", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 16381206590620802860, 16233116481575014775, 18446744073709551615, 18446744073709551615, 72, 78, 72, 78, 14, 15, true, "layers", "layers"], ["verb", "compound-verb", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 189925242426617641, 13895959288404047356, 18446744073709551615, 18446744073709551615, 37, 49, 37, 49, 8, 10, true, "have grouped", "have grouped"], ["verb", "single-verb", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 14892726175400695403, 16590583946158903014, 18446744073709551615, 18446744073709551615, 21, 32, 21, 32, 4, 6, true, "can observe", "can observe"], ["conn", "single-conn", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 15441160910541480533, 4819755269055644271, 18446744073709551615, 18446744073709551615, 14, 16, 14, 16, 2, 3, true, "As", "As"], ["conn", "single-conn", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 389609625698622943, 11283567657878655855, 18446744073709551615, 18446744073709551615, 62, 66, 62, 66, 12, 13, true, "into", "into"], ["numval", "ival", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 17767354399704235161, 17804011231002177177, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 15441160910541481977, 15610781920844557983, 18446744073709551615, 18446744073709551615, 275, 277, 275, 277, 46, 47, true, "13", "13"], ["parenthesis", "reference", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 12178341415896395122, 13204308870015609887, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 3, true, "(1)", "(1)"], ["expression", "word-concatenation", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952517228, 18446744073709551615, 18446744073709551615, 42, 50, 42, 50, 9, 10, true, "REST-API", "REST-API"], ["expression", "word-concatenation", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952601508, 18446744073709551615, 18446744073709551615, 138, 146, 138, 146, 27, 28, true, "REST-API", "REST-API"], ["expression", "word-concatenation", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 3753411203337468488, 5100377154689721404, 18446744073709551615, 18446744073709551615, 181, 193, 181, 193, 33, 34, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952522114, 18446744073709551615, 18446744073709551615, 209, 217, 209, 217, 37, 38, true, "REST-API", "REST-API"], ["sentence", "", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 5975418266041050143, 7900187415786058174, 18446744073709551615, 18446744073709551615, 4, 204, 4, 204, 3, 36, true, "An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering.", "An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering."], ["sentence", "", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 16292425914778879272, 3552138339604334986, 18446744073709551615, 18446744073709551615, 205, 307, 205, 307, 36, 53, true, "The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", "The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python."], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 8692614377683751894, 6543009394914129596, 18446744073709551615, 18446744073709551615, 7, 22, 7, 22, 4, 6, true, "interface layer", "interface layer"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 11968118699453218413, 16853549282351953165, 18446744073709551615, 18446744073709551615, 57, 70, 57, 70, 12, 14, true, "user frontend", "user frontend"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 11968118699453218413, 16853549282351954610, 18446744073709551615, 18446744073709551615, 76, 89, 76, 89, 16, 18, true, "user frontend", "user frontend"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 17244914598722202958, 15499641398155697595, 18446744073709551615, 18446744073709551615, 96, 117, 96, 117, 20, 22, true, "AngularJS application", "AngularJS application"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 1591019414094504294, 17465194081095954832, 18446744073709551615, 18446744073709551615, 181, 203, 181, 203, 33, 35, true, "ground-truth gathering", "ground-truth gathering"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 17622757252402159492, 18169862670430999681, 18446744073709551615, 18446744073709551615, 252, 274, 252, 274, 44, 46, true, "OpenAPI specifications", "OpenAPI specifications"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952517228, 18446744073709551615, 18446744073709551615, 42, 50, 42, 50, 9, 10, true, "REST-API", "REST-API"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 12178341415895527965, 13202575941196575545, 18446744073709551615, 18446744073709551615, 127, 130, 127, 130, 24, 25, true, "top", "top"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952601508, 18446744073709551615, 18446744073709551615, 138, 146, 138, 146, 27, 28, true, "REST-API", "REST-API"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 15359807916847569012, 12690297070768585539, 18446744073709551615, 18446744073709551615, 166, 176, 166, 176, 31, 32, true, "annotators", "annotators"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952522114, 18446744073709551615, 18446744073709551615, 209, 217, 209, 217, 37, 38, true, "REST-API", "REST-API"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 16381206485156459004, 2190123068096885489, 18446744073709551615, 18446744073709551615, 300, 306, 300, 306, 51, 52, true, "Python", "Python"], ["verb", "compound-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14637952033947516066, 1170598421847841274, 18446744073709551615, 18446744073709551615, 218, 226, 218, 226, 38, 40, true, "is built", "is built"], ["verb", "compound-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 116696039858106091, 15266057091493719963, 18446744073709551615, 18446744073709551615, 231, 247, 231, 247, 41, 43, true, "documented using", "documented using"], ["verb", "compound-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 37170045853396780, 10862211224580790545, 18446744073709551615, 18446744073709551615, 282, 296, 282, 296, 48, 50, true, "is implemented", "is implemented"], ["verb", "single-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 5584174880054122043, 13797832223961649041, 18446744073709551615, 18446744073709551615, 29, 39, 29, 39, 7, 8, true, "implements", "implements"], ["verb", "single-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 15441160910541486535, 15610783856184804840, 18446744073709551615, 18446744073709551615, 90, 92, 90, 92, 18, 19, true, "is", "is"], ["verb", "single-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 329104159303279946, 14770817403596920463, 18446744073709551615, 18446744073709551615, 118, 123, 118, 123, 22, 23, true, "build", "build"], ["verb", "single-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 5584174880054122043, 13797832223961674239, 18446744073709551615, 18446744073709551615, 151, 161, 151, 161, 29, 30, true, "implements", "implements"], ["conn", "single-conn", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 15441160910541485678, 15610783856720662618, 18446744073709551615, 18446744073709551615, 124, 126, 124, 126, 23, 24, true, "on", "on"], ["conn", "single-conn", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 16381206565712212855, 3629200545768582333, 18446744073709551615, 18446744073709551615, 131, 137, 131, 137, 25, 27, true, "of the", "of the"], ["conn", "single-conn", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 12178341415895625940, 13202525365648469119, 18446744073709551615, 18446744073709551615, 177, 180, 177, 180, 32, 33, true, "for", "for"], ["conn", "single-conn", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 15441160910541486538, 15610783856135866232, 18446744073709551615, 18446744073709551615, 297, 299, 297, 299, 50, 51, true, "in", "in"], ["numval", "ival", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 17767354399704235162, 766019618037252930, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "2", "2"], ["parenthesis", "reference", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 12178341415896395187, 17029329038495000300, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 3, true, "(2)", "(2)"], ["parenthesis", "round brackets", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 1812897535394120128, 3870874385890063204, 18446744073709551615, 18446744073709551615, 303, 497, 303, 497, 51, 87, true, "(e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images)", "(e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images)"], ["expression", "common", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541487324, 5910392785272575830, 18446744073709551615, 18446744073709551615, 304, 309, 304, 309, 52, 53, true, "eg", "e. g."], ["expression", "word-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 6187817560337829240, 10074786117573267255, 18446744073709551615, 18446744073709551615, 222, 231, 222, 231, 39, 40, true, "in-memory", "in-memory"], ["expression", "word-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 10210587797782980674, 269685690895573883, 18446744073709551615, 18446744073709551615, 653, 667, 653, 667, 114, 115, true, "fault-tolerant", "fault-tolerant"], ["expression", "wtoken-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15503455610017494293, 14071574465570134500, 18446744073709551615, 18446744073709551615, 175, 190, 175, 190, 31, 32, true, "RabbitMQ^{14}", "RabbitMQ$^{14}$"], ["expression", "wtoken-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 9275871508895795608, 13145605651607786139, 18446744073709551615, 18446744073709551615, 243, 255, 243, 255, 42, 43, true, "Redis^{15}", "Redis$^{15}$"], ["sentence", "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 4025108859080697854, 7684932098811697541, 18446744073709551615, 18446744073709551615, 4, 122, 4, 122, 3, 22, true, "An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result.", "An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result."], ["sentence", "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 10578140482875773017, 5422705622766817180, 18446744073709551615, 18446744073709551615, 123, 191, 123, 191, 22, 33, true, "The task scheduling is done with the Message Broker RabbitMQ$^{14}$.", "The task scheduling is done with the Message Broker RabbitMQ$^{14}$."], ["sentence", "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 13063193318125667342, 9479480819005320541, 18446744073709551615, 18446744073709551615, 192, 256, 192, 256, 33, 44, true, "The results are stored in the in-memory data store Redis$^{15}$.", "The results are stored in the in-memory data store Redis$^{15}$."], ["sentence", "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 1495400135141364806, 13254541674959843841, 18446744073709551615, 18446744073709551615, 257, 612, 257, 612, 44, 106, true, "In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully.", "In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully."], ["sentence", "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 1841397930198716309, 11891118918529386819, 18446744073709551615, 18446744073709551615, 613, 702, 613, 702, 106, 121, true, "This approach allows for a very robust, fault-tolerant service with very little downtime.", "This approach allows for a very robust, fault-tolerant service with very little downtime."], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 881931955171775830, 1850213153382221251, 18446744073709551615, 18446744073709551615, 7, 26, 7, 26, 4, 6, true, "orchestration layer", "orchestration layer"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16569031532297427649, 2288991119528845313, 18446744073709551615, 18446744073709551615, 88, 104, 88, 104, 16, 18, true, "execution status", "execution status"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 12318137194760091867, 7829156630170179123, 18446744073709551615, 18446744073709551615, 109, 121, 109, 121, 19, 21, true, "final result", "final result"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 6315348039533026141, 3213548333245122529, 18446744073709551615, 18446744073709551615, 127, 142, 127, 142, 23, 25, true, "task scheduling", "task scheduling"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8295209353697935236, 9538738240789817737, 18446744073709551615, 18446744073709551615, 160, 190, 160, 190, 29, 32, true, "Message Broker RabbitMQ^{14}", "Message Broker RabbitMQ$^{14}$"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 10696383395384997690, 12295999164377771107, 18446744073709551615, 18446744073709551615, 222, 255, 222, 255, 39, 43, true, "in-memory data store Redis^{15}", "in-memory data store Redis$^{15}$"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 1969668578613914549, 8249504550464603474, 18446744073709551615, 18446744073709551615, 277, 302, 277, 302, 48, 51, true, "certain consecutive tasks", "certain consecutive tasks"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 14650937348812924036, 6147429119200407258, 18446744073709551615, 18446744073709551615, 320, 328, 320, 328, 55, 57, true, "PDF page", "PDF page"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15096203362930329687, 5146287085934249298, 18446744073709551615, 18446744073709551615, 390, 411, 390, 411, 67, 70, true, "programmatic PDF page", "programmatic PDF page"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 9914165367421220601, 17615564355797482202, 18446744073709551615, 18446744073709551615, 446, 457, 446, 457, 77, 79, true, "OCR service", "OCR service"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 10873436773834842694, 9664366626516883504, 18446744073709551615, 18446744073709551615, 537, 553, 537, 553, 95, 97, true, "subsequent steps", "subsequent steps"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16455430351063957858, 8578040699139249120, 18446744073709551615, 18446744073709551615, 653, 675, 653, 675, 114, 116, true, "fault-tolerant service", "fault-tolerant service"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 3478107702264293237, 8540445734424787667, 18446744073709551615, 18446744073709551615, 686, 701, 686, 701, 118, 120, true, "little downtime", "little downtime"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104159214088329, 8194156825567116069, 18446744073709551615, 18446744073709551615, 46, 51, 46, 51, 9, 10, true, "tasks", "tasks"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 990358581043194791, 1684395431220408370, 18446744073709551615, 18446744073709551615, 60, 73, 60, 73, 12, 13, true, "microservices", "microservices"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206578935372333, 8703937210789941258, 18446744073709551615, 18446744073709551615, 75, 81, 75, 81, 14, 15, true, "stores", "stores"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106478445190161533, 10842974023255663515, 18446744073709551615, 18446744073709551615, 196, 203, 196, 203, 34, 35, true, "results", "results"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104161571401725, 8064733993734324746, 18446744073709551615, 18446744073709551615, 260, 265, 260, 265, 45, 46, true, "order", "order"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541487324, 5910392785272575830, 18446744073709551615, 18446744073709551615, 304, 309, 304, 309, 52, 53, true, "eg", "e. g."], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206560620045048, 3483212362973437467, 18446744073709551615, 18446744073709551615, 351, 357, 351, 357, 60, 61, true, "images", "images"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106479143794098783, 12365847001997486008, 18446744073709551615, 18446744073709551615, 375, 382, 375, 382, 64, 65, true, "parsing", "parsing"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206560620045048, 3483212362973413702, 18446744073709551615, 18446744073709551615, 427, 433, 427, 433, 73, 74, true, "images", "images"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104161531686411, 8019458249201971616, 18446744073709551615, 18446744073709551615, 473, 478, 473, 478, 82, 83, true, "cells", "cells"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206560620045048, 3483212362973442358, 18446744073709551615, 18446744073709551615, 490, 496, 490, 496, 85, 86, true, "images", "images"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104159214088329, 8194156825567072634, 18446744073709551615, 18446744073709551615, 520, 525, 520, 525, 91, 92, true, "tasks", "tasks"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 14650448032998792781, 16582753633148168921, 18446744073709551615, 18446744073709551615, 618, 626, 618, 626, 107, 108, true, "approach", "approach"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106398132958436429, 6971888441348882646, 18446744073709551615, 18446744073709551615, 143, 150, 143, 150, 25, 27, true, "is done", "is done"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15388942590337907789, 4280950829150221216, 18446744073709551615, 18446744073709551615, 204, 214, 204, 214, 35, 37, true, "are stored", "are stored"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 11296839647862937485, 4164176593204929584, 18446744073709551615, 18446744073709551615, 334, 350, 334, 350, 58, 60, true, "embedded scanned", "embedded scanned"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 13646629520376931899, 8519320198473003477, 18446744073709551615, 18446744073709551615, 358, 372, 358, 372, 61, 63, true, "requires first", "requires first"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 17982373942613951464, 3937427824351288252, 18446744073709551615, 18446744073709551615, 554, 571, 554, 571, 97, 100, true, "are only executed", "are only executed"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 12714940176042944879, 15936925534730964046, 18446744073709551615, 18446744073709551615, 588, 611, 588, 611, 103, 105, true, "terminated successfully", "terminated successfully"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 6168537129726002426, 152533883599703009, 18446744073709551615, 18446744073709551615, 32, 41, 32, 41, 7, 8, true, "schedules", "schedules"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106475907566715134, 4681167473274342910, 18446744073709551615, 18446744073709551615, 269, 276, 269, 276, 47, 48, true, "perform", "perform"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106479143794098783, 12365847001997489621, 18446744073709551615, 18446744073709551615, 310, 317, 310, 317, 53, 54, true, "parsing", "parsing"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106397496930289884, 8772117640065310876, 18446744073709551615, 18446744073709551615, 415, 422, 415, 422, 71, 72, true, "extract", "extract"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106397496930289884, 8772117640065258856, 18446744073709551615, 18446744073709551615, 461, 468, 461, 468, 80, 81, true, "extract", "extract"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104161556625920, 8062732787574674007, 18446744073709551615, 18446744073709551615, 514, 519, 514, 519, 90, 91, true, "chain", "chain"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206569317834029, 14824105829638492947, 18446744073709551615, 18446744073709551615, 627, 633, 627, 633, 108, 109, true, "allows", "allows"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 6165459236568015364, 2354429920637518397, 18446744073709551615, 18446744073709551615, 527, 536, 527, 536, 93, 95, true, "such that", "such that"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106397727991264470, 2795094083464015695, 18446744073709551615, 18446744073709551615, 52, 59, 52, 59, 10, 12, true, "for the", "for the"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 14638857868319795209, 456352319925648448, 18446744073709551615, 18446744073709551615, 151, 159, 151, 159, 27, 29, true, "with the", "with the"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206560518651853, 3478068517407956556, 18446744073709551615, 18446744073709551615, 215, 221, 215, 221, 37, 39, true, "in the", "in the"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541480354, 5910392698548049506, 18446744073709551615, 18446744073709551615, 257, 259, 257, 259, 44, 45, true, "In", "In"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 389609625618037948, 4452313474781359012, 18446744073709551615, 18446744073709551615, 329, 333, 329, 333, 57, 58, true, "with", "with"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206565712212855, 14812884387065852796, 18446744073709551615, 18446744073709551615, 383, 389, 383, 389, 65, 67, true, "of the", "of the"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16057368201763467386, 5837363433449722185, 18446744073709551615, 18446744073709551615, 479, 489, 479, 489, 83, 85, true, "from these", "from these"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206478470086874, 3195302339494553733, 18446744073709551615, 18446744073709551615, 572, 578, 572, 578, 100, 102, true, "if the", "if the"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104161711024499, 8003597956646423596, 18446744073709551615, 18446744073709551615, 634, 639, 634, 639, 109, 111, true, "for a", "for a"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 389609625618037948, 4452313474781410633, 18446744073709551615, 18446744073709551615, 676, 680, 676, 680, 116, 117, true, "with", "with"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541485865, 5910392672594327793, 18446744073709551615, 18446744073709551615, 266, 268, 266, 268, 46, 47, true, "to", "to"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541485865, 5910392672594322553, 18446744073709551615, 18446744073709551615, 412, 414, 412, 414, 70, 71, true, "to", "to"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541485865, 5910392672594307233, 18446744073709551615, 18446744073709551615, 458, 460, 458, 460, 79, 80, true, "to", "to"], ["numval", "ival", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 17767354399704235163, 6623757277320803060, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "3", "3"], ["numval", "ival", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 17767354399704235163, 6623757277320810717, 18446744073709551615, 18446744073709551615, 74, 75, 74, 75, 13, 14, true, "3", "3"], ["parenthesis", "reference", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 12178341415896394992, 10915561974328134756, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 3, true, "(3)", "(3)"], ["parenthesis", "round brackets", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 9121140803212188746, 5062856742962380004, 18446744073709551615, 18446744073709551615, 148, 200, 148, 200, 26, 38, true, "(e.g. parsing, training, predictions, assembly, etc)", "(e.g. parsing, training, predictions, assembly, etc)"], ["expression", "common", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541487324, 13616139199714584790, 18446744073709551615, 18446744073709551615, 149, 153, 149, 153, 27, 28, true, "eg", "e.g."], ["expression", "wtoken-concatenation", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 10737622929664928958, 8651677615518322335, 18446744073709551615, 18446744073709551615, 332, 346, 332, 346, 61, 62, true, "library^{16}", "library$^{16}$"], ["sentence", "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 2864828402709972468, 11332229229505390779, 18446744073709551615, 18446744073709551615, 4, 201, 4, 201, 3, 39, true, "A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc).", "A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc)."], ["sentence", "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15435795280083407866, 4623434773980135134, 18446744073709551615, 18446744073709551615, 202, 347, 202, 347, 39, 63, true, "In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$.", "In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$."], ["sentence", "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 11964442506319969125, 6249550519038030170, 18446744073709551615, 18446744073709551615, 348, 503, 348, 503, 63, 90, true, "This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker.", "This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker."], ["sentence", "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 17777896679674985198, 11534624099623021000, 18446744073709551615, 18446744073709551615, 504, 579, 504, 579, 90, 106, true, "The workers are not only consumers of tasks, but may also produce new ones.", "The workers are not only consumers of tasks, but may also produce new ones."], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 5470814617574924291, 724407251113024, 18446744073709551615, 18446744073709551615, 6, 19, 6, 19, 4, 6, true, "compute layer", "compute layer"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 9909278470053653981, 12829744493301079322, 18446744073709551615, 18446744073709551615, 124, 147, 124, 147, 24, 26, true, "available microservices", "available microservices"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14058638345038458245, 12275078423904715575, 18446744073709551615, 18446744073709551615, 149, 161, 149, 161, 27, 29, true, "eg parsing", "e.g. parsing"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 4681591099663460584, 16278920567637920045, 18446744073709551615, 18446744073709551615, 304, 314, 304, 314, 56, 58, true, "task queue", "task queue"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 11198085877607539434, 9973077180559472567, 18446744073709551615, 18446744073709551615, 325, 346, 325, 346, 60, 62, true, "Celery library^{16}", "Celery library$^{16}$"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 4421383392096991748, 16024629256340455225, 18446744073709551615, 18446744073709551615, 388, 405, 388, 405, 70, 72, true, "compute resources", "compute resources"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8115411903316729668, 9549090779302063453, 18446744073709551615, 18446744073709551615, 524, 538, 524, 538, 94, 96, true, "only consumers", "only consumers"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14814151107139696752, 1255249427891598545, 18446744073709551615, 18446744073709551615, 570, 578, 570, 578, 103, 105, true, "new ones", "new ones"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 990358581043194791, 13405780939829855380, 18446744073709551615, 18446744073709551615, 40, 53, 40, 53, 9, 10, true, "microservices", "microservices"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106478708629288965, 12667054332205292279, 18446744073709551615, 18446744073709551615, 66, 73, 66, 73, 12, 13, true, "section", "section"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106478059506484182, 10697147794249982519, 18446744073709551615, 18446744073709551615, 89, 96, 89, 96, 18, 19, true, "workers", "workers"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104161624475862, 13848348646967812138, 18446744073709551615, 18446744073709551615, 105, 110, 105, 110, 21, 22, true, "layer", "layer"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14634153919632515335, 7524102672994522753, 18446744073709551615, 18446744073709551615, 163, 171, 163, 171, 30, 31, true, "training", "training"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15175963360124346573, 14989804171272821450, 18446744073709551615, 18446744073709551615, 173, 184, 173, 184, 32, 33, true, "predictions", "predictions"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14650448171968968290, 3790776944315438052, 18446744073709551615, 18446744073709551615, 186, 194, 186, 194, 34, 35, true, "assembly", "assembly"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104161571401725, 13849737740624165279, 18446744073709551615, 18446744073709551615, 205, 210, 205, 210, 40, 41, true, "order", "order"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206521526353544, 1782906868391855387, 18446744073709551615, 18446744073709551615, 225, 231, 225, 231, 44, 45, true, "regard", "regard"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 6168338487309432467, 5928008866800453885, 18446744073709551615, 18446744073709551615, 235, 244, 235, 244, 46, 47, true, "resources", "resources"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16682817150367627875, 17485526265701101, 18446744073709551615, 18446744073709551615, 272, 284, 272, 284, 52, 53, true, "microservice", "microservice"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206557159905849, 8006079737033218220, 18446744073709551615, 18446744073709551615, 418, 424, 418, 424, 75, 76, true, "worker", "worker"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106398485449787361, 5675061092301137187, 18446744073709551615, 18446744073709551615, 461, 468, 461, 468, 82, 83, true, "cluster", "cluster"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206570348587859, 15808885045293000288, 18446744073709551615, 18446744073709551615, 496, 502, 496, 502, 88, 89, true, "broker", "broker"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106478059506484182, 10697147794249956396, 18446744073709551615, 18446744073709551615, 508, 515, 508, 515, 91, 92, true, "workers", "workers"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104159214088329, 14097861728688353508, 18446744073709551615, 18446744073709551615, 542, 547, 542, 547, 97, 98, true, "tasks", "tasks"], ["verb", "compound-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 13859584371553084961, 6953162611440438890, 18446744073709551615, 18446744073709551615, 249, 266, 249, 266, 49, 51, true, "have encapsulated", "have encapsulated"], ["verb", "compound-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 288538720869017437, 4206979805055504968, 18446744073709551615, 18446744073709551615, 425, 453, 425, 453, 76, 80, true, "can be spawned automatically", "can be spawned automatically"], ["verb", "compound-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106397797831668975, 4782227961575271919, 18446744073709551615, 18446744073709551615, 516, 523, 516, 523, 92, 94, true, "are not", "are not"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 5584174880054122043, 15438871383215853010, 18446744073709551615, 18446744073709551615, 25, 35, 25, 35, 7, 8, true, "implements", "implements"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14652261813489544447, 11247279661113316629, 18446744073709551615, 18446744073709551615, 54, 62, 54, 62, 10, 11, true, "detailed", "detailed"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14652255854767583909, 12382046103724054031, 18446744073709551615, 18446744073709551615, 111, 119, 111, 119, 22, 23, true, "executes", "executes"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104161785194305, 13850093838201494630, 18446744073709551615, 18446744073709551615, 214, 219, 214, 219, 42, 43, true, "scale", "scale"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 1477344672819384985, 8283526875963376019, 18446744073709551615, 18446744073709551615, 292, 303, 292, 303, 55, 56, true, "distributed", "distributed"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104159157798023, 14298779374945162593, 18446744073709551615, 18446744073709551615, 315, 320, 315, 320, 58, 59, true, "using", "using"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206569317834029, 15096333879001227968, 18446744073709551615, 18446744073709551615, 353, 359, 353, 359, 64, 65, true, "allows", "allows"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104161785194305, 13850093838201414935, 18446744073709551615, 18446744073709551615, 378, 383, 378, 383, 68, 69, true, "scale", "scale"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14634109580260092070, 816619907238816136, 18446744073709551615, 18446744073709551615, 473, 481, 473, 481, 84, 85, true, "register", "register"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106476000256008955, 10662702206647144879, 18446744073709551615, 18446744073709551615, 562, 569, 562, 569, 102, 103, true, "produce", "produce"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541486538, 13616139232389586146, 18446744073709551615, 18446744073709551615, 63, 65, 63, 65, 11, 12, true, "in", "in"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 3612640462697257855, 15059124849281620447, 18446744073709551615, 18446744073709551615, 77, 88, 77, 88, 15, 18, true, "Each of the", "Each of the"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106398107541152403, 3040495791226350629, 18446744073709551615, 18446744073709551615, 97, 104, 97, 104, 19, 21, true, "in this", "in this"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541480354, 13616133383081621648, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 39, 40, true, "In", "In"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 389609625618037948, 638487817302062508, 18446744073709551615, 18446744073709551615, 220, 224, 220, 224, 43, 44, true, "with", "with"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206560517276114, 15829604646240034102, 18446744073709551615, 18446744073709551615, 285, 291, 285, 291, 53, 55, true, "into a", "into a"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14091433066300748251, 5574629252352928036, 18446744073709551615, 18446744073709551615, 407, 417, 407, 417, 73, 75, true, "since each", "since each"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206566339127348, 15152243859443904216, 18446744073709551615, 18446744073709551615, 454, 460, 454, 460, 80, 82, true, "on the", "on the"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541485670, 13616139037782441930, 18446744073709551615, 18446744073709551615, 539, 541, 539, 541, 96, 97, true, "of", "of"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541485865, 13616139237691872940, 18446744073709551615, 18446744073709551615, 211, 213, 211, 213, 41, 42, true, "to", "to"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541485865, 13616139237691874671, 18446744073709551615, 18446744073709551615, 232, 234, 232, 234, 45, 46, true, "to", "to"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541485865, 13616139237691883462, 18446744073709551615, 18446744073709551615, 363, 365, 363, 365, 66, 67, true, "to", "to"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206519425733256, 1744518844831028097, 18446744073709551615, 18446744073709551615, 489, 495, 489, 495, 86, 88, true, "to the", "to the"], ["parenthesis", "round brackets", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 15451245949012109980, 860212891132498684, 18446744073709551615, 18446744073709551615, 105, 118, 105, 118, 16, 20, true, "(or document)", "(or document)"], ["expression", "word-concatenation", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 5470814635586025487, 3023904040799855893, 18446744073709551615, 18446744073709551615, 68, 81, 68, 81, 11, 12, true, "compute-heavy", "compute-heavy"], ["sentence", "", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 8943027620035512136, 10854655135118326590, 18446744073709551615, 18446744073709551615, 31, 125, 31, 125, 6, 22, true, "Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", "Whenever possible we parallelise the compute-heavy operations at the page (or document) level."], ["term", "single-term", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 13988986336887005746, 1446674937315880970, 18446744073709551615, 18446744073709551615, 68, 92, 68, 92, 11, 13, true, "compute-heavy operations", "compute-heavy operations"], ["term", "single-term", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 389609625632301461, 15632188389001375550, 18446744073709551615, 18446744073709551615, 100, 104, 100, 104, 15, 16, true, "page", "page"], ["term", "single-term", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 14650401089286948001, 1809325515137941529, 18446744073709551615, 18446744073709551615, 109, 117, 109, 117, 18, 19, true, "document", "document"], ["term", "single-term", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 329104161602483077, 5312276037637913177, 18446744073709551615, 18446744073709551615, 119, 124, 119, 124, 20, 21, true, "level", "level"], ["verb", "single-verb", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 18223316012831076072, 4378757623349607195, 18446744073709551615, 18446744073709551615, 52, 63, 52, 63, 9, 10, true, "parallelise", "parallelise"], ["conn", "single-conn", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 16381206568372064271, 9744783902447945030, 18446744073709551615, 18446744073709551615, 93, 99, 93, 99, 13, 15, true, "at the", "at the"], ["numval", "ival", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 17767354399704235156, 7397297711065841756, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "4", "4"], ["parenthesis", "reference", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415896395057, 17882276138977820280, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 3, true, "(4)", "(4)"], ["parenthesis", "round brackets", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 7182556421351177654, 17408013221930808816, 18446744073709551615, 18446744073709551615, 207, 256, 207, 256, 38, 50, true, "(e. g. the parsed PDF pages, trained models, etc)", "(e. g. the parsed PDF pages, trained models, etc)"], ["parenthesis", "round brackets", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8366781084765568282, 12073061348432203105, 18446744073709551615, 18446744073709551615, 541, 576, 541, 576, 103, 112, true, "(in our case we use MongoDB$^{17}$)", "(in our case we use MongoDB$^{17}$)"], ["expression", "common", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541487324, 14307783780196245817, 18446744073709551615, 18446744073709551615, 208, 213, 208, 213, 39, 40, true, "eg", "e. g."], ["expression", "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249425734, 18446744073709551615, 18446744073709551615, 147, 159, 147, 159, 30, 31, true, "object-store", "object-store"], ["expression", "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249446260, 18446744073709551615, 18446744073709551615, 333, 345, 333, 345, 64, 65, true, "object-store", "object-store"], ["expression", "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249438801, 18446744073709551615, 18446744073709551615, 351, 363, 351, 363, 67, 68, true, "object-store", "object-store"], ["expression", "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 7393395602818997382, 2445570796174034956, 18446744073709551615, 18446744073709551615, 620, 632, 620, 632, 122, 123, true, "access-layer", "access-layer"], ["expression", "latex", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 329104159258632281, 15385845941904838235, 18446744073709551615, 18446744073709551615, 568, 575, 568, 575, 110, 111, true, "^{17}", "$^{17}$"], ["sentence", "", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 17454558322938465001, 17890104706267422072, 18446744073709551615, 18446744073709551615, 4, 346, 4, 346, 3, 66, true, "A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store.", "A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store."], ["sentence", "", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5622932258824479139, 10516707730337709393, 18446744073709551615, 18446744073709551615, 347, 451, 347, 451, 66, 84, true, "The object-store allows us to easily scale the storage with regard to the number of processed documents.", "The object-store allows us to easily scale the storage with regard to the number of processed documents."], ["sentence", "", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8816129983583997199, 15303863633151315650, 18446744073709551615, 18446744073709551615, 452, 633, 452, 633, 84, 124, true, "However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", "However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer."], ["term", "enum-term-mark-2", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 9482072146671435678, 4852192659277363715, 18446744073709551615, 18446744073709551615, 598, 613, 598, 613, 117, 120, true, "storage and act", "storage and act"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 13382702060117711634, 866988340100178668, 18446744073709551615, 18446744073709551615, 6, 19, 6, 19, 4, 6, true, "storage layer", "storage layer"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 13382702060117711634, 866988340100185003, 18446744073709551615, 18446744073709551615, 97, 110, 97, 110, 20, 22, true, "storage layer", "storage layer"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 2903324788977241891, 13900858649375924507, 18446744073709551615, 18446744073709551615, 225, 234, 225, 234, 42, 44, true, "PDF pages", "PDF pages"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 9804322186740216471, 16267225850960798578, 18446744073709551615, 18446744073709551615, 236, 250, 236, 250, 45, 47, true, "trained models", "trained models"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 1944794866286482065, 4090230587991277542, 18446744073709551615, 18446744073709551615, 263, 287, 263, 287, 52, 55, true, "queryable NoSQL database", "queryable NoSQL database"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16772942504422841315, 12153285632385192646, 18446744073709551615, 18446744073709551615, 526, 540, 526, 540, 101, 103, true, "NoSQL database", "NoSQL database"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206578935372333, 11959326053889459522, 18446744073709551615, 18446744073709551615, 25, 31, 25, 31, 7, 8, true, "stores", "stores"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 6167933651658664291, 16849655876428761988, 18446744073709551615, 18446744073709551615, 36, 45, 36, 45, 9, 10, true, "documents", "documents"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8106478445190161533, 10724655109163266628, 18446744073709551615, 18446744073709551615, 61, 68, 61, 68, 14, 15, true, "results", "results"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 990358581043194791, 9157342138188045037, 18446744073709551615, 18446744073709551615, 78, 91, 78, 91, 17, 18, true, "microservices", "microservices"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 14635102416861801722, 13921851080814198183, 18446744073709551615, 18446744073709551615, 134, 142, 134, 142, 27, 28, true, "services", "services"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249425734, 18446744073709551615, 18446744073709551615, 147, 159, 147, 159, 30, 31, true, "object-store", "object-store"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206578935372333, 11959326053892662509, 18446744073709551615, 18446744073709551615, 165, 171, 165, 171, 32, 33, true, "stores", "stores"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 6167933651658664291, 16849655876428781193, 18446744073709551615, 18446744073709551615, 176, 185, 176, 185, 34, 35, true, "documents", "documents"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206578939110576, 11959779027228109556, 18446744073709551615, 18446744073709551615, 200, 206, 200, 206, 37, 38, true, "stages", "stages"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 14638347573453462708, 11572179125786979791, 18446744073709551615, 18446744073709551615, 304, 312, 304, 312, 58, 59, true, "metadata", "metadata"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 389609625697824016, 10086809110828577778, 18446744073709551615, 18446744073709551615, 321, 325, 321, 325, 61, 62, true, "file", "file"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249446260, 18446744073709551615, 18446744073709551615, 333, 345, 333, 345, 64, 65, true, "object-store", "object-store"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249438801, 18446744073709551615, 18446744073709551615, 351, 363, 351, 363, 67, 68, true, "object-store", "object-store"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8106478700889254291, 5556909971365278069, 18446744073709551615, 18446744073709551615, 394, 401, 394, 401, 74, 75, true, "storage", "storage"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206521526353544, 15388633276980566672, 18446744073709551615, 18446744073709551615, 407, 413, 407, 413, 76, 77, true, "regard", "regard"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206574973295053, 13874751204703215888, 18446744073709551615, 18446744073709551615, 421, 427, 421, 427, 79, 80, true, "number", "number"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 6167933651658664291, 16849655876428863113, 18446744073709551615, 18446744073709551615, 441, 450, 441, 450, 82, 83, true, "documents", "documents"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 389609625695123443, 10086936390401945955, 18446744073709551615, 18446744073709551615, 549, 553, 549, 553, 106, 107, true, "case", "case"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8106471292843117687, 16633247643408803384, 18446744073709551615, 18446744073709551615, 561, 568, 561, 568, 109, 110, true, "MongoDB", "MongoDB"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415895527965, 17882005209631256296, 18446744073709551615, 18446744073709551615, 580, 583, 580, 583, 113, 114, true, "top", "top"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8106478700889254291, 5556909971365264072, 18446744073709551615, 18446744073709551615, 598, 605, 598, 605, 117, 118, true, "storage", "storage"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415895571467, 17882001345722866996, 18446744073709551615, 18446744073709551615, 610, 613, 610, 613, 119, 120, true, "act", "act"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 7393395602818997382, 2445570796174034956, 18446744073709551615, 18446744073709551615, 620, 632, 620, 632, 122, 123, true, "access-layer", "access-layer"], ["verb", "compound-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 9001167546411496730, 5107946166734090998, 18446744073709551615, 18446744073709551615, 111, 122, 111, 122, 22, 24, true, "is composed", "is composed"], ["verb", "compound-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 4557608131655756693, 10894415445021592676, 18446744073709551615, 18446744073709551615, 464, 502, 464, 502, 87, 94, true, "is not build to be queried efficiently", "is not build to be queried efficiently"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 6171728176299542016, 16532079825940175611, 18446744073709551615, 18446744073709551615, 190, 199, 190, 199, 36, 37, true, "processed", "processed"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206517379850387, 15480389052081010479, 18446744073709551615, 18446744073709551615, 218, 224, 218, 224, 41, 42, true, "parsed", "parsed"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206578935372333, 11959326053892654989, 18446744073709551615, 18446744073709551615, 293, 299, 293, 299, 56, 57, true, "stores", "stores"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206569317834029, 11936595447040454128, 18446744073709551615, 18446744073709551615, 364, 370, 364, 370, 68, 69, true, "allows", "allows"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 329104161785194305, 774090374362380612, 18446744073709551615, 18446744073709551615, 384, 389, 384, 389, 72, 73, true, "scale", "scale"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 6171728176299542016, 16532079825940159645, 18446744073709551615, 18446744073709551615, 431, 440, 431, 440, 81, 82, true, "processed", "processed"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541486535, 14307783832258935505, 18446744073709551615, 18446744073709551615, 510, 512, 510, 512, 96, 97, true, "is", "is"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415895640485, 17882295449136083937, 18446744073709551615, 18446744073709551615, 520, 523, 520, 523, 99, 100, true, "put", "put"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415895516060, 17882004701528519561, 18446744073709551615, 18446744073709551615, 557, 560, 557, 560, 108, 109, true, "use", "use"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206594265787492, 12314552731610003625, 18446744073709551615, 18446744073709551615, 587, 593, 587, 593, 115, 116, true, "manage", "manage"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 389609625631229034, 10076688135462708121, 18446744073709551615, 18446744073709551615, 20, 24, 20, 24, 6, 7, true, "that", "that"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206568455155979, 11882547791157225115, 18446744073709551615, 18446744073709551615, 54, 60, 54, 60, 12, 14, true, "as the", "as the"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 14637917359887717745, 15358780278905995948, 18446744073709551615, 18446744073709551615, 69, 77, 69, 77, 15, 17, true, "from the", "from the"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415895623120, 17882259379782471845, 18446744073709551615, 18446744073709551615, 123, 126, 123, 126, 24, 25, true, "out", "out"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485670, 14307783790885956650, 18446744073709551615, 18446744073709551615, 127, 129, 127, 129, 25, 26, true, "of", "of"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 389609625631229034, 10076688135462665568, 18446744073709551615, 18446744073709551615, 160, 164, 160, 164, 31, 32, true, "that", "that"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206564601699726, 11976296382474591180, 18446744073709551615, 18446744073709551615, 208, 217, 208, 217, 39, 41, true, "eg the", "e. g. the"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8106342927225405366, 3870861981915582763, 18446744073709551615, 18446744073709551615, 313, 320, 313, 320, 59, 61, true, "of each", "of each"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206560518651853, 14827520425559992801, 18446744073709551615, 18446744073709551615, 326, 332, 326, 332, 62, 64, true, "in the", "in the"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 389609625618037948, 10078261007606591819, 18446744073709551615, 18446744073709551615, 402, 406, 402, 406, 75, 76, true, "with", "with"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485670, 14307783790886352415, 18446744073709551615, 18446744073709551615, 428, 430, 428, 430, 80, 81, true, "of", "of"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541486538, 14307783830786016801, 18446744073709551615, 18446744073709551615, 542, 544, 542, 544, 104, 105, true, "in", "in"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485678, 14307783792085522482, 18446744073709551615, 18446744073709551615, 577, 579, 577, 579, 112, 113, true, "on", "on"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 329104159171729452, 15392320057005504366, 18446744073709551615, 18446744073709551615, 614, 619, 614, 619, 120, 122, true, "as an", "as an"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485865, 14307783789814212187, 18446744073709551615, 18446744073709551615, 374, 376, 374, 376, 70, 71, true, "to", "to"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206519425733256, 15635996210936257264, 18446744073709551615, 18446744073709551615, 414, 420, 414, 420, 77, 79, true, "to the", "to the"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485865, 14307783789814206195, 18446744073709551615, 18446744073709551615, 477, 479, 477, 479, 90, 91, true, "to", "to"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485865, 14307783789814213462, 18446744073709551615, 18446744073709551615, 584, 586, 584, 586, 114, 115, true, "to", "to"], ["expression", "common", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541486545, 3028421707917465902, 18446744073709551615, 18446744073709551615, 69, 73, 69, 73, 13, 14, true, "ie", "i.e."], ["expression", "common", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 12178341415895450733, 6765936968254123994, 18446744073709551615, 18446744073709551615, 561, 565, 561, 565, 98, 99, true, "etc", "etc."], ["expression", "apostrophe", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 389609625696231302, 874520044884738072, 18446744073709551615, 18446744073709551615, 79, 84, 79, 84, 15, 16, true, "dont", "don't"], ["expression", "word-concatenation", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 5044385734724420019, 15039915568025682583, 18446744073709551615, 18446744073709551615, 207, 223, 207, 223, 40, 41, true, "state-of-the-art", "state-of-the-art"], ["expression", "word-concatenation", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15169931585135175826, 5000232017329418031, 18446744073709551615, 18446744073709551615, 296, 307, 296, 307, 57, 58, true, "cloud-based", "cloud-based"], ["expression", "word-concatenation", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 17036338369050073511, 11876619048247685695, 18446744073709551615, 18446744073709551615, 517, 529, 517, 529, 92, 93, true, "data-at-rest", "data-at-rest"], ["sentence", "", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 6380046225039059930, 1331102669406003609, 18446744073709551615, 18446744073709551615, 0, 125, 0, 125, 0, 26, true, "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it.", "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it."], ["sentence", "", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 17685124856943080749, 17041292620595508182, 18446744073709551615, 18446744073709551615, 126, 287, 126, 287, 26, 55, true, "This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ.", "This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ."], ["term", "enum-term-mark-4", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 6417746280621449074, 4697720481231698323, 18446744073709551615, 18446744073709551615, 259, 286, 259, 286, 49, 54, true, "MongoDB, Redis and RabbitMQ", "MongoDB, Redis and RabbitMQ"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 5470814617574924291, 6460878720216756235, 18446744073709551615, 18446744073709551615, 40, 53, 40, 53, 8, 10, true, "compute layer", "compute layer"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 1269674564249719737, 10143729425890314060, 18446744073709551615, 18446744073709551615, 154, 174, 154, 174, 32, 34, true, "additional stability", "additional stability"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 1142162931543826722, 12325457556378121938, 18446744073709551615, 18446744073709551615, 179, 199, 179, 199, 35, 38, true, "data safety concerns", "data safety concerns"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 18398403256162540896, 17634138153411653985, 18446744073709551615, 18446744073709551615, 207, 229, 207, 229, 40, 42, true, "state-of-the-art tools", "state-of-the-art tools"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206568241679420, 4060891951880802543, 18446744073709551615, 18446744073709551615, 3, 9, 3, 9, 1, 2, true, "design", "design"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 990358581043194791, 13959011607976637903, 18446744073709551615, 18446744073709551615, 19, 32, 19, 32, 5, 6, true, "microservices", "microservices"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 389609625696431489, 874321965181516179, 18446744073709551615, 18446744073709551615, 96, 100, 96, 100, 18, 19, true, "data", "data"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 8106471292843117687, 12637598480251227160, 18446744073709551615, 18446744073709551615, 259, 266, 259, 266, 49, 50, true, "MongoDB", "MongoDB"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 329104162172852560, 4837073618500726237, 18446744073709551615, 18446744073709551615, 268, 273, 268, 273, 51, 52, true, "Redis", "Redis"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 14650252519075350211, 3711332974386502064, 18446744073709551615, 18446744073709551615, 278, 286, 278, 286, 53, 54, true, "RabbitMQ", "RabbitMQ"], ["verb", "compound-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 2116256630331469530, 14901345236443982027, 18446744073709551615, 18446744073709551615, 238, 249, 238, 249, 44, 46, true, "have chosen", "have chosen"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 12178341415895564896, 6765880715380529817, 18446744073709551615, 18446744073709551615, 54, 57, 54, 57, 10, 11, true, "are", "are"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541486545, 3028421707917465902, 18446744073709551615, 18446744073709551615, 69, 73, 69, 73, 13, 14, true, "ie", "i.e."], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206594265787492, 6901918250154643292, 18446744073709551615, 18446744073709551615, 85, 91, 85, 91, 16, 17, true, "manage", "manage"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 8106342542940968443, 7576392723763470277, 18446744073709551615, 18446744073709551615, 111, 118, 111, 118, 22, 23, true, "operate", "operate"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206569317834029, 4041469951276441022, 18446744073709551615, 18446744073709551615, 131, 137, 131, 137, 27, 28, true, "allows", "allows"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 329104159241711190, 4780637315619849705, 18446744073709551615, 18446744073709551615, 144, 149, 144, 149, 30, 31, true, "trust", "trust"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 8106478685702231057, 6290141210502214270, 18446744073709551615, 18446744073709551615, 251, 258, 251, 258, 47, 49, true, "such as", "such as"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541480853, 3028421248297502894, 18446744073709551615, 18446744073709551615, 0, 2, 0, 2, 0, 1, true, "By", "By"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206560518651853, 5658730714977917939, 18446744073709551615, 18446744073709551615, 33, 39, 33, 39, 6, 8, true, "in the", "in the"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541485678, 3028421580747422282, 18446744073709551615, 18446744073709551615, 119, 121, 119, 121, 23, 24, true, "on", "on"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 389609625631229034, 1006213453265744340, 18446744073709551615, 18446744073709551615, 230, 234, 230, 234, 42, 43, true, "that", "that"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541485865, 3028421580029648804, 18446744073709551615, 18446744073709551615, 141, 143, 141, 143, 29, 30, true, "to", "to"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206519425733256, 14507313859404429169, 18446744073709551615, 18446744073709551615, 200, 206, 200, 206, 38, 40, true, "to the", "to the"], ["numval", "ival", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 17767354399704235158, 2662324577726766030, 18446744073709551615, 18446744073709551615, 132, 133, 132, 133, 25, 26, true, "6", "6"], ["parenthesis", "round brackets", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14654063839594813536, 538351782817809335, 18446744073709551615, 18446744073709551615, 126, 134, 126, 134, 22, 27, true, "(Fig. 6)", "(Fig. 6)"], ["expression", "common", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541487324, 13392634893759554933, 18446744073709551615, 18446744073709551615, 302, 307, 302, 307, 55, 56, true, "eg", "e. g."], ["expression", "apostrophe", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 329104162099298038, 4763741573118152283, 18446744073709551615, 18446744073709551615, 432, 438, 432, 438, 74, 75, true, "didnt", "didn't"], ["expression", "word-concatenation", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 12953096966692611490, 12141023217086338960, 18446744073709551615, 18446744073709551615, 416, 431, 416, 431, 73, 74, true, "result-backends", "result-backends"], ["expression", "word-concatenation", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15312996304332666827, 6035818222047083309, 18446744073709551615, 18446744073709551615, 449, 462, 449, 462, 77, 78, true, "auto-cleaning", "auto-cleaning"], ["sentence", "", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 530368145582943314, 13295761460699684392, 18446744073709551615, 18446744073709551615, 0, 109, 0, 109, 0, 19, true, "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform.", "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform."], ["sentence", "", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 13690589465324431830, 729303492509750058, 18446744073709551615, 18446744073709551615, 110, 243, 110, 243, 19, 46, true, "From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services.", "From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services."], ["sentence", "", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 3799550530227447837, 10235095163850190658, 18446744073709551615, 18446744073709551615, 244, 397, 244, 397, 46, 70, true, "During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks.", "During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks."], ["term", "enum-term-mark-2", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 17670119798254759554, 11044523659752658873, 18446744073709551615, 18446744073709551615, 362, 384, 362, 384, 65, 68, true, "performance or scaling", "performance or scaling"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14228775800347505852, 1270934594966554784, 18446744073709551615, 18446744073709551615, 40, 52, 40, 52, 8, 10, true, "crucial role", "crucial role"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 7308464677014704448, 5082168475013008068, 18446744073709551615, 18446744073709551615, 71, 91, 71, 91, 13, 15, true, "scaling requirements", "scaling requirements"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 5470814617574924291, 14078314495213552738, 18446744073709551615, 18446744073709551615, 157, 170, 157, 170, 33, 35, true, "compute layer", "compute layer"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 2732848371272418679, 13963794921672736533, 18446744073709551615, 18446744073709551615, 177, 196, 177, 196, 37, 39, true, "considerable amount", "considerable amount"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 9137804915913150128, 4468637458257639239, 18446744073709551615, 18446744073709551615, 225, 242, 225, 242, 43, 45, true, "external services", "external services"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 2183649553633451561, 2020007629353495081, 18446744073709551615, 18446744073709551615, 280, 296, 280, 296, 51, 53, true, "multiple options", "multiple options"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 8453966769027728994, 16465619217972769818, 18446744073709551615, 18446744073709551615, 351, 373, 351, 373, 64, 66, true, "inadequate performance", "inadequate performance"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 5368659910297958112, 6849806055120467904, 18446744073709551615, 18446744073709551615, 377, 396, 377, 396, 67, 69, true, "scaling bottlenecks", "scaling bottlenecks"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 16381206532620919857, 14992785282357168667, 18446744073709551615, 18446744073709551615, 4, 10, 4, 10, 1, 2, true, "choice", "choice"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14635102416861801722, 6944138304193469372, 18446744073709551615, 18446744073709551615, 18, 26, 18, 26, 4, 5, true, "services", "services"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14814125365076808131, 259608735306547128, 18446744073709551615, 18446744073709551615, 100, 108, 100, 108, 17, 18, true, "platform", "platform"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 16381206578503830159, 5707938631766375304, 18446744073709551615, 18446744073709551615, 119, 125, 119, 125, 21, 22, true, "sketch", "sketch"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 12178341415896108354, 15797295600457378135, 18446744073709551615, 18446744073709551615, 127, 130, 127, 130, 23, 24, true, "Fig", "Fig"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 10844940863803374990, 9655000055558243874, 18446744073709551615, 18446744073709551615, 200, 213, 200, 213, 40, 41, true, "communication", "communication"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 1525875096007260836, 12799154604316470877, 18446744073709551615, 18446744073709551615, 255, 266, 255, 266, 48, 49, true, "development", "development"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14635102416861801722, 6944138304193064763, 18446744073709551615, 18446744073709551615, 331, 339, 331, 339, 61, 62, true, "services", "services"], ["verb", "compound-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 13963960872604267983, 9741187444941953278, 18446744073709551615, 18446744073709551615, 27, 37, 27, 37, 5, 7, true, "plays also", "plays also"], ["verb", "compound-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 3304740881499173399, 16233133484570305536, 18446744073709551615, 18446744073709551615, 311, 325, 311, 325, 57, 60, true, "had to replace", "had to replace"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15360283586477443351, 2607233536085319416, 18446744073709551615, 18446744073709551615, 56, 66, 56, 66, 11, 12, true, "addressing", "addressing"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541486535, 13392635867599855620, 18446744073709551615, 18446744073709551615, 139, 141, 139, 141, 29, 30, true, "is", "is"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 12178341415895601584, 15797287691917345479, 18446744073709551615, 18446744073709551615, 171, 174, 171, 174, 35, 36, true, "has", "has"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 6172092587891830137, 6944235926622048042, 18446744073709551615, 18446744073709551615, 270, 279, 270, 279, 50, 51, true, "evaluated", "evaluated"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541487324, 13392634893759554933, 18446744073709551615, 18446744073709551615, 302, 307, 302, 307, 55, 56, true, "eg", "e. g."], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 2617690495147367356, 3995802380905838725, 18446744073709551615, 18446744073709551615, 142, 152, 142, 152, 30, 32, true, "clear that", "clear that"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 16381206565712212855, 5357051908763334798, 18446744073709551615, 18446744073709551615, 11, 17, 11, 17, 2, 4, true, "of the", "of the"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541486538, 13392635867609642731, 18446744073709551615, 18446744073709551615, 53, 55, 53, 55, 10, 11, true, "in", "in"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 8106397727991264470, 7534731816831827800, 18446744073709551615, 18446744073709551615, 92, 99, 92, 99, 15, 17, true, "for the", "for the"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14652309564084901216, 17139314077627797878, 18446744073709551615, 18446744073709551615, 110, 118, 110, 118, 19, 21, true, "From the", "From the"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541485670, 13392635753038274381, 18446744073709551615, 18446744073709551615, 197, 199, 197, 199, 39, 40, true, "of", "of"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 8601401817206609046, 13002288130139499420, 18446744073709551615, 18446744073709551615, 214, 224, 214, 224, 41, 43, true, "with these", "with these"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 1703385011780833119, 14455781933540325166, 18446744073709551615, 18446744073709551615, 244, 254, 244, 254, 46, 48, true, "During the", "During the"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 8106397858129277841, 5242979235823403228, 18446744073709551615, 18446744073709551615, 340, 347, 340, 347, 62, 63, true, "because", "because"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541485670, 13392635753038231319, 18446744073709551615, 18446744073709551615, 348, 350, 348, 350, 63, 64, true, "of", "of"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541485865, 13392635755399877181, 18446744073709551615, 18446744073709551615, 315, 317, 315, 317, 58, 59, true, "to", "to"], ["expression", "apostrophe", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 329104162099298038, 2422170512955612338, 18446744073709551615, 18446744073709551615, 27, 33, 27, 33, 6, 7, true, "didnt", "didn't"], ["sentence", "", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 9738445166753142519, 7077409306408156246, 18446744073709551615, 18446744073709551615, 4, 87, 4, 87, 1, 16, true, "GridFS storage, but it didn't fit to the constraints of typical cloud environments.", "GridFS storage, but it didn't fit to the constraints of typical cloud environments."], ["term", "single-term", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 3553616603590296979, 16097117960287067168, 18446744073709551615, 18446744073709551615, 4, 18, 4, 18, 1, 3, true, "GridFS storage", "GridFS storage"], ["term", "single-term", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 3164946639114553222, 7659937814652463492, 18446744073709551615, 18446744073709551615, 60, 86, 60, 86, 12, 15, true, "typical cloud environments", "typical cloud environments"], ["term", "single-term", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 12178341415895625823, 10663577172675311427, 18446744073709551615, 18446744073709551615, 34, 37, 34, 37, 7, 8, true, "fit", "fit"], ["term", "single-term", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 2343820404875251124, 4748486300187076231, 18446744073709551615, 18446744073709551615, 45, 56, 45, 56, 10, 11, true, "constraints", "constraints"], ["verb", "single-verb", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 329104162099298038, 2422170512955612338, 18446744073709551615, 18446744073709551615, 27, 33, 27, 33, 6, 7, true, "didnt", "didn't"], ["conn", "single-conn", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 15441160910541485670, 15469104452822855430, 18446744073709551615, 18446744073709551615, 57, 59, 57, 59, 11, 12, true, "of", "of"], ["conn", "single-conn", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 16381206519425733256, 10289373630862252080, 18446744073709551615, 18446744073709551615, 38, 44, 38, 44, 8, 10, true, "to the", "to the"], ["numval", "fval", 11417717357379295278, "TEXT", "#/texts/82", 1.0, 12178341415896306586, 2376192024093454144, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "4.2", "4.2"], ["numval", "ival", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541481862, 10500741044532715512, 18446744073709551615, 18446744073709551615, 50, 52, 50, 52, 7, 8, true, "18", "18"], ["numval", "ival", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541481863, 10500741044517231196, 18446744073709551615, 18446744073709551615, 155, 157, 155, 157, 24, 25, true, "19", "19"], ["expression", "common", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541487324, 10500757812195718645, 18446744073709551615, 18446744073709551615, 121, 126, 121, 126, 18, 19, true, "eg", "e. g."], ["expression", "word-concatenation", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 14042857724397157868, 17436499209420645038, 18446744073709551615, 18446744073709551615, 95, 105, 95, 105, 15, 16, true, "on-premise", "on-premise"], ["sentence", "", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 16473487772931696221, 1361496787505182232, 18446744073709551615, 18446744073709551615, 0, 171, 0, 171, 0, 27, true, "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution.", "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution."], ["sentence", "", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 13604474430867440219, 15920079442920273776, 18446744073709551615, 18446744073709551615, 172, 302, 172, 302, 27, 48, true, "Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", "Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints."], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 4315218641775224883, 3783623336096074444, 18446744073709551615, 18446744073709551615, 30, 49, 30, 49, 5, 7, true, "Kubernetes clusters", "Kubernetes clusters"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 7578678502347528407, 16606690075113593003, 18446744073709551615, 18446744073709551615, 66, 86, 66, 86, 10, 13, true, "many cloud providers", "many cloud providers"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 17157390005033639285, 14551521890127263578, 18446744073709551615, 18446744073709551615, 95, 119, 95, 119, 15, 17, true, "on-premise installations", "on-premise installations"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15250872047548077430, 7534455339628786157, 18446744073709551615, 18446744073709551615, 137, 154, 137, 154, 21, 24, true, "IBM Cloud Private", "IBM Cloud Private"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 17140401278227586491, 11321802952630178709, 18446744073709551615, 18446744073709551615, 207, 223, 207, 223, 33, 35, true, "storage services", "storage services"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 4047423525975715659, 7947778581648084546, 18446744073709551615, 18446744073709551615, 248, 260, 248, 260, 39, 41, true, "same cluster", "same cluster"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 14814125365076808131, 2443639570324462603, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 1, 2, true, "platform", "platform"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541487324, 10500757812195718645, 18446744073709551615, 18446744073709551615, 121, 126, 121, 126, 18, 19, true, "eg", "e. g."], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 16659280385198228594, 13641186927945667101, 18446744073709551615, 18446744073709551615, 158, 170, 158, 170, 25, 26, true, "distribution", "distribution"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 13240311013633905449, 2445508371176550978, 18446744073709551615, 18446744073709551615, 189, 201, 189, 201, 30, 31, true, "requirements", "requirements"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 6165987386346442673, 17011861032528540321, 18446744073709551615, 18446744073709551615, 292, 301, 292, 301, 46, 47, true, "endpoints", "endpoints"], ["verb", "compound-verb", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 12677136892665844646, 2032089139232006155, 18446744073709551615, 18446744073709551615, 224, 236, 224, 236, 35, 37, true, "are launched", "are launched"], ["verb", "compound-verb", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 12855573301475655422, 1573892996858218554, 18446744073709551615, 18446744073709551615, 264, 291, 264, 291, 42, 46, true, "linked to externally hosted", "linked to externally hosted"], ["verb", "single-verb", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541486535, 10500757786703375297, 18446744073709551615, 18446744073709551615, 13, 15, 13, 15, 2, 3, true, "is", "is"], ["verb", "single-verb", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 329104159157798023, 6671752901319384085, 18446744073709551615, 18446744073709551615, 127, 132, 127, 132, 19, 20, true, "using", "using"], ["verb", "single-verb", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 2906423210345501303, 16309307450075852923, 18446744073709551615, 18446744073709551615, 172, 181, 172, 181, 27, 28, true, "Depending", "Depending"], ["conn", "single-conn", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 3013597407861734098, 13645835485872550225, 18446744073709551615, 18446744073709551615, 16, 29, 16, 29, 3, 5, true, "deployable on", "deployable on"], ["conn", "single-conn", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15601168207941439665, 15242156125190384917, 18446744073709551615, 18446744073709551615, 53, 65, 53, 65, 8, 10, true, "available on", "available on"], ["conn", "single-conn", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 16381206566339127348, 12939125612892018463, 18446744073709551615, 18446744073709551615, 182, 188, 182, 188, 28, 30, true, "on the", "on the"], ["conn", "single-conn", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 5386255170026914598, 10161453367619815898, 18446744073709551615, 18446744073709551615, 237, 247, 237, 247, 37, 39, true, "inside the", "inside the"], ["conn", "single-conn", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541485865, 10500757793681888901, 18446744073709551615, 18446744073709551615, 271, 273, 271, 273, 43, 44, true, "to", "to"], ["expression", "word-concatenation", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 14352418754681794071, 3129213562618289639, 18446744073709551615, 18446744073709551615, 192, 212, 192, 212, 35, 36, true, "parsing-microservice", "parsing-microservice"], ["sentence", "", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 13622028562599160608, 11838629416984110843, 18446744073709551615, 18446744073709551615, 0, 76, 0, 76, 0, 14, true, "The common parts of all deployments are the interface and the compute layer.", "The common parts of all deployments are the interface and the compute layer."], ["sentence", "", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 8982322851077994049, 14574053650340581887, 18446744073709551615, 18446744073709551615, 77, 173, 77, 173, 14, 31, true, "The compute layer is designed for dynamically adapt the number of resources on the current load.", "The compute layer is designed for dynamically adapt the number of resources on the current load."], ["sentence", "", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 8231516465306254293, 5376868082789191066, 18446744073709551615, 18446744073709551615, 174, 445, 174, 445, 31, 77, true, "For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", "For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents."], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 4575700335406167488, 1041889512884127769, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 1, 3, true, "common parts", "common parts"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 5470814617574924291, 1119950227308354530, 18446744073709551615, 18446744073709551615, 62, 75, 62, 75, 11, 13, true, "compute layer", "compute layer"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 5470814617574924291, 1119950227308355535, 18446744073709551615, 18446744073709551615, 81, 94, 81, 94, 15, 17, true, "compute layer", "compute layer"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 5679217233562387039, 10306293013634943918, 18446744073709551615, 18446744073709551615, 160, 172, 160, 172, 28, 30, true, "current load", "current load"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 7165121732645597150, 17919093041593160462, 18446744073709551615, 18446744073709551615, 192, 222, 192, 222, 35, 37, true, "parsing-microservice instances", "parsing-microservice instances"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 11579811611053762862, 5792740568999225626, 18446744073709551615, 18446744073709551615, 247, 261, 247, 261, 42, 44, true, "large document", "large document"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 5574297910769420540, 7415408366124113138, 18446744073709551615, 18446744073709551615, 374, 390, 374, 390, 66, 68, true, "other components", "other components"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 1526165531385019099, 3702094867294947886, 18446744073709551615, 18446744073709551615, 24, 35, 24, 35, 5, 6, true, "deployments", "deployments"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6182600923960960908, 8662044929949820827, 18446744073709551615, 18446744073709551615, 44, 53, 44, 53, 8, 9, true, "interface", "interface"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206574973295053, 6957832894321474609, 18446744073709551615, 18446744073709551615, 133, 139, 133, 139, 23, 24, true, "number", "number"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6168338487309432467, 14015407302848006245, 18446744073709551615, 18446744073709551615, 143, 152, 143, 152, 25, 26, true, "resources", "resources"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 8106397496085150773, 1241946393555377686, 18446744073709551615, 18446744073709551615, 178, 185, 178, 185, 32, 33, true, "example", "example"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 12178341415895456504, 15511050211190565407, 18446744073709551615, 18446744073709551615, 320, 323, 320, 323, 54, 55, true, "end", "end"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 389609625631210899, 6431357524637287554, 18446744073709551615, 18446744073709551615, 331, 335, 331, 335, 57, 58, true, "task", "task"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6168338487309432467, 14015407302847964601, 18446744073709551615, 18446744073709551615, 351, 360, 351, 360, 62, 63, true, "resources", "resources"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 14634153919632515335, 2667013412527336630, 18446744073709551615, 18446744073709551615, 397, 405, 397, 405, 70, 71, true, "training", "training"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6167933651658664291, 16134555370198793815, 18446744073709551615, 18446744073709551615, 435, 444, 435, 444, 75, 76, true, "documents", "documents"], ["verb", "compound-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 9165036765200707500, 14015747365861821834, 18446744073709551615, 18446744073709551615, 95, 106, 95, 106, 17, 19, true, "is designed", "is designed"], ["verb", "compound-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 14891459320562646805, 9156075874686645300, 18446744073709551615, 18446744073709551615, 223, 239, 223, 239, 37, 40, true, "could be spawned", "could be spawned"], ["verb", "compound-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 9165313840036679968, 18156907374285878295, 18446744073709551615, 18446744073709551615, 262, 273, 262, 273, 44, 46, true, "is uploaded", "is uploaded"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 12178341415895564896, 15510992411047215180, 18446744073709551615, 18446744073709551615, 36, 39, 36, 39, 6, 7, true, "are", "are"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 329104159173548808, 1957182172439438990, 18446744073709551615, 18446744073709551615, 123, 128, 123, 128, 21, 22, true, "adapt", "adapt"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206579178669319, 6938028188806274364, 18446744073709551615, 18446744073709551615, 301, 307, 301, 307, 50, 51, true, "scaled", "scaled"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 12178341415895564896, 15510992411043340629, 18446744073709551615, 18446744073709551615, 361, 364, 361, 364, 63, 64, true, "are", "are"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 5615554093848987331, 14336271604284028764, 18446744073709551615, 18446744073709551615, 410, 420, 410, 420, 72, 73, true, "assembling", "assembling"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6171728176299542016, 3957840262356218692, 18446744073709551615, 18446744073709551615, 425, 434, 425, 434, 74, 75, true, "processed", "processed"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6165459236568015364, 6061326913510427821, 18446744073709551615, 18446744073709551615, 337, 346, 337, 346, 59, 61, true, "such that", "such that"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 14637917385401805410, 5428309875437807875, 18446744073709551615, 18446744073709551615, 365, 373, 365, 373, 64, 66, true, "free for", "free for"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206565712007226, 7724692166486276181, 18446744073709551615, 18446744073709551615, 17, 23, 17, 23, 3, 5, true, "of all", "of all"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 12178341415895625940, 15510989710886502910, 18446744073709551615, 18446744073709551615, 107, 110, 107, 110, 19, 20, true, "for", "for"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 15441160910541485670, 18358916429929728660, 18446744073709551615, 18446744073709551615, 140, 142, 140, 142, 24, 25, true, "of", "of"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206566339127348, 7820316500598827267, 18446744073709551615, 18446744073709551615, 153, 159, 153, 159, 26, 28, true, "on the", "on the"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 12178341415896108722, 15510983418444691685, 18446744073709551615, 18446744073709551615, 174, 177, 174, 177, 31, 32, true, "For", "For"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206568372064271, 10868552521626828999, 18446744073709551615, 18446744073709551615, 313, 319, 313, 319, 52, 54, true, "at the", "at the"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206565712212855, 7724660172286794609, 18446744073709551615, 18446744073709551615, 324, 330, 324, 330, 55, 57, true, "of the", "of the"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 389609625633313393, 6432600486678620238, 18446744073709551615, 18446744073709551615, 392, 396, 392, 396, 69, 70, true, "like", "like"], ["sentence", "", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 9089347946185436978, 12322779939098932937, 18446744073709551615, 18446744073709551615, 0, 223, 0, 223, 0, 34, true, "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements.", "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements."], ["sentence", "", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 5619374035914941215, 13388056321170730470, 18446744073709551615, 18446744073709551615, 224, 307, 224, 307, 34, 47, true, "The parse component is indeed more demanding than the simple annotation components.", "The parse component is indeed more demanding than the simple annotation components."], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 5470814617574924291, 8565468289586536983, 18446744073709551615, 18446744073709551615, 30, 43, 30, 43, 5, 7, true, "compute layer", "compute layer"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 220880112941331342, 10757099033331540513, 18446744073709551615, 18446744073709551615, 69, 85, 69, 85, 11, 13, true, "different queues", "different queues"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 5487575286069153569, 13394910817957544454, 18446744073709551615, 18446744073709551615, 157, 176, 157, 176, 26, 28, true, "different component", "different component"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 8988400645948795194, 16485622837841306210, 18446744073709551615, 18446744073709551615, 196, 222, 196, 222, 31, 33, true, "computational requirements", "computational requirements"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 5037855592482871690, 16562149494420733590, 18446744073709551615, 18446744073709551615, 228, 243, 228, 243, 35, 37, true, "parse component", "parse component"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 1786684417185012154, 17458883524654231766, 18446744073709551615, 18446744073709551615, 278, 306, 278, 306, 43, 46, true, "simple annotation components", "simple annotation components"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 2703018952916355661, 2103701956309679472, 18446744073709551615, 18446744073709551615, 4, 14, 4, 14, 1, 2, true, "components", "components"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 14637917407223052431, 233568237166781340, 18446744073709551615, 18446744073709551615, 116, 124, 116, 124, 20, 21, true, "fraction", "fraction"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6168338487309432467, 7308226084005327662, 18446744073709551615, 18446744073709551615, 128, 137, 128, 137, 22, 23, true, "resources", "resources"], ["verb", "compound-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6181919818947346503, 7425024011998385797, 18446744073709551615, 18446744073709551615, 244, 253, 244, 253, 37, 39, true, "is indeed", "is indeed"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 8106478500389476193, 7333002514628894973, 18446744073709551615, 18446744073709551615, 15, 22, 15, 22, 2, 3, true, "running", "running"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 12178341415895564896, 2069865895983944783, 18446744073709551615, 18446744073709551615, 44, 47, 44, 47, 7, 8, true, "are", "are"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6167774653473311671, 6290589207758732495, 18446744073709551615, 18446744073709551615, 56, 65, 56, 65, 9, 10, true, "organized", "organized"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 14892592691709012982, 6239765786557836013, 18446744073709551615, 18446744073709551615, 100, 111, 100, 111, 17, 19, true, "can control", "can control"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 5946734708345938643, 10385794168888707641, 18446744073709551615, 18446744073709551615, 138, 147, 138, 147, 23, 24, true, "allocated", "allocated"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6180152660545840784, 10572035524213656485, 18446744073709551615, 18446744073709551615, 177, 186, 177, 186, 28, 29, true, "depending", "depending"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6180164155127649426, 12351505444317336995, 18446744073709551615, 18446744073709551615, 259, 268, 259, 268, 40, 41, true, "demanding", "demanding"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6165459236568015364, 15229237459031979782, 18446744073709551615, 18446744073709551615, 87, 96, 87, 96, 14, 16, true, "such that", "such that"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 16381206560518651853, 15245488941580331570, 18446744073709551615, 18446744073709551615, 23, 29, 23, 29, 3, 5, true, "in the", "in the"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 15441160910541486538, 16667658716295011841, 18446744073709551615, 18446744073709551615, 66, 68, 66, 68, 10, 11, true, "in", "in"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 15441160910541485670, 16667656100672477854, 18446744073709551615, 18446744073709551615, 125, 127, 125, 127, 21, 22, true, "of", "of"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 14637917333167503367, 16189411727226984898, 18446744073709551615, 18446744073709551615, 148, 156, 148, 156, 24, 26, true, "for each", "for each"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 15441160910541485678, 16667656110763672808, 18446744073709551615, 18446744073709551615, 187, 189, 187, 189, 29, 30, true, "on", "on"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 14634130760851708851, 14161918089512738998, 18446744073709551615, 18446744073709551615, 269, 277, 269, 277, 41, 43, true, "than the", "than the"], ["numval", "ival", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 17767354399704235157, 4252787363852102188, 18446744073709551615, 18446744073709551615, 39, 40, 39, 40, 7, 8, true, "5", "5"], ["numval", "ival", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 17767354399704235156, 4252787353929282716, 18446744073709551615, 18446744073709551615, 63, 64, 63, 64, 11, 12, true, "4", "4"], ["numval", "ival", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 17767354399704235152, 4252787363047610230, 18446744073709551615, 18446744073709551615, 79, 80, 79, 80, 15, 16, true, "8", "8"], ["numval", "ival", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 17767354399704235152, 4252787363047621876, 18446744073709551615, 18446744073709551615, 132, 133, 132, 133, 26, 27, true, "8", "8"], ["numval", "ival", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 17767354399704235157, 4252787363852130202, 18446744073709551615, 18446744073709551615, 342, 343, 342, 343, 63, 64, true, "5", "5"], ["sentence", "", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 6583879206628208074, 18118287548868180601, 18446744073709551615, 18446744073709551615, 0, 218, 0, 218, 0, 42, true, "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks.", "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks."], ["sentence", "", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 4563023981242652318, 13772109345187795922, 18446744073709551615, 18446744073709551615, 219, 331, 219, 331, 42, 61, true, "Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment.", "Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment."], ["sentence", "", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 14978678634121360006, 2618788565151427954, 18446744073709551615, 18446744073709551615, 332, 438, 332, 438, 61, 80, true, "Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", "Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer."], ["term", "enum-term-mark-2", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 2528135788265244608, 5344652883238296137, 18446744073709551615, 18446744073709551615, 179, 211, 179, 211, 36, 40, true, "learning training and prediction", "learning training and prediction"], ["term", "enum-term-mark-2", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 14743433718696772273, 15154440347503938098, 18446744073709551615, 18446744073709551615, 408, 437, 408, 437, 75, 79, true, "orchestration and store layer", "orchestration and store layer"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16269569412982647766, 2024757386881102351, 18446744073709551615, 18446744073709551615, 15, 26, 15, 26, 3, 5, true, "main system", "main system"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 13444630328481412471, 2581523576540413652, 18446744073709551615, 18446744073709551615, 41, 57, 41, 57, 8, 10, true, "Kubernetes nodes", "Kubernetes nodes"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 6563416156472488864, 2213523264527249618, 18446744073709551615, 18446744073709551615, 65, 74, 65, 74, 12, 14, true, "CPU cores", "CPU cores"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16269569728729474655, 9476002452497745608, 18446744073709551615, 18446744073709551615, 87, 98, 87, 98, 18, 20, true, "main memory", "main memory"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 3613081198034507866, 9231793791806678387, 18446744073709551615, 18446744073709551615, 174, 196, 174, 196, 35, 38, true, "deep learning training", "deep learning training"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 11816234786078857760, 13747687809735994367, 18446744073709551615, 18446744073709551615, 201, 217, 201, 217, 39, 41, true, "prediction tasks", "prediction tasks"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 1277611218502696979, 1791114475320884340, 18446744073709551615, 18446744073709551615, 229, 245, 229, 245, 45, 47, true, "flexible binding", "flexible binding"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15130402117130351741, 1356464078922122453, 18446744073709551615, 18446744073709551615, 266, 280, 266, 280, 50, 52, true, "specific nodes", "specific nodes"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 5422119649868232113, 9343245046888260619, 18446744073709551615, 18446744073709551615, 286, 301, 286, 301, 54, 56, true, "great advantage", "great advantage"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 3499436126781074633, 11463332423358967580, 18446744073709551615, 18446744073709551615, 309, 330, 309, 330, 58, 60, true, "Kubernetes deployment", "Kubernetes deployment"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 6641605220774829847, 5925222761573375364, 18446744073709551615, 18446744073709551615, 344, 366, 344, 366, 64, 67, true, "other virtual machines", "other virtual machines"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 3081516039280483029, 12956841069716270529, 18446744073709551615, 18446744073709551615, 426, 437, 426, 437, 77, 79, true, "store layer", "store layer"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541479948, 9352056469740640944, 18446744073709551615, 18446744073709551615, 81, 83, 81, 83, 16, 17, true, "GB", "GB"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 329104162118942300, 7223786837846062912, 18446744073709551615, 18446744073709551615, 126, 131, 126, 131, 25, 26, true, "POWER", "POWER"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 389609625621164460, 9971368100247265020, 18446744073709551615, 18446744073709551615, 134, 138, 134, 138, 27, 28, true, "node", "node"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 389609625538377862, 10024562876350301328, 18446744073709551615, 18446744073709551615, 149, 153, 149, 153, 30, 31, true, "GPUs", "GPUs"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 990358581043194791, 3870495549619153573, 18446744073709551615, 18446744073709551615, 249, 262, 249, 262, 48, 49, true, "microservices", "microservices"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 14635102416861801722, 13295514760162013366, 18446744073709551615, 18446744073709551615, 392, 400, 392, 400, 72, 73, true, "services", "services"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 4327709553742697698, 1943965779924640606, 18446744073709551615, 18446744073709551615, 408, 421, 408, 421, 75, 76, true, "orchestration", "orchestration"], ["verb", "compound-verb", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16914551794403134749, 14289767005593609982, 18446744073709551615, 18446744073709551615, 154, 169, 154, 169, 31, 34, true, "is dedicated to", "is dedicated to"], ["verb", "compound-verb", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 556334503717216086, 14184460239514695626, 18446744073709551615, 18446744073709551615, 367, 387, 367, 387, 67, 71, true, "are employed to host", "are employed to host"], ["verb", "single-verb", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 14814150617868433693, 2828843033811804789, 18446744073709551615, 18446744073709551615, 27, 35, 27, 35, 5, 6, true, "operates", "operates"], ["verb", "single-verb", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541486535, 9351998670672415825, 18446744073709551615, 18446744073709551615, 281, 283, 281, 283, 52, 53, true, "is", "is"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541485678, 9351998466637895866, 18446744073709551615, 18446744073709551615, 36, 38, 36, 38, 6, 7, true, "on", "on"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 389609625618037948, 9971074391070552914, 18446744073709551615, 18446744073709551615, 58, 62, 58, 62, 10, 11, true, "with", "with"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541485670, 9351998465977112201, 18446744073709551615, 18446744073709551615, 84, 86, 84, 86, 17, 18, true, "of", "of"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 389609625618037948, 9971074391070570820, 18446744073709551615, 18446744073709551615, 139, 143, 139, 143, 28, 29, true, "with", "with"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541485670, 9351998465978041953, 18446744073709551615, 18446744073709551615, 246, 248, 246, 248, 47, 48, true, "of", "of"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16381206565712212855, 15798433650653459254, 18446744073709551615, 18446744073709551615, 302, 308, 302, 308, 56, 58, true, "of the", "of the"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16381206560518651853, 15995808466227689457, 18446744073709551615, 18446744073709551615, 401, 407, 401, 407, 73, 75, true, "in the", "in the"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16381206519425733256, 10817242341263362701, 18446744073709551615, 18446744073709551615, 167, 173, 167, 173, 33, 35, true, "to the", "to the"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541485865, 9351998575526350776, 18446744073709551615, 18446744073709551615, 263, 265, 263, 265, 49, 50, true, "to", "to"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541485865, 9351998575526358427, 18446744073709551615, 18446744073709551615, 380, 382, 380, 382, 69, 70, true, "to", "to"], ["numval", "fval", 17769988780693768120, "TEXT", "#/texts/87", 1.0, 12178341415896306587, 11831950895164487341, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "4.3", "4.3"], ["numval", "ival", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 17767354399704235159, 6323623135901186785, 18446744073709551615, 18446744073709551615, 258, 259, 258, 259, 48, 49, true, "7", "7"], ["sentence", "", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 11703520391970010536, 357834892882144608, 18446744073709551615, 18446744073709551615, 0, 56, 0, 56, 0, 11, true, "Let us now discuss some scaling results on our platform.", "Let us now discuss some scaling results on our platform."], ["sentence", "", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8157900891627315247, 15072204804485786404, 18446744073709551615, 18446744073709551615, 57, 247, 57, 247, 11, 46, true, "As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources.", "As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources."], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 4421383392096991748, 9783447214836928971, 18446744073709551615, 18446744073709551615, 229, 246, 229, 246, 43, 45, true, "compute resources", "compute resources"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106478445190161533, 16203321325185840639, 18446744073709551615, 18446744073709551615, 32, 39, 32, 39, 6, 7, true, "results", "results"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 14814125365076808131, 8660743237002823027, 18446744073709551615, 18446744073709551615, 47, 55, 47, 55, 9, 10, true, "platform", "platform"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 5948159060234732715, 1856499645219012237, 18446744073709551615, 18446744073709551615, 82, 91, 82, 91, 17, 18, true, "beginning", "beginning"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106478708629288965, 9823809706360263062, 18446744073709551615, 18446744073709551615, 99, 106, 99, 106, 20, 21, true, "section", "section"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 13240311013633905449, 13295854927356281099, 18446744073709551615, 18446744073709551615, 112, 124, 112, 124, 23, 24, true, "requirements", "requirements"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 14814125365076808131, 8660743237002811080, 18446744073709551615, 18446744073709551615, 133, 141, 133, 141, 26, 27, true, "platform", "platform"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206521526353544, 17652704280141269029, 18446744073709551615, 18446744073709551615, 160, 166, 160, 166, 30, 31, true, "regard", "regard"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206574973295053, 10204329654469875979, 18446744073709551615, 18446744073709551615, 174, 180, 174, 180, 33, 34, true, "number", "number"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 329104159157820437, 7766615889727787026, 18446744073709551615, 18446744073709551615, 184, 189, 184, 189, 35, 36, true, "users", "users"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206574973295053, 10204329654469873734, 18446744073709551615, 18446744073709551615, 195, 201, 195, 201, 38, 39, true, "number", "number"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 6167933651658664291, 11552308682759832261, 18446744073709551615, 18446744073709551615, 215, 224, 215, 224, 41, 42, true, "documents", "documents"], ["verb", "compound-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 17858839733535377008, 1920907252286073734, 18446744073709551615, 18446744073709551615, 142, 154, 142, 154, 27, 29, true, "were scaling", "were scaling"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 12178341415896275389, 7943964340963228966, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "Let", "Let"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106397868479560363, 17872032239065412285, 18446744073709551615, 18446744073709551615, 11, 18, 11, 18, 3, 4, true, "discuss", "discuss"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106478648771436891, 8278854775081679845, 18446744073709551615, 18446744073709551615, 24, 31, 24, 31, 5, 6, true, "scaling", "scaling"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106476015433464060, 11919317671569973370, 18446744073709551615, 18446744073709551615, 63, 70, 63, 70, 13, 14, true, "pointed", "pointed"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 6171728176299542016, 13908821060191020107, 18446744073709551615, 18446744073709551615, 205, 214, 205, 214, 40, 41, true, "processed", "processed"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 15441160910541485678, 1498640040591994871, 18446744073709551615, 18446744073709551615, 40, 42, 40, 42, 7, 8, true, "on", "on"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 15441160910541480533, 1498641809232943552, 18446744073709551615, 18446744073709551615, 57, 59, 57, 59, 11, 12, true, "As", "As"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206560518651853, 18249880271754047870, 18446744073709551615, 18446744073709551615, 75, 81, 75, 81, 15, 17, true, "in the", "in the"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206565712212855, 14962824842694991931, 18446744073709551615, 18446744073709551615, 92, 98, 92, 98, 18, 20, true, "of the", "of the"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106397727991264470, 14674040507008400644, 18446744073709551615, 18446744073709551615, 125, 132, 125, 132, 24, 26, true, "for the", "for the"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 389609625618037948, 16164147193980002015, 18446744073709551615, 18446744073709551615, 155, 159, 155, 159, 29, 30, true, "with", "with"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 15441160910541485670, 1498640040717785247, 18446744073709551615, 18446744073709551615, 181, 183, 181, 183, 34, 35, true, "of", "of"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 15441160910541485670, 1498640040717786992, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 39, 40, true, "of", "of"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206519425733256, 17614093764484085203, 18446744073709551615, 18446744073709551615, 167, 173, 167, 173, 31, 33, true, "to the", "to the"], ["numval", "year", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625548777057, 52173736134253972, 18446744073709551615, 18446744073709551615, 172, 176, 172, 176, 35, 36, true, "2017", "2017"], ["numval", "ival", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541481786, 6866904732321432818, 18446744073709551615, 18446744073709551615, 6, 8, 6, 8, 1, 2, true, "20", "20"], ["sentence", "", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 7518545641936550538, 13327477621696107517, 18446744073709551615, 18446744073709551615, 32, 177, 32, 177, 8, 37, true, "As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017.", "As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017."], ["sentence", "", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 10434596786350098942, 4246700910361462765, 18446744073709551615, 18446744073709551615, 178, 363, 178, 363, 37, 71, true, "It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time.", "It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time."], ["sentence", "", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15858606414680046310, 9355067359629881245, 18446744073709551615, 18446744073709551615, 364, 504, 364, 504, 71, 99, true, "Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", "Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity."], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 2903324788977241891, 10047065559827135054, 18446744073709551615, 18446744073709551615, 82, 91, 82, 91, 19, 21, true, "PDF pages", "PDF pages"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 2245603532715892325, 16478559053695087323, 18446744073709551615, 18446744073709551615, 226, 237, 226, 237, 46, 48, true, "sharp steps", "sharp steps"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 11942859038914222878, 15085431028673446657, 18446744073709551615, 18446744073709551615, 286, 301, 286, 301, 56, 58, true, "massive amounts", "massive amounts"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 7252014402665196659, 15261261573536593307, 18446744073709551615, 18446744073709551615, 342, 354, 342, 354, 66, 68, true, "small amount", "small amount"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 2245697320636800498, 85672314451497322, 18446744073709551615, 18446744073709551615, 472, 483, 472, 483, 93, 95, true, "short burst", "short burst"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 16558536334265483368, 11847226154466128513, 18446744073709551615, 18446744073709551615, 487, 503, 487, 503, 96, 98, true, "extreme activity", "extreme activity"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 16381206574973295053, 8846230013490521873, 18446744073709551615, 18446744073709551615, 52, 58, 52, 58, 14, 15, true, "number", "number"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 329104159157820437, 8249018447781337774, 18446744073709551615, 18446744073709551615, 62, 67, 62, 67, 16, 17, true, "users", "users"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625631241985, 100194020203438184, 18446744073709551615, 18446744073709551615, 126, 130, 126, 130, 26, 27, true, "time", "time"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 16381206590630165717, 11719327657280904751, 18446744073709551615, 18446744073709551615, 141, 147, 141, 147, 29, 30, true, "launch", "launch"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106478708506632112, 18231497537744338632, 18446744073709551615, 18446744073709551615, 155, 162, 155, 162, 32, 33, true, "service", "service"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 329104161963544245, 9857237958615296698, 18446744073709551615, 18446744073709551615, 166, 171, 166, 171, 34, 35, true, "April", "April"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 329104159157820437, 8249018447781333663, 18446744073709551615, 18446744073709551615, 260, 265, 260, 265, 52, 53, true, "users", "users"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 6167933651658664291, 12240764636372283946, 18446744073709551615, 18446744073709551615, 305, 314, 305, 314, 59, 60, true, "documents", "documents"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106478708506632112, 18231497537744787108, 18446744073709551615, 18446744073709551615, 324, 331, 324, 331, 62, 63, true, "service", "service"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625631241985, 100194020203425198, 18446744073709551615, 18446744073709551615, 358, 362, 358, 362, 69, 70, true, "time", "time"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 16381206568241679420, 9368345895491961575, 18446744073709551615, 18446744073709551615, 375, 381, 375, 381, 74, 75, true, "design", "design"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106476000253296785, 450489361038114021, 18446744073709551615, 18446744073709551615, 396, 403, 396, 403, 80, 81, true, "problem", "problem"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 329104161666914718, 9871052204414646047, 18446744073709551615, 18446744073709551615, 425, 430, 425, 430, 84, 85, true, "peaks", "peaks"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106478708506632112, 18231497537744780505, 18446744073709551615, 18446744073709551615, 439, 446, 439, 446, 87, 88, true, "service", "service"], ["verb", "compound-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 11953671505157202285, 11937015333801488817, 18446744073709551615, 18446744073709551615, 92, 120, 92, 120, 21, 25, true, "has been increasing steadily", "has been increasing steadily"], ["verb", "compound-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15603889104119874938, 7803556645500016268, 18446744073709551615, 18446744073709551615, 181, 191, 181, 191, 38, 40, true, "is however", "is however"], ["verb", "compound-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 7806959182595507225, 2570245507595885020, 18446744073709551615, 18446744073709551615, 266, 285, 266, 285, 53, 56, true, "have been uploading", "have been uploading"], ["verb", "compound-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106477873809970266, 16668743459306840426, 18446744073709551615, 18446744073709551615, 386, 393, 386, 393, 77, 79, true, "was not", "was not"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106397812083771063, 12986611233641368860, 18446744073709551615, 18446744073709551615, 39, 46, 39, 46, 10, 12, true, "can see", "can see"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 6171728176299542016, 11463438981088562167, 18446744073709551615, 18446744073709551615, 72, 81, 72, 81, 18, 19, true, "processed", "processed"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 12178341415895638617, 17393490097429873872, 18446744073709551615, 18446744073709551615, 207, 210, 207, 210, 42, 43, true, "see", "see"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 12178341415895564896, 17393508726112862574, 18446744073709551615, 18446744073709551615, 222, 225, 222, 225, 45, 46, true, "are", "are"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 5581574448026047221, 15445029894002382055, 18446744073709551615, 18446744073709551615, 239, 249, 239, 249, 49, 50, true, "indicating", "indicating"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 6807190128157759045, 13053632147510739476, 18446744073709551615, 18446744073709551615, 407, 418, 407, 418, 82, 83, true, "accommodate", "accommodate"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 12178341415895525606, 17393661643866953573, 18446744073709551615, 18446744073709551615, 447, 450, 447, 450, 88, 89, true, "was", "was"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 16381206485955868973, 14033094471219798649, 18446744073709551615, 18446744073709551615, 459, 465, 459, 465, 91, 92, true, "handle", "handle"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541480533, 6866903594362655679, 18446744073709551615, 18446744073709551615, 32, 34, 32, 34, 8, 9, true, "As", "As"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485670, 6866903700093396448, 18446744073709551615, 18446744073709551615, 59, 61, 59, 61, 15, 16, true, "of", "of"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625618865305, 100185561388315538, 18446744073709551615, 18446744073709551615, 121, 125, 121, 125, 25, 26, true, "over", "over"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 6168057894310307081, 494748048694411645, 18446744073709551615, 18446744073709551615, 131, 140, 131, 140, 27, 29, true, "since the", "since the"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485670, 6866903700093386593, 18446744073709551615, 18446744073709551615, 148, 150, 148, 150, 30, 31, true, "of", "of"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541486538, 6866903364400136605, 18446744073709551615, 18446744073709551615, 163, 165, 163, 165, 33, 34, true, "in", "in"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625631229034, 100122430387311494, 18446744073709551615, 18446744073709551615, 211, 215, 211, 215, 43, 44, true, "that", "that"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 3504047303126433547, 11549299435570708613, 18446744073709551615, 18446744073709551615, 250, 259, 250, 259, 50, 52, true, "that some", "that some"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485670, 6866903700093379447, 18446744073709551615, 18446744073709551615, 302, 304, 302, 304, 58, 59, true, "of", "of"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 14637953883063114384, 1261358077151630278, 18446744073709551615, 18446744073709551615, 315, 323, 315, 323, 60, 62, true, "into the", "into the"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625698530964, 94041907290639477, 18446744073709551615, 18446744073709551615, 332, 336, 332, 336, 63, 65, true, "in a", "in a"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485670, 6866903700093367010, 18446744073709551615, 18446744073709551615, 355, 357, 355, 357, 68, 69, true, "of", "of"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485670, 6866903700093361485, 18446744073709551615, 18446744073709551615, 484, 486, 484, 486, 95, 96, true, "of", "of"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485865, 6866903731987646441, 18446744073709551615, 18446744073709551615, 204, 206, 204, 206, 41, 42, true, "to", "to"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485865, 6866903731988159776, 18446744073709551615, 18446744073709551615, 368, 370, 368, 370, 72, 73, true, "to", "to"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485865, 6866903731988157479, 18446744073709551615, 18446744073709551615, 404, 406, 404, 406, 81, 82, true, "to", "to"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485865, 6866903731988157354, 18446744073709551615, 18446744073709551615, 456, 458, 456, 458, 90, 91, true, "to", "to"], ["numval", "ival", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 17767354399704235152, 10891777227864623310, 18446744073709551615, 18446744073709551615, 10, 11, 10, 11, 2, 3, true, "8", "8"], ["parenthesis", "round brackets", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 4022242346074010063, 12541000686584287248, 18446744073709551615, 18446744073709551615, 74, 178, 74, 178, 14, 33, true, "(i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON)", "(i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON)"], ["expression", "common", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541486545, 15841608933708088140, 18446744073709551615, 18446744073709551615, 75, 79, 75, 79, 15, 16, true, "ie", "i.e."], ["expression", "word-concatenation", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 2217258678859216685, 3493505507787421146, 18446744073709551615, 18446744073709551615, 621, 639, 621, 639, 119, 120, true, "better-than-linear", "better-than-linear"], ["expression", "word-concatenation", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 6285955549867796622, 17538568638231419383, 18446744073709551615, 18446744073709551615, 1121, 1137, 1121, 1137, 209, 210, true, "time-to-solution", "time-to-solution"], ["expression", "word-concatenation", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14639522327238241124, 8193922819820873277, 18446744073709551615, 18446744073709551615, 1155, 1163, 1155, 1163, 213, 214, true, "job-size", "job-size"], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 17950606815080185664, 182687704084943809, 18446744073709551615, 18446744073709551615, 0, 228, 0, 228, 0, 42, true, "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources.", "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 9291869836472436551, 7073966199782583842, 18446744073709551615, 18446744073709551615, 229, 320, 229, 320, 42, 58, true, "We show this scaling by displaying the speedup versus the number of worker nodes available.", "We show this scaling by displaying the speedup versus the number of worker nodes available."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 11942797776008272897, 18354315767267706544, 18446744073709551615, 18446744073709551615, 321, 448, 321, 448, 58, 83, true, "Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores.", "Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 1337110641996971981, 12497994289374110365, 18446744073709551615, 18446744073709551615, 449, 580, 449, 580, 83, 111, true, "As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes.", "As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 7140787443244237501, 14548003650603154277, 18446744073709551615, 18446744073709551615, 581, 724, 581, 724, 111, 135, true, "Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker.", "Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 7774794569544328631, 12157835320367080769, 18446744073709551615, 18446744073709551615, 725, 876, 725, 876, 135, 166, true, "The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level.", "The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 11827058603931473819, 1210577326445407272, 18446744073709551615, 18446744073709551615, 877, 1042, 877, 1042, 166, 194, true, "The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes.", "The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8702943219455942098, 9096969319153565329, 18446744073709551615, 18446744073709551615, 1043, 1164, 1043, 1164, 194, 215, true, "Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", "Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size."], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8172710598775048780, 6087292431822475163, 18446744073709551615, 18446744073709551615, 46, 73, 46, 73, 11, 14, true, "main pipeline microservices", "main pipeline microservices"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 12653831733608918357, 5399694712153694222, 18446744073709551615, 18446744073709551615, 95, 108, 95, 108, 19, 21, true, "PDF documents", "PDF documents"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 12400507963759742880, 6135519514760056473, 18446744073709551615, 18446744073709551615, 297, 309, 297, 309, 54, 56, true, "worker nodes", "worker nodes"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 4940765489471971613, 347303216115352656, 18446744073709551615, 18446744073709551615, 370, 391, 370, 391, 68, 70, true, "pipeline microservice", "pipeline microservice"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 10318072901532559633, 7254172824621403054, 18446744073709551615, 18446744073709551615, 507, 519, 507, 519, 96, 98, true, "tasks scales", "tasks scales"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 18001738063114990140, 7718080442102537061, 18446744073709551615, 18446744073709551615, 621, 647, 621, 647, 119, 121, true, "better-than-linear speedup", "better-than-linear speedup"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 3088520230983972493, 6524782884039209835, 18446744073709551615, 18446744073709551615, 670, 691, 670, 691, 126, 128, true, "bandwidth constraints", "bandwidth constraints"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14290393742330326868, 1869283060159003292, 18446744073709551615, 18446744073709551615, 744, 758, 744, 758, 139, 141, true, "assemble tasks", "assemble tasks"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 13968810274884964698, 7333175022141755015, 18446744073709551615, 18446744073709551615, 865, 875, 865, 875, 163, 165, true, "page level", "page level"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 18404777356709557822, 5867368598465364348, 18446744073709551615, 18446744073709551615, 938, 952, 938, 952, 177, 179, true, "load imbalance", "load imbalance"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 12400507963759742880, 6135519514760170296, 18446744073709551615, 18446744073709551615, 965, 977, 965, 977, 181, 183, true, "worker nodes", "worker nodes"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 12569603855738370264, 1147410557148444790, 18446744073709551615, 18446744073709551615, 1023, 1041, 1023, 1041, 190, 193, true, "large corpus sizes", "large corpus sizes"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 4421383392096991748, 17586453718413772848, 18446744073709551615, 18446744073709551615, 1082, 1099, 1082, 1099, 202, 204, true, "compute resources", "compute resources"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206514091025767, 11298218412956847237, 18446744073709551615, 18446744073709551615, 3, 9, 3, 9, 1, 2, true, "Figure", "Figure"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478648771436891, 15412781195243883400, 18446744073709551615, 18446744073709551615, 25, 32, 25, 32, 7, 8, true, "scaling", "scaling"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106479143794098783, 12796848297776230218, 18446744073709551615, 18446744073709551615, 84, 91, 84, 91, 17, 18, true, "parsing", "parsing"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106464587473865376, 14658563877589949653, 18446744073709551615, 18446744073709551615, 119, 126, 119, 126, 23, 24, true, "machine", "machine"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206567230470443, 6941201434190273501, 18446744073709551615, 18446744073709551615, 135, 141, 135, 141, 25, 26, true, "models", "models"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 2703018679320364082, 5037546725350435645, 18446744073709551615, 18446744073709551615, 146, 156, 146, 156, 27, 28, true, "conversion", "conversion"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 6167933651658664291, 11495196011120521523, 18446744073709551615, 18446744073709551615, 160, 169, 160, 169, 29, 30, true, "documents", "documents"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625541450799, 11852205465525539051, 18446744073709551615, 18446744073709551615, 173, 177, 173, 177, 31, 32, true, "JSON", "JSON"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14814125365076808131, 1145249750150199435, 18446744073709551615, 18446744073709551615, 186, 194, 186, 194, 35, 36, true, "platform", "platform"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206521526353544, 6483551066150257775, 18446744073709551615, 18446744073709551615, 200, 206, 200, 206, 37, 38, true, "regard", "regard"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 6168338487309432467, 8730004420584075578, 18446744073709551615, 18446744073709551615, 218, 227, 218, 227, 40, 41, true, "resources", "resources"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478648771436891, 15412781195243911626, 18446744073709551615, 18446744073709551615, 242, 249, 242, 249, 45, 46, true, "scaling", "scaling"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478695960615463, 15062829371033729664, 18446744073709551615, 18446744073709551615, 268, 275, 268, 275, 49, 50, true, "speedup", "speedup"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206574973295053, 7759102089231672623, 18446744073709551615, 18446744073709551615, 287, 293, 287, 293, 52, 53, true, "number", "number"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478059506484182, 992201111003243269, 18446744073709551615, 18446744073709551615, 349, 356, 349, 356, 65, 66, true, "workers", "workers"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206557159905849, 4500784834839577245, 18446744073709551615, 18446744073709551615, 404, 410, 404, 410, 73, 74, true, "worker", "worker"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625621164460, 11851913345703152408, 18446744073709551615, 18446744073709551615, 427, 431, 427, 431, 78, 79, true, "node", "node"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104161555640697, 12658575526930668613, 18446744073709551615, 18446744073709551615, 442, 447, 442, 447, 81, 82, true, "cores", "cores"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478695960615463, 15062829371036718218, 18446744073709551615, 18446744073709551615, 473, 480, 473, 480, 89, 90, true, "speedup", "speedup"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104161667983915, 11953058923899695299, 18446744073709551615, 18446744073709551615, 488, 493, 488, 493, 92, 93, true, "parse", "parse"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541480579, 15841608372110957817, 18446744073709551615, 18446744073709551615, 498, 500, 498, 500, 94, 95, true, "ML", "ML"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206574973295053, 7759102089231787311, 18446744073709551615, 18446744073709551615, 542, 548, 542, 548, 102, 103, true, "number", "number"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478059506484182, 992201111003203952, 18446744073709551615, 18446744073709551615, 552, 559, 552, 559, 104, 105, true, "workers", "workers"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104161758737773, 12673240055158662957, 18446744073709551615, 18446744073709551615, 574, 579, 574, 579, 109, 110, true, "nodes", "nodes"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14652257141644167489, 8090587240840002892, 18446744073709551615, 18446744073709551615, 699, 707, 699, 707, 130, 131, true, "baseline", "baseline"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206557159905849, 4500784834839606045, 18446744073709551615, 18446744073709551615, 717, 723, 717, 723, 133, 134, true, "worker", "worker"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478695960615463, 15062829371033753418, 18446744073709551615, 18446744073709551615, 729, 736, 729, 736, 136, 137, true, "speedup", "speedup"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 2703018939289543887, 15004756027981769615, 18446744073709551615, 18446744073709551615, 763, 773, 763, 773, 143, 144, true, "comparison", "comparison"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625631210899, 11837631279683138832, 18446744073709551615, 18446744073709551615, 804, 808, 804, 808, 151, 152, true, "task", "task"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14650401089286948001, 16245137145237128880, 18446744073709551615, 18446744073709551615, 841, 849, 841, 849, 158, 159, true, "document", "document"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 11600564911974996302, 14480046541473745390, 18446744073709551615, 18446744073709551615, 881, 892, 881, 892, 167, 168, true, "variability", "variability"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206590668214829, 418453764520998193, 18446744073709551615, 18446744073709551615, 900, 906, 900, 906, 170, 171, true, "length", "length"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 6167933651658664291, 11495196011120532495, 18446744073709551615, 18446744073709551615, 910, 919, 910, 919, 172, 173, true, "documents", "documents"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104161571401725, 12673278341361969037, 18446744073709551615, 18446744073709551615, 1103, 1108, 1103, 1108, 205, 206, true, "order", "order"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 6285955549867796622, 17538568638231419383, 18446744073709551615, 18446744073709551615, 1121, 1137, 1121, 1137, 209, 210, true, "time-to-solution", "time-to-solution"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14639522327238241124, 8193922819820873277, 18446744073709551615, 18446744073709551615, 1155, 1163, 1155, 1163, 213, 214, true, "job-size", "job-size"], ["verb", "compound-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16437991637119672281, 10698691307559986577, 18446744073709551615, 18446744073709551615, 330, 343, 330, 343, 61, 64, true, "chose to have", "chose to have"], ["verb", "compound-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15603860935510693939, 14727933870906010759, 18446744073709551615, 18446744073709551615, 411, 421, 411, 421, 74, 76, true, "is running", "is running"], ["verb", "compound-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 4859670139939149227, 6280919696978205559, 18446744073709551615, 18446744073709551615, 818, 833, 818, 833, 154, 156, true, "be parallelised", "be parallelised"], ["verb", "compound-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 2871347585595950403, 17619376144327821929, 18446744073709551615, 18446744073709551615, 920, 932, 920, 932, 173, 175, true, "is reflected", "is reflected"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625741152123, 11952417481958675502, 18446744073709551615, 18446744073709551615, 16, 20, 16, 20, 5, 6, true, "show", "show"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541486545, 15841608933708088140, 18446744073709551615, 18446744073709551615, 75, 79, 75, 79, 15, 16, true, "ie", "i.e."], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14650448030444381648, 6220015674503799158, 18446744073709551615, 18446744073709551615, 110, 118, 110, 118, 22, 23, true, "applying", "applying"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106342444693204894, 3141726382109473264, 18446744073709551615, 18446744073709551615, 127, 134, 127, 134, 24, 25, true, "learned", "learned"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106398484825895017, 9843385479250406664, 18446744073709551615, 18446744073709551615, 210, 217, 210, 217, 39, 40, true, "compute", "compute"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625741152123, 11952417481958832640, 18446744073709551615, 18446744073709551615, 232, 236, 232, 236, 43, 44, true, "show", "show"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 5314879136556773391, 10872235211663904849, 18446744073709551615, 18446744073709551615, 253, 263, 253, 263, 47, 48, true, "displaying", "displaying"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206519567123880, 11458061957152441695, 18446744073709551615, 18446744073709551615, 276, 282, 276, 282, 50, 51, true, "versus", "versus"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478708506631920, 15215214992224138903, 18446744073709551615, 18446744073709551615, 357, 364, 357, 364, 66, 67, true, "serving", "serving"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14892726175400695403, 11929108714311456052, 18446744073709551615, 18446744073709551615, 456, 467, 456, 467, 85, 87, true, "can observe", "can observe"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104159174415764, 15201692130147370947, 18446744073709551615, 18446744073709551615, 501, 506, 501, 506, 95, 96, true, "apply", "apply"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106342033696543838, 12894177882275964000, 18446744073709551615, 18446744073709551615, 602, 609, 602, 609, 116, 117, true, "observe", "observe"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106397800846024988, 10875439300264671294, 18446744073709551615, 18446744073709551615, 655, 662, 655, 662, 123, 124, true, "appears", "appears"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14637929372960545624, 5914110341326941133, 18446744073709551615, 18446744073709551615, 775, 783, 775, 783, 145, 146, true, "flattens", "flattens"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14650440089690345452, 13442210495055254219, 18446744073709551615, 18446744073709551615, 992, 1000, 992, 1000, 186, 187, true, "averages", "averages"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 12178341415895564896, 12750707122609445157, 18446744073709551615, 18446744073709551615, 1060, 1063, 1060, 1063, 197, 198, true, "are", "are"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104161785194305, 12659210101863210938, 18446744073709551615, 18446744073709551615, 1072, 1077, 1072, 1077, 200, 201, true, "scale", "scale"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625632420840, 11837667465904018998, 18446744073709551615, 18446744073709551615, 1112, 1116, 1112, 1116, 207, 208, true, "keep", "keep"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 4606782280409462864, 1759956167227045589, 18446744073709551615, 18446744073709551615, 1138, 1150, 1138, 1150, 210, 212, true, "constant for", "constant for"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541480354, 15841607655799874221, 18446744073709551615, 18446744073709551615, 0, 2, 0, 2, 0, 1, true, "In", "In"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206565712212855, 6967163224569431769, 18446744073709551615, 18446744073709551615, 33, 39, 33, 39, 8, 10, true, "of the", "of the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485670, 15841608916207186363, 18446744073709551615, 18446744073709551615, 92, 94, 92, 94, 18, 19, true, "of", "of"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485670, 15841608916207173993, 18446744073709551615, 18446744073709551615, 157, 159, 157, 159, 28, 29, true, "of", "of"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206566339127348, 6948695024569863629, 18446744073709551615, 18446744073709551615, 179, 185, 179, 185, 33, 35, true, "on the", "on the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625618037948, 11853381785047006947, 18446744073709551615, 18446744073709551615, 195, 199, 195, 199, 36, 37, true, "with", "with"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541486989, 15841608542350473314, 18446744073709551615, 18446744073709551615, 250, 252, 250, 252, 46, 47, true, "by", "by"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485670, 15841608916207199049, 18446744073709551615, 18446744073709551615, 294, 296, 294, 296, 53, 54, true, "of", "of"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14091433066300748251, 10813497490882117614, 18446744073709551615, 18446744073709551615, 393, 403, 393, 403, 71, 73, true, "since each", "since each"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625618762887, 11853298334064133313, 18446744073709551615, 18446744073709551615, 422, 426, 422, 426, 76, 78, true, "on a", "on a"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625618037948, 11853381785046890442, 18446744073709551615, 18446744073709551615, 432, 436, 432, 436, 79, 80, true, "with", "with"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541480533, 15841607645344043171, 18446744073709551615, 18446744073709551615, 449, 451, 449, 451, 83, 84, true, "As", "As"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206560518651853, 299391170561034072, 18446744073709551615, 18446744073709551615, 481, 487, 481, 487, 90, 92, true, "in the", "in the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 5535791613041986682, 1987456193213323417, 18446744073709551615, 18446744073709551615, 529, 541, 529, 541, 99, 102, true, "with the the", "with the the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485670, 15841608916207808715, 18446744073709551615, 18446744073709551615, 549, 551, 549, 551, 103, 104, true, "of", "of"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206566339127348, 6948695024569829645, 18446744073709551615, 18446744073709551615, 692, 698, 692, 698, 128, 130, true, "on the", "on the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625618037948, 11853381785046972707, 18446744073709551615, 18446744073709551615, 708, 712, 708, 712, 131, 132, true, "with", "with"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206566339127348, 6948695024569840929, 18446744073709551615, 18446744073709551615, 737, 743, 737, 743, 137, 139, true, "on the", "on the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541486538, 15841608934679761481, 18446744073709551615, 18446744073709551615, 760, 762, 760, 762, 142, 143, true, "in", "in"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106397797884119903, 2253942102629974295, 18446744073709551615, 18446744073709551615, 796, 803, 796, 803, 149, 151, true, "as this", "as this"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206566339127348, 6948695024569822665, 18446744073709551615, 18446744073709551615, 834, 840, 834, 840, 156, 158, true, "on the", "on the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206566339127348, 6948695024569833459, 18446744073709551615, 18446744073709551615, 858, 864, 858, 864, 161, 163, true, "on the", "on the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206560518651853, 299391170561066249, 18446744073709551615, 18446744073709551615, 893, 899, 893, 899, 168, 170, true, "in the", "in the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485670, 15841608916207884744, 18446744073709551615, 18446744073709551615, 907, 909, 907, 909, 171, 172, true, "of", "of"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625698530964, 11943889382951508475, 18446744073709551615, 18446744073709551615, 933, 937, 933, 937, 175, 177, true, "in a", "in a"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 2011002864325523456, 9569710153707003014, 18446744073709551615, 18446744073709551615, 953, 964, 953, 964, 179, 181, true, "between the", "between the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625618037948, 11853381785046974620, 18446744073709551615, 18446744073709551615, 1005, 1009, 1005, 1009, 188, 189, true, "with", "with"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541486538, 15841608934679158336, 18446744073709551615, 18446744073709551615, 1100, 1102, 1100, 1102, 204, 205, true, "in", "in"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925665456, 18446744073709551615, 18446744073709551615, 170, 172, 170, 172, 30, 31, true, "to", "to"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925705101, 18446744073709551615, 18446744073709551615, 207, 209, 207, 209, 38, 39, true, "to", "to"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925660269, 18446744073709551615, 18446744073709551615, 336, 338, 336, 338, 62, 63, true, "to", "to"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925567374, 18446744073709551615, 18446744073709551615, 667, 669, 667, 669, 125, 126, true, "to", "to"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925573526, 18446744073709551615, 18446744073709551615, 1069, 1071, 1069, 1071, 199, 200, true, "to", "to"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925579244, 18446744073709551615, 18446744073709551615, 1109, 1111, 1109, 1111, 206, 207, true, "to", "to"], ["numval", "ival", 7794115281016062068, "TEXT", "#/texts/91", 1.0, 17767354399704235157, 9706977069123592745, 18446744073709551615, 18446744073709551615, 0, 1, 0, 1, 0, 1, true, "5", "5"], ["sentence", "", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 657005981473069779, 855544429870992132, 18446744073709551615, 18446744073709551615, 0, 276, 0, 276, 0, 48, true, "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation."], ["term", "enum-term-mark-2", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 15515663500877316360, 9779616463577804921, 18446744073709551615, 18446744073709551615, 70, 88, 70, 88, 14, 17, true, "parse and annotate", "parse and annotate"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 3764444893564113560, 6998103868267134559, 18446744073709551615, 18446744073709551615, 80, 98, 80, 98, 16, 18, true, "annotate documents", "annotate documents"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 10640406915501670366, 17510700195140757278, 18446744073709551615, 18446744073709551615, 204, 222, 204, 222, 37, 39, true, "ingested documents", "ingested documents"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 7992990666316029472, 588680236579575873, 18446744073709551615, 18446744073709551615, 245, 275, 245, 275, 44, 47, true, "structured data representation", "structured data representation"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 14814125365076808131, 17380690645567101284, 18446744073709551615, 18446744073709551615, 42, 50, 42, 50, 8, 9, true, "platform", "platform"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 329104161667983915, 6897824401029810817, 18446744073709551615, 18446744073709551615, 70, 75, 70, 75, 14, 15, true, "parse", "parse"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 329104159241569908, 5347589032455145571, 18446744073709551615, 18446744073709551615, 118, 123, 118, 123, 22, 23, true, "train", "train"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 8106464587473865376, 2835791627105026255, 18446744073709551615, 18446744073709551615, 141, 148, 141, 148, 26, 27, true, "machine", "machine"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 16381206567230470443, 15747559098284370939, 18446744073709551615, 18446744073709551615, 158, 164, 158, 164, 28, 29, true, "models", "models"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 329104161571401725, 6886205595361632203, 18446744073709551615, 18446744073709551615, 168, 173, 168, 173, 30, 31, true, "order", "order"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 8106398484416916345, 12761515448611326706, 18446744073709551615, 18446744073709551615, 189, 196, 189, 196, 34, 35, true, "content", "content"], ["verb", "compound-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 13481804153867000640, 5921155616940636913, 18446744073709551615, 18446744073709551615, 3, 17, 3, 17, 1, 3, true, "have presented", "have presented"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 329104159219515955, 5354365758875797437, 18446744073709551615, 18446744073709551615, 36, 41, 36, 41, 7, 8, true, "based", "based"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 2873440693780286732, 4285909795825994377, 18446744073709551615, 18446744073709551615, 58, 68, 58, 68, 11, 13, true, "can ingest", "can ingest"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 14650447832610756948, 58344428677043651, 18446744073709551615, 18446744073709551615, 132, 140, 132, 140, 25, 26, true, "advanced", "advanced"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 14639581097006750428, 9301901239831102358, 18446744073709551615, 18446744073709551615, 149, 157, 149, 157, 27, 28, true, "learning", "learning"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 8106397496930289884, 12659480351226973306, 18446744073709551615, 18446744073709551615, 177, 184, 177, 184, 32, 33, true, "extract", "extract"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 8106398484416229602, 12761525411618135232, 18446744073709551615, 18446744073709551615, 227, 234, 227, 234, 40, 41, true, "convert", "convert"], ["conn", "single-conn", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 15441160910541486538, 5009177319356301500, 18446744073709551615, 18446744073709551615, 165, 167, 165, 167, 29, 30, true, "in", "in"], ["conn", "single-conn", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 16381206565712212855, 15774434041302426793, 18446744073709551615, 18446744073709551615, 197, 203, 197, 203, 35, 37, true, "of the", "of the"], ["conn", "single-conn", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 16381206560517276114, 4138873729787782213, 18446744073709551615, 18446744073709551615, 238, 244, 238, 244, 42, 44, true, "into a", "into a"], ["conn", "single-conn", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 15441160910541485865, 5009177098714228529, 18446744073709551615, 18446744073709551615, 174, 176, 174, 176, 31, 32, true, "to", "to"], ["sentence", "", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 5399734795549420383, 9459426372965111375, 18446744073709551615, 18446744073709551615, 0, 102, 0, 102, 0, 17, true, "The fundamental design choices in our solution have proven to enable scaling in three elementary ways.", "The fundamental design choices in our solution have proven to enable scaling in three elementary ways."], ["sentence", "", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16807999257243449869, 13297903776875612574, 18446744073709551615, 18446744073709551615, 103, 153, 103, 153, 17, 26, true, "First, it can service multiple users concurrently.", "First, it can service multiple users concurrently."], ["sentence", "", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 13213872932381400279, 13188215006770693544, 18446744073709551615, 18446744073709551615, 154, 251, 154, 251, 26, 46, true, "Second, it can ingest, parse and apply machine learned models on many documents at the same time.", "Second, it can ingest, parse and apply machine learned models on many documents at the same time."], ["sentence", "", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 9242556889455087990, 15971994141625715858, 18446744073709551615, 18446744073709551615, 252, 468, 252, 468, 46, 85, true, "Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", "Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources."], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 2281965028547407404, 4585290171111099204, 18446744073709551615, 18446744073709551615, 4, 30, 4, 30, 1, 4, true, "fundamental design choices", "fundamental design choices"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 10458141827175777973, 662304186431118688, 18446744073709551615, 18446744073709551615, 86, 101, 86, 101, 14, 16, true, "elementary ways", "elementary ways"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 17200993861033027072, 10058402512815484380, 18446744073709551615, 18446744073709551615, 125, 139, 125, 139, 22, 24, true, "multiple users", "multiple users"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 12462088721494412558, 14795705853314288990, 18446744073709551615, 18446744073709551615, 219, 233, 219, 233, 39, 41, true, "many documents", "many documents"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 6168880476795325400, 1749109350869750559, 18446744073709551615, 18446744073709551615, 241, 250, 241, 250, 43, 45, true, "same time", "same time"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 4421383392096991748, 13841033715090277122, 18446744073709551615, 18446744073709551615, 276, 293, 276, 293, 52, 54, true, "compute resources", "compute resources"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 13127417780425802861, 18242234552519957632, 18446744073709551615, 18446744073709551615, 298, 313, 298, 313, 55, 57, true, "different tasks", "different tasks"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 871079831703051200, 5077405699917193499, 18446744073709551615, 18446744073709551615, 349, 364, 349, 364, 63, 65, true, "respective load", "respective load"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 10442897704134762600, 3285893567564519587, 18446744073709551615, 18446744073709551615, 451, 467, 451, 467, 82, 84, true, "enough resources", "enough resources"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 14635106751859230946, 764401693493390910, 18446744073709551615, 18446744073709551615, 38, 46, 38, 46, 6, 7, true, "solution", "solution"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 329104161667983915, 8353591573424153686, 18446744073709551615, 18446744073709551615, 177, 182, 177, 182, 32, 33, true, "parse", "parse"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 8106464587473865376, 16865370909529075844, 18446744073709551615, 18446744073709551615, 193, 200, 193, 200, 35, 36, true, "machine", "machine"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206567230470443, 6296565769111805720, 18446744073709551615, 18446744073709551615, 209, 215, 209, 215, 37, 38, true, "models", "models"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 329104161844229707, 8223163135175074012, 18446744073709551615, 18446744073709551615, 252, 257, 252, 257, 46, 47, true, "Third", "Third"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 14814125365076808131, 2596919094696196606, 18446744073709551615, 18446744073709551615, 321, 329, 321, 329, 59, 60, true, "platform", "platform"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 2703018679320364082, 16899905581150215026, 18446744073709551615, 18446744073709551615, 372, 382, 372, 382, 67, 68, true, "conversion", "conversion"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 6167933651658664291, 2405213947196016063, 18446744073709551615, 18446744073709551615, 386, 395, 386, 395, 69, 70, true, "documents", "documents"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 14814125365076808131, 2596919094696188986, 18446744073709551615, 18446744073709551615, 403, 411, 403, 411, 72, 73, true, "platform", "platform"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 329104159219994925, 8265223761504278760, 18446744073709551615, 18446744073709551615, 422, 427, 422, 427, 76, 77, true, "times", "times"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 389609625631241985, 2218225659402359325, 18446744073709551615, 18446744073709551615, 439, 443, 439, 443, 79, 80, true, "time", "time"], ["verb", "compound-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 3403952970044578622, 10903875917460680118, 18446744073709551615, 18446744073709551615, 47, 76, 47, 76, 7, 12, true, "have proven to enable scaling", "have proven to enable scaling"], ["verb", "compound-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15412069422981600492, 6547325180036345245, 18446744073709551615, 18446744073709551615, 330, 342, 330, 342, 60, 62, true, "according to", "according to"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 14892726286148891751, 10384184194505177525, 18446744073709551615, 18446744073709551615, 113, 124, 113, 124, 20, 22, true, "can service", "can service"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 2873440693780286732, 15985974084754193151, 18446744073709551615, 18446744073709551615, 165, 175, 165, 175, 29, 31, true, "can ingest", "can ingest"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 329104159174415764, 8268242647359376883, 18446744073709551615, 18446744073709551615, 187, 192, 187, 192, 34, 35, true, "apply", "apply"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 8106342444693204894, 6388168354172051323, 18446744073709551615, 18446744073709551615, 201, 208, 201, 208, 36, 37, true, "learned", "learned"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 5949049089925459445, 6157765644161528738, 18446744073709551615, 18446744073709551615, 262, 271, 262, 271, 49, 51, true, "can scale", "can scale"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541486535, 12447978246358110993, 18446744073709551615, 18446744073709551615, 412, 414, 412, 414, 73, 74, true, "is", "is"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 8106396909840561507, 15824344309645083727, 18446744073709551615, 18446744073709551615, 428, 435, 428, 435, 77, 78, true, "bounded", "bounded"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 329104159209890620, 8264779023608497036, 18446744073709551615, 18446744073709551615, 445, 450, 445, 450, 81, 82, true, "given", "given"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541486538, 12447978247236708799, 18446744073709551615, 18446744073709551615, 31, 33, 31, 33, 4, 5, true, "in", "in"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541486538, 12447978247236781944, 18446744073709551615, 18446744073709551615, 77, 79, 77, 79, 12, 13, true, "in", "in"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541485678, 12447978235992890146, 18446744073709551615, 18446744073709551615, 216, 218, 216, 218, 38, 39, true, "on", "on"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206568372064271, 7263818147332248111, 18446744073709551615, 18446744073709551615, 234, 240, 234, 240, 41, 43, true, "at the", "at the"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 12178341415895625940, 10930510655083395949, 18446744073709551615, 18446744073709551615, 294, 297, 294, 297, 54, 55, true, "for", "for"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206566339127348, 6281427824769892480, 18446744073709551615, 18446744073709551615, 314, 320, 314, 320, 57, 59, true, "on the", "on the"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206579218901666, 7932367388675800903, 18446744073709551615, 18446744073709551615, 365, 371, 365, 371, 65, 67, true, "so the", "so the"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541485670, 12447978245548248810, 18446744073709551615, 18446744073709551615, 383, 385, 383, 385, 68, 69, true, "of", "of"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206566339127348, 6281427824769909768, 18446744073709551615, 18446744073709551615, 396, 402, 396, 402, 70, 72, true, "on the", "on the"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206568372178543, 7263503225869480888, 18446744073709551615, 18446744073709551615, 415, 421, 415, 421, 74, 76, true, "at all", "at all"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541486538, 12447978247236683376, 18446744073709551615, 18446744073709551615, 436, 438, 436, 438, 78, 79, true, "in", "in"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541485865, 12447978233189958175, 18446744073709551615, 18446744073709551615, 59, 61, 59, 61, 9, 10, true, "to", "to"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541485865, 12447978233189813654, 18446744073709551615, 18446744073709551615, 340, 342, 340, 342, 61, 62, true, "to", "to"], ["numval", "ival", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 17767354399704235157, 6666235790308819566, 18446744073709551615, 18446744073709551615, 720, 721, 720, 721, 134, 135, true, "5", "5"], ["parenthesis", "round brackets", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 772704748867907067, 17873771936385193962, 18446744073709551615, 18446744073709551615, 215, 286, 215, 286, 42, 57, true, "(e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)", "(e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)"], ["parenthesis", "round brackets", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 7596548548401207156, 5106769991605743942, 18446744073709551615, 18446744073709551615, 697, 722, 697, 722, 128, 136, true, "(as is shown in Figure 5)", "(as is shown in Figure 5)"], ["expression", "common", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541487324, 12448443577304465400, 18446744073709551615, 18446744073709551615, 216, 220, 216, 220, 43, 44, true, "eg", "e.g."], ["expression", "word-concatenation", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 13953038768306043326, 2217483007470679809, 18446744073709551615, 18446744073709551615, 253, 263, 253, 263, 50, 51, true, "pie-charts", "pie-charts"], ["expression", "word-concatenation", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 5428486186575573840, 17552603483030949066, 18446744073709551615, 18446744073709551615, 412, 428, 412, 428, 80, 81, true, "image-classifier", "image-classifier"], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 9576287605285270893, 7775032662306861151, 18446744073709551615, 18446744073709551615, 0, 65, 0, 65, 0, 15, true, "In the future, we plan to extend the platform in two major areas.", "In the future, we plan to extend the platform in two major areas."], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 7980828285556281738, 2544051083396498287, 18446744073709551615, 18446744073709551615, 66, 172, 66, 172, 15, 34, true, "First, we would like to extend the number of microservices, especially with regard to image understanding.", "First, we would like to extend the number of microservices, especially with regard to image understanding."], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16816675794156539317, 4106452168371569212, 18446744073709551615, 18446744073709551615, 173, 287, 173, 287, 34, 58, true, "The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc).", "The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)."], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16727745954821675360, 18300594417076082954, 18446744073709551615, 18446744073709551615, 288, 429, 288, 429, 58, 82, true, "The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier.", "The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier."], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 10448641789434054504, 14093320860906874170, 18446744073709551615, 18446744073709551615, 430, 513, 430, 513, 82, 98, true, "Second, we would like to improve the quality and performance of our default models.", "Second, we would like to improve the quality and performance of our default models."], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 3084657715463842285, 15630767766630582663, 18446744073709551615, 18446744073709551615, 514, 723, 514, 723, 98, 137, true, "We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5).", "We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5)."], ["term", "enum-term-mark-2", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 18219039247346551478, 2994436876810062612, 18446744073709551615, 18446744073709551615, 216, 239, 216, 239, 43, 47, true, "eg line & scatterplot", "e.g. line & scatterplot"], ["term", "enum-term-mark-2", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 2459701502714558679, 5298793252682520889, 18446744073709551615, 18446744073709551615, 467, 490, 467, 490, 90, 93, true, "quality and performance", "quality and performance"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16589376492252179077, 7295144040672653108, 18446744073709551615, 18446744073709551615, 53, 64, 53, 64, 12, 14, true, "major areas", "major areas"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106398377967204844, 1921596529468359029, 18446744073709551615, 18446744073709551615, 216, 225, 216, 225, 43, 45, true, "eg line", "e.g. line"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 5358230985886796623, 5106522770952356562, 18446744073709551615, 18446744073709551615, 265, 280, 265, 280, 52, 54, true, "geographic maps", "geographic maps"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15357232380281159303, 112568471828176926, 18446744073709551615, 18446744073709551615, 344, 359, 344, 359, 70, 72, true, "individual type", "individual type"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 3849116425022465253, 9034086680124657749, 18446744073709551615, 18446744073709551615, 378, 403, 378, 403, 76, 78, true, "successful identification", "successful identification"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 1915006193249717419, 4993787564856558201, 18446744073709551615, 18446744073709551615, 498, 512, 498, 512, 95, 97, true, "default models", "default models"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 3374009463271020691, 3843260871587525071, 18446744073709551615, 18446744073709551615, 585, 600, 585, 600, 110, 112, true, "neural networks", "neural networks"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 10900025937134233159, 18131173731884203799, 18446744073709551615, 18446744073709551615, 636, 655, 636, 655, 118, 120, true, "photographic images", "photographic images"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 5766847864654328399, 4382574540747563376, 18446744073709551615, 18446744073709551615, 675, 696, 675, 696, 125, 128, true, "parsed document pages", "parsed document pages"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206565274670318, 14238598565348925208, 18446744073709551615, 18446744073709551615, 7, 13, 7, 13, 2, 3, true, "future", "future"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 14814125365076808131, 2312829961765099304, 18446744073709551615, 18446744073709551615, 37, 45, 37, 45, 9, 10, true, "platform", "platform"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206574973295053, 1926660952952474766, 18446744073709551615, 18446744073709551615, 101, 107, 101, 107, 23, 24, true, "number", "number"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 990358581043194791, 6104208925519602427, 18446744073709551615, 18446744073709551615, 111, 124, 111, 124, 25, 26, true, "microservices", "microservices"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206521526353544, 4410027063676095069, 18446744073709551615, 18446744073709551615, 142, 148, 142, 148, 29, 30, true, "regard", "regard"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 11827147635933835345, 3554885262491213918, 18446744073709551615, 18446744073709551615, 158, 171, 158, 171, 32, 33, true, "understanding", "understanding"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206574973295053, 1926660952951671347, 18446744073709551615, 18446744073709551615, 177, 183, 177, 183, 35, 36, true, "number", "number"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 329104159243796903, 7082055202846522668, 18446744073709551615, 18446744073709551615, 187, 192, 187, 192, 37, 38, true, "types", "types"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206560620045048, 3914201981705366923, 18446744073709551615, 18446744073709551615, 196, 202, 196, 202, 39, 40, true, "images", "images"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 1839290100020230611, 2397938769091018318, 18446744073709551615, 18446744073709551615, 228, 239, 228, 239, 46, 47, true, "scatterplot", "scatterplot"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16102584389807428912, 3793139059914902481, 18446744073709551615, 18446744073709551615, 241, 251, 241, 251, 48, 49, true, "histograms", "histograms"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 13953038768306043326, 2217483007470679809, 18446744073709551615, 18446744073709551615, 253, 263, 253, 263, 50, 51, true, "pie-charts", "pie-charts"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 389609625699055241, 7447546965782188814, 18446744073709551615, 18446744073709551615, 292, 296, 292, 296, 59, 60, true, "goal", "goal"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 389609625696431489, 7440840763973745685, 18446744073709551615, 18446744073709551615, 326, 330, 326, 330, 66, 67, true, "data", "data"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206560620045048, 3914201981705340835, 18446744073709551615, 18446744073709551615, 363, 369, 363, 369, 73, 74, true, "images", "images"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 5428486186575573840, 17552603483030949066, 18446744073709551615, 18446744073709551615, 412, 428, 412, 428, 80, 81, true, "image-classifier", "image-classifier"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106477781724488761, 4422931059285339225, 18446744073709551615, 18446744073709551615, 467, 474, 467, 474, 90, 91, true, "quality", "quality"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 5731695876385560379, 12754564211995509475, 18446744073709551615, 18446744073709551615, 479, 490, 479, 490, 92, 93, true, "performance", "performance"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106478445190161533, 8668956716153119308, 18446744073709551615, 18446744073709551615, 543, 550, 543, 550, 103, 104, true, "results", "results"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206560620045048, 3914201981705337550, 18446744073709551615, 18446744073709551615, 665, 671, 665, 671, 123, 124, true, "images", "images"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206514091025767, 4428872138347593094, 18446744073709551615, 18446744073709551615, 713, 719, 713, 719, 133, 134, true, "Figure", "Figure"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 6843908984328718198, 4424980337438809569, 18446744073709551615, 18446744073709551615, 18, 32, 18, 32, 5, 8, true, "plan to extend", "plan to extend"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 14998042519330616781, 177101627084045088, 18446744073709551615, 18446744073709551615, 76, 96, 76, 96, 18, 22, true, "would like to extend", "would like to extend"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 4420603704750285605, 14167669410101881458, 18446744073709551615, 18446744073709551615, 302, 321, 302, 321, 61, 65, true, "would be to extract", "would be to extract"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16290083057699948816, 15990868729997335654, 18446744073709551615, 18446744073709551615, 441, 462, 441, 462, 85, 89, true, "would like to improve", "would like to improve"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 17236050900252224747, 9854267715107878317, 18446744073709551615, 18446744073709551615, 551, 574, 551, 574, 104, 108, true, "can be greatly improved", "can be greatly improved"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8208641893359681869, 16932607482672372426, 18446744073709551615, 18446744073709551615, 614, 631, 614, 631, 114, 117, true, "use are optimised", "use are optimised"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 14637951881518043285, 16016201078485034145, 18446744073709551615, 18446744073709551615, 701, 709, 701, 709, 130, 132, true, "is shown", "is shown"], ["verb", "single-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 329104161828335551, 7191149074974692359, 18446744073709551615, 18446744073709551615, 152, 157, 152, 157, 31, 32, true, "image", "image"], ["verb", "single-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541486535, 12448443551126566363, 18446744073709551615, 18446744073709551615, 203, 205, 203, 205, 40, 41, true, "is", "is"], ["verb", "single-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106397860663428876, 7464848062962649547, 18446744073709551615, 18446744073709551615, 526, 533, 526, 533, 100, 101, true, "believe", "believe"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16380809977974811061, 16065202910059383934, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 2, true, "In the", "In the"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541486538, 12448443553082805214, 18446744073709551615, 18446744073709551615, 46, 48, 46, 48, 10, 11, true, "in", "in"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173148059932, 18446744073709551615, 18446744073709551615, 108, 110, 108, 110, 24, 25, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 389609625618037948, 7445535260585538379, 18446744073709551615, 18446744073709551615, 137, 141, 137, 141, 28, 29, true, "with", "with"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173148052031, 18446744073709551615, 18446744073709551615, 184, 186, 184, 186, 36, 37, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173148050529, 18446744073709551615, 18446744073709551615, 193, 195, 193, 195, 38, 39, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 12178341415895623120, 598445003466491402, 18446744073709551615, 18446744073709551615, 331, 334, 331, 334, 67, 68, true, "out", "out"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 14814148868025447689, 6811951436730744836, 18446744073709551615, 18446744073709551615, 335, 343, 335, 343, 68, 70, true, "of these", "of these"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173147950427, 18446744073709551615, 18446744073709551615, 360, 362, 360, 362, 72, 73, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106398472718381934, 9575911640413642094, 18446744073709551615, 18446744073709551615, 370, 377, 370, 377, 74, 76, true, "after a", "after a"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106477988572616406, 15264315134668474563, 18446744073709551615, 18446744073709551615, 404, 411, 404, 411, 78, 80, true, "with an", "with an"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173148007931, 18446744073709551615, 18446744073709551615, 491, 493, 491, 493, 93, 94, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 14634130761162415388, 3833651190149238108, 18446744073709551615, 18446744073709551615, 534, 542, 534, 542, 101, 103, true, "that the", "that the"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 6168057894310307081, 11769172586530017585, 18446744073709551615, 18446744073709551615, 575, 584, 575, 584, 108, 110, true, "since the", "since the"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 12178341415895625940, 598444982766319560, 18446744073709551615, 18446744073709551615, 632, 635, 632, 635, 117, 118, true, "for", "for"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173147963145, 18446744073709551615, 18446744073709551615, 672, 674, 672, 674, 124, 125, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541487053, 12448443593105791703, 18446744073709551615, 18446744073709551615, 698, 700, 698, 700, 129, 130, true, "as", "as"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541486538, 12448443553082893684, 18446744073709551615, 18446744073709551615, 710, 712, 710, 712, 132, 133, true, "in", "in"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485865, 12448449225007565696, 18446744073709551615, 18446744073709551615, 23, 25, 23, 25, 6, 7, true, "to", "to"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485865, 12448449225007823792, 18446744073709551615, 18446744073709551615, 87, 89, 87, 89, 20, 21, true, "to", "to"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485865, 12448449225007818811, 18446744073709551615, 18446744073709551615, 149, 151, 149, 151, 30, 31, true, "to", "to"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485865, 12448449224998732616, 18446744073709551615, 18446744073709551615, 311, 313, 311, 313, 63, 64, true, "to", "to"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485865, 12448449224998740693, 18446744073709551615, 18446744073709551615, 452, 454, 452, 454, 87, 88, true, "to", "to"], ["expression", "word-concatenation", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 17249225789261661029, 3807297211102715149, 18446744073709551615, 18446744073709551615, 12, 28, 12, 28, 1, 2, true, "data-parallelism", "data-parallelism"], ["expression", "word-concatenation", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 8685358683472264781, 17027290145523372529, 18446744073709551615, 18446744073709551615, 87, 105, 87, 105, 12, 13, true, "user-customisation", "user-customisation"], ["sentence", "", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 10588183979877639592, 1367000647117206524, 18446744073709551615, 18446744073709551615, 12, 119, 12, 119, 1, 15, true, "data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", "data-parallelism in order to speed up the training and provide interactive user-customisation capabilities."], ["term", "single-term", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 9998261106336570604, 4856078764969945002, 18446744073709551615, 18446744073709551615, 75, 118, 75, 118, 11, 14, true, "interactive user-customisation capabilities", "interactive user-customisation capabilities"], ["term", "single-term", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 17249225789261661029, 3807297211102715149, 18446744073709551615, 18446744073709551615, 12, 28, 12, 28, 1, 2, true, "data-parallelism", "data-parallelism"], ["term", "single-term", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 329104161571401725, 3792502362005124423, 18446744073709551615, 18446744073709551615, 32, 37, 32, 37, 3, 4, true, "order", "order"], ["term", "single-term", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 14634153919632515335, 3840780376526095372, 18446744073709551615, 18446744073709551615, 54, 62, 54, 62, 8, 9, true, "training", "training"], ["verb", "single-verb", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 329104161639049345, 3799043945253257651, 18446744073709551615, 18446744073709551615, 41, 46, 41, 46, 5, 6, true, "speed", "speed"], ["verb", "single-verb", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 8106476000214061408, 9620881782228868220, 18446744073709551615, 18446744073709551615, 67, 74, 67, 74, 10, 11, true, "provide", "provide"], ["conn", "single-conn", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 15441160910541486538, 14667436044722575629, 18446744073709551615, 18446744073709551615, 29, 31, 29, 31, 2, 3, true, "in", "in"], ["conn", "single-conn", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 15441160910541485865, 14667435948507858038, 18446744073709551615, 18446744073709551615, 38, 40, 38, 40, 4, 5, true, "to", "to"], ["sentence", "", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 5556222901900980902, 15746519596852768008, 18446744073709551615, 18446744073709551615, 0, 127, 0, 127, 0, 22, true, "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system."], ["term", "enum-term-mark-4", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 13556182311682325280, 9761797471225359212, 18446744073709551615, 18446744073709551615, 32, 66, 32, 66, 6, 11, true, "Roxana Istrate and Matthieu Mottet", "Roxana Istrate and Matthieu Mottet"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 7949755686502200390, 668350583233417234, 18446744073709551615, 18446744073709551615, 32, 46, 32, 46, 6, 8, true, "Roxana Istrate", "Roxana Istrate"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 422584487912656734, 11698320462170095527, 18446744073709551615, 18446744073709551615, 51, 66, 51, 66, 9, 11, true, "Matthieu Mottet", "Matthieu Mottet"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 244635901031456436, 9625433519480199700, 18446744073709551615, 18446744073709551615, 116, 126, 116, 126, 19, 21, true, "CCS system", "CCS system"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 8106397759446161562, 5642353918280438479, 18446744073709551615, 18446744073709551615, 4, 11, 4, 11, 1, 2, true, "authors", "authors"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 4603153860084293890, 9630773090599505701, 18446744073709551615, 18446744073709551615, 77, 89, 77, 89, 13, 14, true, "contribution", "contribution"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 1525875096007260836, 16905177906202921866, 18446744073709551615, 18446744073709551615, 97, 108, 97, 108, 16, 17, true, "development", "development"], ["verb", "compound-verb", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 17737636287413194494, 18246863768738587194, 18446744073709551615, 18446744073709551615, 12, 31, 12, 31, 2, 6, true, "would like to thank", "would like to thank"], ["conn", "single-conn", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 12178341415895625940, 9042585404458343529, 18446744073709551615, 18446744073709551615, 67, 70, 67, 70, 11, 12, true, "for", "for"], ["conn", "single-conn", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 16381206565712212855, 15703671923459609107, 18446744073709551615, 18446744073709551615, 109, 115, 109, 115, 17, 19, true, "of the", "of the"], ["conn", "single-conn", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 15441160910541485865, 16366793807298640842, 18446744073709551615, 18446744073709551615, 23, 25, 23, 25, 4, 5, true, "to", "to"], ["conn", "single-conn", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 16381206519425733256, 17710263008813390102, 18446744073709551615, 18446744073709551615, 90, 96, 90, 96, 14, 16, true, "to the", "to the"], ["numval", "year", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 389609625548777262, 15175051322594687321, 18446744073709551615, 18446744073709551615, 175, 179, 175, 179, 39, 40, true, "2020", "2020"], ["numval", "ival", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16380810010060182105, 2087358970220343258, 18446744073709551615, 18446744073709551615, 232, 238, 232, 238, 47, 48, true, "721027", "721027"], ["link", "url", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 4558951843677957919, 6153426188298487244, 18446744073709551615, 18446744073709551615, 44, 62, 44, 62, 9, 16, true, "http://nccr-marvel", "http://nccr-marvel"], ["link", "url", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 1840514147198720564, 15805877038624594621, 18446744073709551615, 18446744073709551615, 240, 267, 240, 267, 49, 60, true, "http://the-force-project.eu", "http://the-force-project.eu"], ["parenthesis", "round brackets", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 9725913318321680311, 4961995203860234930, 18446744073709551615, 18446744073709551615, 43, 67, 43, 67, 8, 19, true, "(http://nccr-marvel. ch)", "(http://nccr-marvel. ch)"], ["parenthesis", "round brackets", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 2988796312331131177, 6687402703764012002, 18446744073709551615, 18446744073709551615, 239, 268, 239, 268, 48, 61, true, "(http://the-force-project.eu)", "(http://the-force-project.eu)"], ["expression", "wtoken-concatenation", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 10991632387650324970, 16837241231127303249, 18446744073709551615, 18446744073709551615, 186, 198, 186, 198, 41, 42, true, "NMBP-23-2016", "NMBP-23-2016"], ["sentence", "", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 9490709138959189212, 12250691288144973169, 18446744073709551615, 18446744073709551615, 0, 117, 0, 117, 0, 28, true, "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation.", "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation."], ["sentence", "", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 1102470314652222820, 15601071363037323756, 18446744073709551615, 18446744073709551615, 118, 269, 118, 269, 28, 62, true, "MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", "MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu)."], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 9107120802959375325, 14854418713978759874, 18446744073709551615, 18446744073709551615, 31, 42, 31, 42, 6, 8, true, "NCCR MARVEL", "NCCR MARVEL"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 4312908239263712749, 12629609910975902459, 18446744073709551615, 18446744073709551615, 83, 116, 83, 116, 23, 27, true, "Swiss National Science Foundation", "Swiss National Science Foundation"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 15770732106686559794, 7781941783302435281, 18446744073709551615, 18446744073709551615, 142, 155, 142, 155, 33, 35, true, "FORCE project", "FORCE project"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 4392060515500483083, 8866459764729165903, 18446744073709551615, 18446744073709551615, 209, 231, 209, 231, 44, 47, true, "Grant agreement number", "Grant agreement number"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 389609625633592023, 15303732731624508399, 18446744073709551615, 18446744073709551615, 5, 9, 5, 9, 1, 2, true, "work", "work"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 15441160910541486943, 7829630847478764393, 18446744073709551615, 18446744073709551615, 64, 66, 64, 66, 17, 18, true, "ch", "ch"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 15441160910541480587, 7830721483973022036, 18446744073709551615, 18446744073709551615, 118, 120, 118, 120, 28, 29, true, "MD", "MD"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 8106351288219429194, 18213714777089539961, 18446744073709551615, 18446744073709551615, 167, 174, 167, 174, 38, 39, true, "Horizon", "Horizon"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 10991632387650324970, 16837241231127303249, 18446744073709551615, 18446744073709551615, 186, 198, 186, 198, 41, 42, true, "NMBP-23-2016", "NMBP-23-2016"], ["verb", "compound-verb", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 13041846394845825316, 5320956231753433918, 18446744073709551615, 18446744073709551615, 10, 23, 10, 23, 2, 4, true, "was supported", "was supported"], ["verb", "compound-verb", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 13041846394845825316, 5320956231753459128, 18446744073709551615, 18446744073709551615, 121, 134, 121, 134, 29, 31, true, "was supported", "was supported"], ["verb", "single-verb", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16381206565272093797, 5039615147538801699, 18446744073709551615, 18446744073709551615, 69, 75, 69, 75, 20, 21, true, "funded", "funded"], ["verb", "single-verb", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16381206565272093797, 5039615147538790981, 18446744073709551615, 18446744073709551615, 157, 163, 157, 163, 36, 37, true, "funded", "funded"], ["verb", "single-verb", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 389609625695109591, 15313901038780033729, 18446744073709551615, 18446744073709551615, 199, 203, 199, 203, 42, 43, true, "call", "call"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16381206574363061705, 5224141779864768374, 18446744073709551615, 18446744073709551615, 24, 30, 24, 30, 4, 6, true, "by the", "by the"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16381206574363061705, 5224141779864730347, 18446744073709551615, 18446744073709551615, 76, 82, 76, 82, 21, 23, true, "by the", "by the"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16381206574363061705, 5224141779864726325, 18446744073709551615, 18446744073709551615, 135, 141, 135, 141, 31, 33, true, "by the", "by the"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 15441160910541486989, 7829629584886114826, 18446744073709551615, 18446744073709551615, 164, 166, 164, 166, 37, 38, true, "by", "by"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 329104159159151530, 12430892283433612669, 18446744073709551615, 18446744073709551615, 180, 185, 180, 185, 40, 41, true, "under", "under"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 389609625618037948, 15311592167218177731, 18446744073709551615, 18446744073709551615, 204, 208, 204, 208, 43, 44, true, "with", "with"], ["reference", "author", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 10921193442290853772, 7808176325166967948, 18446744073709551615, 18446744073709551615, 4, 21, 4, 21, 1, 4, true, "A. Antonacopoulos", "A. Antonacopoulos"], ["reference", "author", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 5181382481262336037, 5307751930075227018, 18446744073709551615, 18446744073709551615, 23, 34, 23, 34, 5, 8, true, "C. Clausner", "C. Clausner"], ["reference", "author", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 18410882341323932977, 3950678732393374894, 18446744073709551615, 18446744073709551615, 36, 51, 36, 51, 9, 12, true, "C. Papadopoulos", "C. Papadopoulos"], ["reference", "author", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 6326253284428776844, 2242368337149903292, 18446744073709551615, 18446744073709551615, 57, 73, 57, 73, 14, 18, true, "S. Pletschacher.", "S. Pletschacher."], ["reference", "citation-number", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 12178341415895551530, 18332345913337968356, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[1]", "[1]"], ["reference", "container-title", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 2527079864200222812, 474810476780653321, 18446744073709551615, 18446744073709551615, 161, 249, 161, 249, 30, 42, true, "In Proceedings of the 13th International Conference on Document Analysis and Recognition", "In Proceedings of the 13th International Conference on Document Analysis and Recognition"], ["reference", "container-title", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 6558131902220562236, 4761966619744782752, 18446744073709551615, 18446744073709551615, 251, 260, 251, 260, 43, 45, true, "ICDAR2015", "ICDAR2015"], ["reference", "date", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 389609625548777059, 4138332198474599496, 18446744073709551615, 18446744073709551615, 74, 78, 74, 78, 18, 19, true, "2015", "2015"], ["reference", "date", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 10303630957638511768, 3815340683710445282, 18446744073709551615, 18446744073709551615, 270, 279, 270, 279, 49, 50, true, "1151-1155", "1151-1155"], ["reference", "location", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 329104162200796337, 14591806354842233425, 18446744073709551615, 18446744073709551615, 263, 268, 263, 268, 47, 48, true, "Nancy", "Nancy"], ["reference", "title", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 17804212744220731295, 13329383501201933373, 18446744073709551615, 18446744073709551615, 80, 159, 80, 159, 20, 29, true, "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015", "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015"], ["numval", "year", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 389609625548757414, 14515784463162085628, 18446744073709551615, 18446744073709551615, 17, 21, 17, 21, 4, 5, true, "2001", "2001"], ["numval", "year", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 389609625548757414, 14515784463162082595, 18446744073709551615, 18446744073709551615, 70, 74, 70, 74, 17, 18, true, "2001", "2001"], ["numval", "fval", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 8104408072666159999, 6544755582293006081, 18446744073709551615, 18446744073709551615, 99, 106, 99, 106, 30, 31, true, "10.1023", "10.1023"], ["numval", "irng", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 389609625655395305, 14454171207833729215, 18446744073709551615, 18446744073709551615, 77, 81, 77, 81, 20, 21, true, "5-32", "5-32"], ["numval", "ival", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 15441160910541486271, 2222475241750256418, 18446744073709551615, 18446744073709551615, 56, 58, 56, 58, 11, 12, true, "45", "45"], ["numval", "ival", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 17767354399704235161, 1208869658482268274, 18446744073709551615, 18446744073709551615, 60, 61, 60, 61, 13, 14, true, "1", "1"], ["numval", "ival", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 15441160910541481918, 2222473849477022423, 18446744073709551615, 18446744073709551615, 63, 65, 63, 65, 15, 16, true, "01", "01"], ["numval", "ival", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 8238939589069097802, 7985792528150377935, 18446744073709551615, 18446744073709551615, 109, 122, 109, 122, 34, 35, true, "1010933404324", "1010933404324"], ["link", "url", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 1225079762841478321, 13531790532415888950, 18446744073709551615, 18446744073709551615, 83, 122, 83, 122, 22, 35, true, "https://doi.org/10.1023/A:1010933404324", "https://doi.org/10.1023/A:1010933404324"], ["link", "doi", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 3684595426122890679, 8279367666964033467, 18446744073709551615, 18446744073709551615, 91, 122, 91, 122, 26, 35, true, "doi.org/10.1023/A:1010933404324", "doi.org/10.1023/A:1010933404324"], ["parenthesis", "reference", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 12178341415895551595, 12282095972636501808, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[2]", "[2]"], ["parenthesis", "round brackets", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 1665356215973274173, 6648617910504136340, 18446744073709551615, 18446744073709551615, 62, 75, 62, 75, 14, 19, true, "(01 Oct 2001)", "(01 Oct 2001)"], ["expression", "wtoken-concatenation", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 12178341415895551595, 12282095972636501808, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[2]", "[2]"], ["sentence", "", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 15712069677801245586, 15474412409802070037, 18446744073709551615, 18446744073709551615, 0, 16, 0, 16, 0, 4, true, "[2] Leo Breiman.", "[2] Leo Breiman."], ["sentence", "", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 12717495542064580601, 7141139343320249805, 18446744073709551615, 18446744073709551615, 23, 38, 23, 38, 6, 9, true, "Random Forests.", "Random Forests."], ["sentence", "", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 378110784683181940, 10684547640314883302, 18446744073709551615, 18446744073709551615, 39, 82, 39, 82, 9, 22, true, "Machine Learning 45, 1 (01 Oct 2001), 5-32.", "Machine Learning 45, 1 (01 Oct 2001), 5-32."], ["term", "single-term", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 4182884638369411954, 3549752055104827895, 18446744073709551615, 18446744073709551615, 4, 15, 4, 15, 1, 3, true, "Leo Breiman", "Leo Breiman"], ["term", "single-term", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 2109081024677782429, 14560503901773287747, 18446744073709551615, 18446744073709551615, 23, 37, 23, 37, 6, 8, true, "Random Forests", "Random Forests"], ["term", "single-term", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 13278563109182224937, 9894237306486099503, 18446744073709551615, 18446744073709551615, 39, 55, 39, 55, 9, 11, true, "Machine Learning", "Machine Learning"], ["term", "single-term", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 12178341415896271308, 12282147181195563083, 18446744073709551615, 18446744073709551615, 66, 69, 66, 69, 16, 17, true, "Oct", "Oct"], ["reference", "author", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 1401374873664364883, 11647727014815681179, 18446744073709551615, 18446744073709551615, 4, 14, 4, 14, 1, 4, true, "R. Cattoni", "R. Cattoni"], ["reference", "author", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 8489759580118410179, 13292301803598722609, 18446744073709551615, 18446744073709551615, 16, 26, 16, 26, 5, 8, true, "T. Coianiz", "T. Coianiz"], ["reference", "author", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 6842824740074268202, 13861579202330443089, 18446744073709551615, 18446744073709551615, 28, 40, 28, 40, 9, 12, true, "S. Messelodi", "S. Messelodi"], ["reference", "author", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 3186691256225071720, 5893020180892593571, 18446744073709551615, 18446744073709551615, 46, 59, 46, 59, 14, 20, true, "C. M. Modena.", "C. M. Modena."], ["reference", "citation-number", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 12178341415895577000, 12922636114896239788, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[3]", "[3]"], ["reference", "date", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 389609625536085742, 14383425253514843049, 18446744073709551615, 18446744073709551615, 60, 64, 60, 64, 20, 21, true, "1998", "1998"], ["reference", "title", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 10272469742902868819, 13721964765306049914, 18446744073709551615, 18446744073709551615, 66, 145, 66, 145, 22, 33, true, "Geometric layout analysis techniques for document image understanding: a review", "Geometric layout analysis techniques for document image understanding: a review"], ["numval", "year", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 389609625548757410, 11746200903899729970, 18446744073709551615, 18446744073709551615, 132, 136, 129, 133, 27, 28, true, "2005", "2005"], ["numval", "fval", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 8104408072666212330, 4511393581502851323, 18446744073709551615, 18446744073709551615, 264, 271, 261, 268, 57, 58, true, "10.1007", "10.1007"], ["numval", "irng", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 16380810033755625172, 7099395661617449598, 18446744073709551615, 18446744073709551615, 240, 246, 237, 243, 47, 48, true, "92-103", "92-103"], ["numval", "ival", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 14654384880687695893, 11510483770024350538, 18446744073709551615, 18446744073709551615, 272, 280, 269, 277, 59, 60, true, "11551362", "11551362"], ["numval", "ival", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 17767354399704235153, 16958274266350069298, 18446744073709551615, 18446744073709551615, 281, 282, 278, 279, 61, 62, true, "9", "9"], ["link", "url", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 9115058383761225167, 648438667166468655, 18446744073709551615, 18446744073709551615, 248, 282, 245, 279, 49, 62, true, "https://doi.org/10.1007/11551362_9", "https://doi.org/10.1007/11551362_9"], ["link", "doi", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 5111704410202581687, 8009806043582861918, 18446744073709551615, 18446744073709551615, 256, 282, 253, 279, 53, 62, true, "doi.org/10.1007/11551362_9", "doi.org/10.1007/11551362_9"], ["name", "name-concatenation", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 1570415358681803520, 1677517284588620982, 18446744073709551615, 18446744073709551615, 4, 15, 4, 15, 1, 4, true, "Jean-Pierre", "Jean-Pierre"], ["name", "name-concatenation", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 14652280738021138433, 16881362210242463731, 18446744073709551615, 18446744073709551615, 114, 122, 111, 119, 22, 25, true, "Jean-Luc", "Jean-Luc"], ["parenthesis", "reference", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 12178341415895577065, 17281225859930936863, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[4]", "[4]"], ["expression", "wtoken-concatenation", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 12178341415895577065, 17281225859930936863, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[4]", "[4]"], ["sentence", "", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 1454337940209217576, 15847786010983528714, 18446744073709551615, 18446744073709551615, 0, 131, 0, 128, 0, 27, true, "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier.", "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier."], ["sentence", "", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 980690805708056428, 10837072452839845549, 18446744073709551615, 18446744073709551615, 138, 191, 135, 188, 29, 39, true, "From Legacy Documents to XML: A Conversion Framework.", "From Legacy Documents to XML: A Conversion Framework."], ["sentence", "", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 9562882546299798530, 8479999919787955978, 18446744073709551615, 18446744073709551615, 192, 247, 189, 244, 39, 49, true, "Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103.", "Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103."], ["term", "enum-term-mark-4", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 17602133858301996024, 15696295076363719071, 18446744073709551615, 18446744073709551615, 9, 118, 9, 115, 3, 23, true, "Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean", "Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 10319721072853010428, 6489882772390524296, 18446744073709551615, 18446744073709551615, 9, 22, 9, 22, 3, 5, true, "Pierre Chanod", "Pierre Chanod"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 7554933550167443736, 13411551703313480687, 18446744073709551615, 18446744073709551615, 24, 41, 24, 41, 6, 8, true, "Boris Chidlovskii", "Boris Chidlovskii"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 16299981998052668228, 10120159009512117499, 18446744073709551615, 18446744073709551615, 43, 56, 43, 55, 9, 11, true, "Herv\u00e9 Dejean", "Herv\u00e9 Dejean"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 12186041413076963653, 1815357622671572381, 18446744073709551615, 18446744073709551615, 58, 72, 57, 71, 12, 14, true, "Olivier Fambon", "Olivier Fambon"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 10757542349073996342, 681372576460736923, 18446744073709551615, 18446744073709551615, 74, 91, 73, 88, 15, 17, true, "J\u00e9r\u00f4me Fuselier", "J\u00e9r\u00f4me Fuselier"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 17756104824925179897, 12319066590629211102, 18446744073709551615, 18446744073709551615, 93, 108, 90, 105, 18, 20, true, "Thierry Jacquin", "Thierry Jacquin"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 5537218577218077560, 2866017161052533450, 18446744073709551615, 18446744073709551615, 119, 130, 116, 127, 24, 26, true, "Luc Meunier", "Luc Meunier"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 4381219347563518937, 4225593426044066727, 18446744073709551615, 18446744073709551615, 143, 159, 140, 156, 30, 32, true, "Legacy Documents", "Legacy Documents"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 15039437164843108785, 909619298391709716, 18446744073709551615, 18446744073709551615, 170, 190, 167, 187, 36, 38, true, "Conversion Framework", "Conversion Framework"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 14297842595136370149, 6710149947699048907, 18446744073709551615, 18446744073709551615, 192, 218, 189, 215, 39, 42, true, "Springer Berlin Heidelberg", "Springer Berlin Heidelberg"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 16381206535680833456, 11649598247391704340, 18446744073709551615, 18446744073709551615, 220, 226, 217, 223, 43, 44, true, "Berlin", "Berlin"], ["term", "single-term", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 4638979131570902619, 4604157803470259425, 18446744073709551615, 18446744073709551615, 228, 238, 225, 235, 45, 46, true, "Heidelberg", "Heidelberg"], ["verb", "single-verb", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 12178341415895541463, 17281175695706441462, 18446744073709551615, 18446744073709551615, 163, 166, 160, 163, 33, 34, true, "XML", "XML"], ["conn", "single-conn", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 389609625538216073, 11746398078428097470, 18446744073709551615, 18446744073709551615, 138, 142, 135, 139, 29, 30, true, "From", "From"], ["conn", "single-conn", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 15441160910541485865, 2839021695369005356, 18446744073709551615, 18446744073709551615, 160, 162, 157, 159, 32, 33, true, "to", "to"], ["numval", "year", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 389609625548777059, 1587769393776818040, 18446744073709551615, 18446744073709551615, 19, 23, 19, 23, 4, 5, true, "2015", "2015"], ["numval", "year", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 389609625548777059, 1587769393776763538, 18446744073709551615, 18446744073709551615, 59, 63, 59, 63, 13, 14, true, "2015", "2015"], ["numval", "year", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 389609625548777059, 1587769393776757579, 18446744073709551615, 18446744073709551615, 216, 220, 216, 220, 53, 54, true, "2015", "2015"], ["numval", "fval", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 8104408072666216409, 14220288417264645869, 18446744073709551615, 18446744073709551615, 203, 210, 203, 210, 49, 50, true, "10.1109", "10.1109"], ["numval", "irng", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 10303975503395430788, 13846363068497305469, 18446744073709551615, 18446744073709551615, 176, 185, 176, 185, 39, 40, true, "1440-1448", "1440-1448"], ["numval", "ival", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 15441160910541481979, 3495651879263029623, 18446744073709551615, 18446744073709551615, 127, 129, 127, 129, 26, 27, true, "15", "15"], ["numval", "ival", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 12178341415896420726, 15205590251298949236, 18446744073709551615, 18446744073709551615, 221, 224, 221, 224, 55, 56, true, "169", "169"], ["link", "url", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 8704287819835955947, 1152182854074722114, 18446744073709551615, 18446744073709551615, 187, 224, 187, 224, 41, 56, true, "https://doi.org/10.1109/ICCV.2015.169", "https://doi.org/10.1109/ICCV.2015.169"], ["link", "doi", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 16500190859490903724, 11881156334101563754, 18446744073709551615, 18446744073709551615, 195, 224, 195, 224, 45, 56, true, "doi.org/10.1109/ICCV.2015.169", "doi.org/10.1109/ICCV.2015.169"], ["parenthesis", "reference", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 12178341415895577901, 15205622006266309913, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[5]", "[5]"], ["parenthesis", "round brackets", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 16380808314424790428, 13641327031679638352, 18446744073709551615, 18446744073709551615, 113, 119, 113, 119, 20, 23, true, "(ICCV)", "(ICCV)"], ["parenthesis", "round brackets", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 7096417115544771815, 11395153180835677355, 18446744073709551615, 18446744073709551615, 120, 130, 120, 130, 23, 28, true, "(ICCV '15)", "(ICCV '15)"], ["expression", "word-concatenation", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 329104162326555074, 11875090144383732350, 18446744073709551615, 18446744073709551615, 30, 35, 30, 35, 7, 8, true, "R-CNN", "R-CNN"], ["expression", "wtoken-concatenation", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 12178341415895577901, 15205622006266309913, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[5]", "[5]"], ["sentence", "", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 9476456823999413175, 1770001191116537879, 18446744073709551615, 18446744073709551615, 0, 18, 0, 18, 0, 4, true, "[5] Ross Girshick.", "[5] Ross Girshick."], ["sentence", "", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 2498161724771347283, 9048804449047555840, 18446744073709551615, 18446744073709551615, 25, 36, 25, 36, 6, 9, true, "Fast R-CNN.", "Fast R-CNN."], ["sentence", "", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 15441257360238259822, 5601527666409867545, 18446744073709551615, 18446744073709551615, 37, 131, 37, 131, 9, 29, true, "In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15).", "In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15)."], ["sentence", "", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 1794451012634991315, 7573806955644456863, 18446744073709551615, 18446744073709551615, 132, 186, 132, 186, 29, 41, true, "IEEE Computer Society, Washington, DC, USA, 1440-1448.", "IEEE Computer Society, Washington, DC, USA, 1440-1448."], ["term", "single-term", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 13123599834782083842, 8538907007420179436, 18446744073709551615, 18446744073709551615, 4, 17, 4, 17, 1, 3, true, "Ross Girshick", "Ross Girshick"], ["term", "single-term", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 15491004285883184028, 17483261521377705764, 18446744073709551615, 18446744073709551615, 25, 35, 25, 35, 6, 8, true, "Fast R-CNN", "Fast R-CNN"], ["term", "single-term", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 10113513805742010945, 11458139500661842431, 18446744073709551615, 18446744073709551615, 64, 93, 64, 93, 14, 17, true, "IEEE International Conference", "IEEE International Conference"], ["term", "single-term", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 10222924410753703457, 248762853253947982, 18446744073709551615, 18446744073709551615, 97, 112, 97, 112, 18, 20, true, "Computer Vision", "Computer Vision"], ["term", "single-term", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 12763303431451614333, 16935168498405799510, 18446744073709551615, 18446744073709551615, 132, 153, 132, 153, 29, 32, true, "IEEE Computer Society", "IEEE Computer Society"], ["term", "single-term", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 17329186159823478547, 16920991383284353179, 18446744073709551615, 18446744073709551615, 40, 51, 40, 51, 10, 11, true, "Proceedings", "Proceedings"], ["term", "single-term", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 389609625537760670, 1654267914364558446, 18446744073709551615, 18446744073709551615, 114, 118, 114, 118, 21, 22, true, "ICCV", "ICCV"], ["term", "single-term", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 389609625537760670, 1654267914364557852, 18446744073709551615, 18446744073709551615, 121, 125, 121, 125, 24, 25, true, "ICCV", "ICCV"], ["term", "single-term", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 5589693159453375122, 13837519084974782204, 18446744073709551615, 18446744073709551615, 155, 165, 155, 165, 33, 34, true, "Washington", "Washington"], ["term", "single-term", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 15441160910541480769, 3495651894226244878, 18446744073709551615, 18446744073709551615, 167, 169, 167, 169, 35, 36, true, "DC", "DC"], ["term", "single-term", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 12178341415895650394, 15205628192038338337, 18446744073709551615, 18446744073709551615, 171, 174, 171, 174, 37, 38, true, "USA", "USA"], ["conn", "single-conn", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 15441160910541480354, 3495651908653608597, 18446744073709551615, 18446744073709551615, 37, 39, 37, 39, 9, 10, true, "In", "In"], ["conn", "single-conn", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 16381206565712212855, 2695983015266329611, 18446744073709551615, 18446744073709551615, 52, 58, 52, 58, 11, 13, true, "of the", "of the"], ["conn", "single-conn", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 15441160910541485678, 3495646783295715187, 18446744073709551615, 18446744073709551615, 94, 96, 94, 96, 17, 18, true, "on", "on"], ["reference", "author", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 141995704861070506, 4358412458884164235, 18446744073709551615, 18446744073709551615, 4, 20, 4, 20, 1, 5, true, "Ross B. Girshick", "Ross B. Girshick"], ["reference", "author", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 16700235966000105766, 16857612526578801697, 18446744073709551615, 18446744073709551615, 22, 34, 22, 34, 6, 8, true, "Jeff Donahue", "Jeff Donahue"], ["reference", "author", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 3125822382074464058, 13386372949081827875, 18446744073709551615, 18446744073709551615, 36, 50, 36, 50, 9, 11, true, "Trevor Darrell", "Trevor Darrell"], ["reference", "author", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 10076860098015848351, 1698280748488935181, 18446744073709551615, 18446744073709551615, 56, 71, 56, 71, 13, 16, true, "Jitendra Malik.", "Jitendra Malik."], ["reference", "citation-number", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 12178341415895577964, 1023751500620290990, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[6]", "[6]"], ["reference", "date", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 389609625548777061, 894814354396885943, 18446744073709551615, 18446744073709551615, 72, 76, 72, 76, 16, 17, true, "2013", "2013"], ["reference", "date", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 389609625548777061, 894814354396890826, 18446744073709551615, 18446744073709551615, 180, 184, 180, 184, 32, 33, true, "2013", "2013"], ["reference", "journal", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 389609625536419383, 889446752040326567, 18446744073709551615, 18446744073709551615, 160, 164, 160, 164, 29, 30, true, "CoRR", "CoRR"], ["reference", "title", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 4208693923929480551, 3754197794849426338, 18446744073709551615, 18446744073709551615, 78, 158, 78, 158, 18, 28, true, "Rich feature hierarchies for accurate object detection and semantic segmentation", "Rich feature hierarchies for accurate object detection and semantic segmentation"], ["reference", "volume", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 3979843797462439752, 2449824314382216916, 18446744073709551615, 18446744073709551615, 165, 178, 165, 178, 30, 31, true, "abs/1311.2524", "abs/1311.2524"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 8106351942713029604, 15468997146309510455, 18446744073709551615, 18446744073709551615, 4, 11, 4, 11, 1, 3, true, "Wei Liu", "Wei Liu"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 7132768279271695, 1832821379686674159, 18446744073709551615, 18446744073709551615, 13, 30, 13, 30, 4, 6, true, "Dragomir Anguelov", "Dragomir Anguelov"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 12871845148221275510, 11451573001119547147, 18446744073709551615, 18446744073709551615, 32, 45, 32, 45, 7, 9, true, "Dumitru Erhan", "Dumitru Erhan"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 6963214204149412896, 11905902671968880924, 18446744073709551615, 18446744073709551615, 47, 64, 47, 64, 10, 12, true, "Christian Szegedy", "Christian Szegedy"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 1399468129531522089, 15637271748350955016, 18446744073709551615, 18446744073709551615, 66, 76, 66, 76, 13, 15, true, "Scott Reed", "Scott Reed"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 12712965187511148158, 5061563798042056469, 18446744073709551615, 18446744073709551615, 78, 91, 78, 91, 16, 20, true, "Cheng-Yang Fu", "Cheng-Yang Fu"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 3733048493609069913, 12058083979397468329, 18446744073709551615, 18446744073709551615, 97, 115, 97, 115, 22, 27, true, "Alexander C. Berg.", "Alexander C. Berg."], ["reference", "citation-number", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 12178341415895577775, 16834182135958034128, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[7]", "[7]"], ["reference", "date", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 389609625548777056, 12418382060406794776, 18446744073709551615, 18446744073709551615, 116, 120, 116, 120, 27, 28, true, "2016", "2016"], ["reference", "doi", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 3534146179424153776, 1525705277889903310, 18446744073709551615, 18446744073709551615, 206, 224, 206, 224, 44, 45, true, "https://doi.org/10", "https://doi.org/10"], ["reference", "doi", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 3493950482346635177, 14172820134834639105, 18446744073709551615, 18446744073709551615, 226, 250, 226, 250, 46, 54, true, "1007/978-3-319-46448-0_2", "1007/978-3-319-46448-0_2"], ["reference", "location", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 389609625536506042, 12420143175742824125, 18446744073709551615, 18446744073709551615, 193, 197, 193, 197, 40, 41, true, "Cham", "Cham"], ["reference", "pages", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 329104147696968014, 12309257817181187524, 18446744073709551615, 18446744073709551615, 199, 204, 199, 204, 42, 43, true, "21-37", "21-37"], ["reference", "publisher", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 18208766556701967117, 9345293319432700679, 18446744073709551615, 18446744073709551615, 158, 191, 158, 191, 36, 39, true, "Springer International Publishing", "Springer International Publishing"], ["reference", "title", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 10201684882899222639, 16463858842282873959, 18446744073709551615, 18446744073709551615, 122, 156, 122, 156, 29, 35, true, "SSD: Single Shot MultiBox Detector", "SSD: Single Shot MultiBox Detector"], ["reference", "author", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 5088659084289352829, 5811844525036759114, 18446744073709551615, 18446744073709551615, 4, 17, 4, 17, 1, 3, true, "Joseph Redmon", "Joseph Redmon"], ["reference", "author", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 417695209021750783, 13441950925666715191, 18446744073709551615, 18446744073709551615, 19, 40, 19, 40, 4, 7, true, "Santosh Kumar Divvala", "Santosh Kumar Divvala"], ["reference", "author", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 141995704861070506, 13286696794844996383, 18446744073709551615, 18446744073709551615, 42, 58, 42, 58, 8, 12, true, "Ross B. Girshick", "Ross B. Girshick"], ["reference", "author", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 16947174234018208722, 13965552924856577071, 18446744073709551615, 18446744073709551615, 64, 76, 64, 76, 14, 17, true, "Ali Farhadi.", "Ali Farhadi."], ["reference", "citation-number", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 12178341415895577838, 11018125289094672461, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[8]", "[8]"], ["reference", "container-title", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 17631274803144515959, 18105892991402137032, 18446744073709551615, 18446744073709551615, 140, 203, 140, 203, 32, 41, true, "2016 IEEE Conference on Computer Vision and Pattern Recognition", "2016 IEEE Conference on Computer Vision and Pattern Recognition"], ["reference", "container-title", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 389609625526699487, 17849764824838617245, 18446744073709551615, 18446744073709551615, 205, 209, 205, 209, 42, 43, true, "CVPR", "CVPR"], ["reference", "date", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 389609625548777056, 17837801987031958568, 18446744073709551615, 18446744073709551615, 77, 81, 77, 81, 17, 18, true, "2016", "2016"], ["reference", "date", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 389609625548777056, 17837801987031982734, 18446744073709551615, 18446744073709551615, 212, 216, 212, 216, 45, 46, true, "2016", "2016"], ["reference", "pages", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 8104408789271407267, 9641140559480270364, 18446744073709551615, 18446744073709551615, 219, 226, 219, 226, 48, 49, true, "779-788", "779-788"], ["reference", "title", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 5895818558987270699, 2974553673873283962, 18446744073709551615, 18446744073709551615, 83, 138, 83, 138, 19, 31, true, "You Only Look Once: Unified, Real-Time Object Detection", "You Only Look Once: Unified, Real-Time Object Detection"], ["reference", "author", 6623297047995432604, "TEXT", "#/texts/108", 1.0, 5088659084289352829, 16235259739729085297, 18446744073709551615, 18446744073709551615, 4, 17, 4, 17, 1, 3, true, "Joseph Redmon", "Joseph Redmon"], ["reference", "author", 6623297047995432604, "TEXT", "#/texts/108", 1.0, 16947174234018208722, 7021580680610188634, 18446744073709551615, 18446744073709551615, 22, 34, 22, 34, 4, 7, true, "Ali Farhadi.", "Ali Farhadi."], ["reference", "citation-number", 6623297047995432604, "TEXT", "#/texts/108", 1.0, 12178341415895577640, 5338477872773862060, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[9]", "[9]"], ["reference", "date", 6623297047995432604, "TEXT", "#/texts/108", 1.0, 389609625548777056, 2625243571990787508, 18446744073709551615, 18446744073709551615, 35, 39, 35, 39, 7, 8, true, "2016", "2016"], ["reference", "date", 6623297047995432604, "TEXT", "#/texts/108", 1.0, 389609625548777056, 2625243571990783197, 18446744073709551615, 18446744073709551615, 110, 114, 110, 114, 21, 22, true, "2016", "2016"], ["reference", "author", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 9337887504118347047, 4966377796769374289, 18446744073709551615, 18446744073709551615, 5, 17, 5, 17, 1, 3, true, "Shaoqing Ren", "Shaoqing Ren"], ["reference", "author", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 7339447509685488310, 1490181006860316744, 18446744073709551615, 18446744073709551615, 19, 29, 19, 29, 4, 6, true, "Kaiming He", "Kaiming He"], ["reference", "author", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 13123599834782083842, 7292467665049010344, 18446744073709551615, 18446744073709551615, 31, 44, 31, 44, 7, 9, true, "Ross Girshick", "Ross Girshick"], ["reference", "author", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 2904781337729160811, 16221483782846728585, 18446744073709551615, 18446744073709551615, 50, 59, 50, 59, 11, 14, true, "Jian Sun.", "Jian Sun."], ["reference", "citation-number", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 389609625697296215, 1913545593953328211, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "[10]", "[10]"], ["reference", "container-title", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 17791264228691503041, 2574823334558986016, 18446744073709551615, 18446744073709551615, 146, 201, 146, 201, 30, 38, true, "In Advances in Neural Information Processing Systems 28", "In Advances in Neural Information Processing Systems 28"], ["reference", "date", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 389609625548777059, 1924763351573441882, 18446744073709551615, 18446744073709551615, 60, 64, 60, 64, 14, 15, true, "2015", "2015"], ["reference", "editor", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 358905225071115951, 17349611132874933119, 18446744073709551615, 18446744073709551615, 203, 268, 203, 268, 39, 63, true, "C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett", "C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett"], ["reference", "editor", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 12178341415896120351, 93479150678904308, 18446744073709551615, 18446744073709551615, 270, 273, 270, 273, 64, 65, true, "Eds", "Eds"], ["reference", "publisher", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 9270493059688133028, 15988999060375847661, 18446744073709551615, 18446744073709551615, 277, 294, 277, 294, 68, 70, true, "Curran Associates", "Curran Associates"], ["reference", "publisher", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 12178341415896263797, 93480841238860416, 18446744073709551615, 18446744073709551615, 296, 299, 296, 299, 71, 72, true, "Inc", "Inc"], ["reference", "title", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 695901516261617265, 14331097264748910677, 18446744073709551615, 18446744073709551615, 66, 144, 66, 144, 16, 29, true, "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks", "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"], ["reference", "url", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 3374974501831695503, 17450904193872703176, 18446744073709551615, 18446744073709551615, 309, 420, 309, 420, 76, 78, true, "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks", "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks"], ["reference", "url", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 12178341415895634440, 93706065194188109, 18446744073709551615, 18446744073709551615, 422, 425, 422, 425, 79, 80, true, "pdf", "pdf"], ["reference", "author", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 4686361850733567621, 5253767773577297512, 18446744073709551615, 18446744073709551615, 5, 20, 5, 20, 1, 5, true, "Peter W J Staar", "Peter W J Staar"], ["reference", "author", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 1571808557594152175, 1746337992895366641, 18446744073709551615, 18446744073709551615, 22, 35, 22, 35, 6, 8, true, "Michele Dolfi", "Michele Dolfi"], ["reference", "author", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 9737597816447750448, 2973540942666074124, 18446744073709551615, 18446744073709551615, 37, 51, 37, 51, 9, 11, true, "Christoph Auer", "Christoph Auer"], ["reference", "author", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 13732913329338511598, 166477832047526898, 18446744073709551615, 18446744073709551615, 57, 70, 57, 70, 13, 16, true, "Costas Bekas.", "Costas Bekas."], ["reference", "citation-number", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 389609625697296278, 16564150102059325413, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "[11]", "[11]"], ["reference", "date", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 389609625548777054, 16555452686088781228, 18446744073709551615, 18446744073709551615, 71, 75, 71, 75, 16, 17, true, "2018", "2018"], ["reference", "title", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 16083247419427271197, 18033265608713009513, 18446744073709551615, 18446744073709551615, 77, 133, 77, 133, 18, 26, true, "Corpus Conversion Service poster at the SysML conference", "Corpus Conversion Service poster at the SysML conference"], ["reference", "url", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 18429963590603622561, 12432928173216692023, 18446744073709551615, 18446744073709551615, 135, 166, 135, 166, 27, 31, true, "http://www.sysml.cc/doc/ 76.pdf", "http://www.sysml.cc/doc/ 76.pdf"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 15441160910541481072, 14925187714232052101, 2, 1, 0, 2, 0, 2, 0, 2, true, "72", "72"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 17767354399704235156, 6061612085784771330, 2, 2, 0, 1, 0, 1, 0, 1, true, "4", "4"], ["numval", "fval", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 389609625535995626, 16087508952769745788, 2, 3, 0, 4, 0, 4, 0, 4, true, "0.97", "0.97"], ["numval", "fval", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 389609625535995627, 16087508952857503563, 2, 4, 0, 4, 0, 4, 0, 4, true, "0.98", "0.98"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 17767354399704235162, 6061612085904261025, 3, 0, 5, 6, 5, 6, 1, 2, true, "2", "2"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 17767354399704235153, 6061612080706226208, 3, 1, 0, 1, 0, 1, 0, 1, true, "9", "9"], ["numval", "fval", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 12178341415896431533, 7910815507560570273, 3, 2, 0, 3, 0, 3, 0, 3, true, "0.1", "0.1"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 17767354399704235160, 6061612085871184177, 3, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 15441160910541481353, 14925187695918906548, 3, 3, 4, 6, 4, 6, 2, 4, true, "99", "99"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 17767354399704235160, 6061612085871196320, 3, 4, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 15441160910541481352, 14925187696052464601, 3, 4, 4, 6, 4, 6, 2, 4, true, "98", "98"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 389609625541547546, 16087857384187763916, 0, 1, 0, 4, 0, 4, 0, 1, true, "Time", "Time"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 14635106751859230946, 2707820963080644969, 0, 1, 8, 16, 8, 16, 2, 3, true, "solution", "solution"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 389609625541547546, 16087857384187759837, 0, 2, 0, 4, 0, 4, 0, 1, true, "Time", "Time"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 14635106751859230946, 2707820963080690456, 0, 2, 8, 16, 8, 16, 2, 3, true, "solution", "solution"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 2026722887841362187, 16381659398929700384, 0, 3, 0, 11, 0, 11, 0, 1, true, "Performance", "Performance"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 2026722887841362187, 16381659398929704944, 0, 4, 0, 11, 0, 11, 0, 1, true, "Performance", "Performance"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 1656308328846415489, 13536633304648648095, 1, 2, 0, 10, 0, 10, 0, 1, true, "Prediction", "Prediction"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 16381206531053803330, 16833791661508729184, 2, 0, 0, 6, 0, 6, 0, 1, true, "Faster", "Faster"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 389609625547525555, 16087961378783525633, 2, 0, 7, 11, 7, 11, 2, 3, true, "RCNN", "RCNN"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 329104161533766742, 6816206597211363397, 2, 1, 3, 8, 3, 8, 2, 3, true, "hours", "hours"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 12178341415895638619, 7911352193610305315, 2, 2, 2, 5, 2, 5, 1, 2, true, "sec", "sec"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 329104161538395681, 6804090298571621973, 3, 0, 0, 5, 0, 5, 0, 1, true, "YOLOv", "YOLOv"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 329104161533766742, 6816206597185260967, 3, 1, 2, 7, 2, 7, 1, 2, true, "hours", "hours"], ["term", "single-term", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 12178341415895638619, 7911352193599591381, 3, 2, 4, 7, 4, 7, 3, 4, true, "sec", "sec"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 15441160910541480975, 14063371777824517040, 2, 2, 0, 2, 0, 2, 0, 2, true, "75", "75"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100226782560, 2, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100226770099, 2, 4, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100226790530, 2, 5, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100227143682, 2, 6, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100226706291, 2, 7, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235161, 15803300100445307688, 3, 1, 0, 1, 0, 1, 0, 1, true, "1", "1"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 12178341415896199541, 5837267533537259043, 3, 2, 0, 3, 0, 3, 0, 3, true, "670", "670"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100227552359, 3, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100227564948, 3, 4, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100227560517, 3, 5, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100226887469, 3, 6, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100224577771, 4, 1, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100224140347, 4, 2, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 12178341415896434935, 5837266946220083063, 4, 3, 0, 3, 0, 3, 0, 3, true, "325", "325"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100224164889, 4, 4, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100224136680, 4, 5, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100224550808, 4, 6, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235161, 15803300100440618586, 5, 1, 0, 1, 0, 1, 0, 1, true, "1", "1"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 15441160910541481861, 14063371834761578936, 5, 2, 0, 2, 0, 2, 0, 2, true, "17", "17"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100334898942, 5, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147816412516, 1837047046804924097, 5, 4, 0, 5, 0, 5, 0, 5, true, "56460", "56460"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 15441160910541481978, 14063371734014592858, 5, 5, 0, 2, 0, 2, 0, 2, true, "14", "14"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100334911438, 5, 6, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335666817, 6, 1, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335695504, 6, 2, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335691427, 6, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235156, 15803300100423374160, 6, 4, 0, 1, 0, 1, 0, 1, true, "4", "4"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 389609625655502523, 1616926330272763134, 6, 5, 0, 4, 0, 4, 0, 4, true, "4223", "4223"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 15441160910541481788, 14063371729666955983, 6, 6, 0, 2, 0, 2, 0, 2, true, "26", "26"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335410790, 7, 1, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335431255, 7, 2, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335435332, 7, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335423029, 7, 4, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235161, 15803300100417827613, 7, 5, 0, 1, 0, 1, 0, 1, true, "1", "1"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 389609625549028785, 1615012629921730407, 7, 6, 0, 4, 0, 4, 0, 4, true, "3418", "3418"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 12178341415896426714, 5837506952496953864, 8, 1, 0, 3, 0, 3, 0, 3, true, "100", "100"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147618004574, 1850939892712171199, 8, 2, 0, 5, 0, 5, 0, 5, true, "99.85", "99.85"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 12178341415896426714, 5837506952496995114, 8, 3, 0, 3, 0, 3, 0, 3, true, "100", "100"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617972634, 1850962284269103299, 8, 4, 0, 5, 0, 5, 0, 5, true, "99.94", "99.94"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617973201, 1850962300609404744, 8, 5, 0, 5, 0, 5, 0, 5, true, "99.24", "99.24"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617972639, 1850962292247387325, 8, 6, 0, 5, 0, 5, 0, 5, true, "99.97", "99.97"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147618821186, 1850927305919343478, 9, 1, 0, 5, 0, 5, 0, 5, true, "97.40", "97.40"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147618821120, 1850927276933057753, 9, 2, 0, 5, 0, 5, 0, 5, true, "97.52", "97.52"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 12178341415896426714, 5837506952496145978, 9, 3, 0, 3, 0, 3, 0, 3, true, "100", "100"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617972625, 1850962284440201056, 9, 4, 0, 5, 0, 5, 0, 5, true, "99.99", "99.99"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617972438, 1850969319560412727, 9, 5, 0, 5, 0, 5, 0, 5, true, "99.64", "99.64"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617973201, 1850962300608540590, 9, 6, 0, 5, 0, 5, 0, 5, true, "99.24", "99.24"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104161624445793, 421775788727950862, 0, 2, 10, 15, 10, 15, 1, 2, true, "label", "label"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104161624445793, 421775788727954943, 0, 3, 10, 15, 10, 15, 1, 2, true, "label", "label"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104161624445793, 421775788728023184, 0, 4, 10, 15, 10, 15, 1, 2, true, "label", "label"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104161624445793, 421775788727994369, 0, 5, 10, 15, 10, 15, 1, 2, true, "label", "label"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104161624445793, 421775788727998642, 0, 6, 10, 15, 10, 15, 1, 2, true, "label", "label"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104161624445793, 421775788728004387, 0, 7, 10, 15, 10, 15, 1, 2, true, "label", "label"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 16381206482755721438, 6650922822359027250, 1, 1, 0, 6, 0, 6, 0, 2, true, "T itle", "T itle"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 16381206562428603067, 4879747397715854867, 1, 3, 0, 6, 0, 6, 0, 1, true, "Author", "Author"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 14652314692921799233, 10657560030989347793, 1, 4, 0, 8, 0, 8, 0, 1, true, "Subtitle", "Subtitle"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 15441160910541487879, 14063371787734206200, 1, 5, 0, 2, 0, 2, 0, 1, true, "Te", "Te"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 8106352219904586209, 11031918618165802042, 1, 6, 0, 7, 0, 7, 0, 1, true, "Picture", "Picture"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 4682222921140874465, 7249830264234759527, 2, 0, 0, 10, 0, 10, 0, 2, true, "true label", "true label"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104161841334670, 340356054356328763, 2, 1, 0, 5, 0, 5, 0, 1, true, "Title", "Title"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 15894660811414869651, 783966399713762072, 3, 0, 0, 17, 0, 17, 0, 3, true, "true label Author", "true label Author"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 13831724478157455165, 14474441641223495436, 4, 0, 0, 19, 0, 19, 0, 3, true, "true label Subtitle", "true label Subtitle"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 1941183530690029532, 8884273177240528491, 5, 0, 0, 15, 0, 15, 0, 3, true, "true label Text", "true label Text"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 3368831387223592374, 12898366621225531538, 6, 0, 0, 18, 0, 18, 0, 3, true, "true label Picture", "true label Picture"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 6024510248149615245, 9009738277270760030, 7, 0, 0, 16, 0, 16, 0, 3, true, "true label Table", "true label Table"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 15894660815285786750, 1728790590792959237, 8, 0, 0, 17, 0, 17, 0, 3, true, "true label Recall", "true label Recall"], ["term", "single-term", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 16647754341621779036, 9808370296852837060, 9, 0, 0, 20, 0, 20, 0, 3, true, "true label Precision", "true label Precision"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481352, 14633884986579423126, 1, 1, 0, 2, 0, 2, 0, 2, true, "98", "98"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481358, 14633884986289176499, 1, 1, 5, 7, 5, 7, 3, 5, true, "96", "96"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481353, 14633884986629840445, 1, 2, 0, 2, 0, 2, 0, 2, true, "99", "99"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481394, 14633884986969604250, 1, 2, 5, 7, 5, 7, 3, 5, true, "83", "83"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481353, 14633884986621746969, 2, 1, 0, 2, 0, 2, 0, 2, true, "99", "99"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541486270, 14633895233084857259, 2, 1, 5, 7, 5, 7, 3, 5, true, "46", "46"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481353, 14633884986621702026, 2, 2, 0, 2, 0, 2, 0, 2, true, "99", "99"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541486209, 14633895297101973839, 2, 2, 5, 7, 5, 7, 3, 5, true, "58", "58"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "meta": [{"$ref": "#/footnotes/0"}, {"$ref": "#/footnotes/1"}, {"$ref": "#/footnotes/2"}, {"$ref": "#/footnotes/3"}, {"$ref": "#/footnotes/4"}, {"$ref": "#/footnotes/5"}, {"$ref": "#/figures/0/captions/0"}, {"$ref": "#/footnotes/6"}, {"$ref": "#/footnotes/7"}, {"$ref": "#/footnotes/8"}, {"$ref": "#/footnotes/9"}, {"$ref": "#/footnotes/10"}, {"$ref": "#/figures/2/captions/0"}, {"$ref": "#/figures/1/captions/0"}, {"$ref": "#/footnotes/11"}, {"$ref": "#/footnotes/12"}, {"$ref": "#/footnotes/13"}, {"$ref": "#/figures/3/captions/0"}, {"$ref": "#/figures/4/captions/0"}, {"$ref": "#/footnotes/14"}, {"$ref": "#/footnotes/15"}, {"$ref": "#/footnotes/16"}, {"$ref": "#/footnotes/17"}, {"$ref": "#/footnotes/18"}, {"$ref": "#/footnotes/19"}, {"$ref": "#/figures/6/captions/0"}, {"$ref": "#/footnotes/20"}, {"$ref": "#/figures/7/captions/0"}, {"$ref": "#/footnotes/21"}, {"$ref": "#/footnotes/22"}, {"$ref": "#/footnotes/23"}], "model-application": {"message": "success", "success": true}, "other": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}], "page-elements": [{"bbox": [18.340225219726562, 231.99996948242188, 36.339778900146484, 586.4000244140625], "dref": "#/texts/0", "name": "text", "orig-order": 19, "page": 1, "span": [0, 38], "text-order": 0, "type": "paragraph"}, {"bbox": [61.47460174560547, 672.0942993164062, 552.7999877929688, 708.4287719726562], "dref": "#/texts/1", "name": "title", "orig-order": 0, "page": 1, "span": [0, 84], "text-order": 1, "type": "title"}, {"bbox": [158.54901123046875, 646.95166015625, 454.4521484375, 657.9959716796875], "dref": "#/texts/2", "name": "text", "orig-order": 1, "page": 1, "span": [0, 60], "text-order": 2, "type": "paragraph"}, {"bbox": [179.6484832763672, 635.4270629882812, 433.13836669921875, 644.6961059570312], "dref": "#/texts/3", "name": "text", "orig-order": 2, "page": 1, "span": [0, 30], "text-order": 3, "type": "paragraph"}, {"bbox": [277.5870056152344, 623.4720458984375, 335.40997314453125, 632.3786010742188], "dref": "#/texts/4", "name": "text", "orig-order": 3, "page": 1, "span": [0, 12], "text-order": 4, "type": "paragraph"}, {"bbox": [255.3256378173828, 611.5160522460938, 357.6419982910156, 621.1870727539062], "dref": "#/texts/5", "name": "text", "orig-order": 4, "page": 1, "span": [0, 24], "text-order": 5, "type": "paragraph"}, {"bbox": [53.50812911987305, 592.31494140625, 112.67424011230469, 602.275634765625], "dref": "#/texts/6", "name": "subtitle-level-1", "orig-order": 5, "page": 1, "span": [0, 8], "text-order": 6, "type": "subtitle-level-1"}, {"bbox": [317.7327880859375, 592.2473754882812, 421.26416015625, 602.3604125976562], "dref": "#/texts/7", "name": "subtitle-level-1", "orig-order": 13, "page": 1, "span": [0, 14], "text-order": 7, "type": "subtitle-level-1"}, {"bbox": [53.474998474121094, 326.9052734375, 295.66064453125, 586.9752197265625], "dref": "#/texts/8", "name": "text", "orig-order": 6, "page": 1, "span": [0, 1554], "text-order": 8, "type": "paragraph"}, {"bbox": [53.51100158691406, 294.8792724609375, 138.14549255371094, 302.33953857421875], "dref": "#/texts/9", "name": "subtitle-level-1", "orig-order": 7, "page": 1, "span": [0, 21], "text-order": 9, "type": "subtitle-level-1"}, {"bbox": [53.20000076293945, 235.04745483398438, 295.4400329589844, 292.11370849609375], "dref": "#/texts/10", "name": "text", "orig-order": 8, "page": 1, "span": [0, 366], "text-order": 10, "type": "paragraph"}, {"bbox": [53.79800033569336, 121.27276611328125, 294.28240966796875, 176.01959228515625], "dref": "#/footnotes/0", "name": "footnote", "orig-order": 9, "page": 1, "span": [0, 585], "text-order": 11, "type": "footnote"}, {"bbox": [53.56800079345703, 112.3555908203125, 215.3354034423828, 118.82350158691406], "dref": "#/footnotes/1", "name": "footnote", "orig-order": 10, "page": 1, "span": [0, 53], "text-order": 12, "type": "footnote"}, {"bbox": [53.268001556396484, 94.71673583984375, 286.8135986328125, 110.1262435913086], "dref": "#/footnotes/2", "name": "footnote", "orig-order": 11, "page": 1, "span": [0, 124], "text-order": 13, "type": "footnote"}, {"bbox": [52.780723571777344, 87.53521728515625, 173.61199951171875, 94.18523406982422], "dref": "#/footnotes/3", "name": "footnote", "orig-order": 12, "page": 1, "span": [0, 39], "text-order": 14, "type": "footnote"}, {"bbox": [317.6319885253906, 337.24517822265625, 559.6874389648438, 586.986328125], "dref": "#/texts/11", "name": "text", "orig-order": 14, "page": 1, "span": [0, 1532], "text-order": 15, "type": "paragraph"}, {"bbox": [317.9549865722656, 183.756591796875, 559.7752075195312, 334.59222412109375], "dref": "#/texts/12", "name": "text", "orig-order": 15, "page": 1, "span": [0, 891], "text-order": 16, "type": "paragraph"}, {"bbox": [317.9549865722656, 150.97491455078125, 559.4527587890625, 181.16822814941406], "dref": "#/texts/13", "name": "text", "orig-order": 16, "page": 1, "span": [0, 200], "text-order": 17, "type": "paragraph"}, {"bbox": [317.54400634765625, 100.9158935546875, 559.1497192382812, 123.9642333984375], "dref": "#/footnotes/4", "name": "footnote", "orig-order": 17, "page": 1, "span": [0, 185], "text-order": 18, "type": "footnote"}, {"bbox": [317.54779052734375, 84.349853515625, 559.419189453125, 99.1622314453125], "dref": "#/footnotes/5", "name": "footnote", "orig-order": 18, "page": 1, "span": [0, 130], "text-order": 19, "type": "footnote"}, {"bbox": [57.056358337402344, 581.521484375, 566.21923828125, 705.9985961914062], "dref": "#/figures/0", "name": "picture", "orig-order": 20, "page": 2, "span": [0, 0], "text-order": 20, "type": "figure"}, {"bbox": [53.502044677734375, 488.92645263671875, 560.5620727539062, 562.7618408203125], "dref": "#/figures/0/captions/0", "name": "caption", "orig-order": 21, "page": 2, "span": [0, 820], "text-order": 21, "type": "caption"}, {"bbox": [53.474998474121094, 394.9362487792969, 295.5370178222656, 468.70623779296875], "dref": "#/texts/14", "name": "text", "orig-order": 22, "page": 2, "span": [0, 409], "text-order": 22, "type": "paragraph"}, {"bbox": [53.575965881347656, 370.0102844238281, 173.47894287109375, 380.5594177246094], "dref": "#/texts/15", "name": "subtitle-level-1", "orig-order": 23, "page": 2, "span": [0, 18], "text-order": 23, "type": "subtitle-level-1"}, {"bbox": [53.474998474121094, 203.9712677001953, 295.7048645019531, 365.4122314453125], "dref": "#/texts/16", "name": "text", "orig-order": 24, "page": 2, "span": [0, 955], "text-order": 24, "type": "paragraph"}, {"bbox": [53.79800033569336, 148.84925842285156, 295.53668212890625, 201.0292205810547], "dref": "#/texts/17", "name": "text", "orig-order": 25, "page": 2, "span": [0, 337], "text-order": 25, "type": "paragraph"}, {"bbox": [53.52906036376953, 119.31765747070312, 137.14767456054688, 125.85260009765625], "dref": "#/footnotes/6", "name": "footnote", "orig-order": 26, "page": 2, "span": [0, 32], "text-order": 26, "type": "footnote"}, {"bbox": [53.36406707763672, 110.3837890625, 128.93763732910156, 116.80622863769531], "dref": "#/footnotes/7", "name": "footnote", "orig-order": 27, "page": 2, "span": [0, 31], "text-order": 27, "type": "footnote"}, {"bbox": [53.797996520996094, 101.62908935546875, 125.09330749511719, 108.06022644042969], "dref": "#/footnotes/8", "name": "footnote", "orig-order": 28, "page": 2, "span": [0, 28], "text-order": 28, "type": "footnote"}, {"bbox": [53.797996520996094, 93.07965087890625, 128.44528198242188, 99.42169189453125], "dref": "#/footnotes/9", "name": "footnote", "orig-order": 29, "page": 2, "span": [0, 29], "text-order": 29, "type": "footnote"}, {"bbox": [53.66099548339844, 84.4400634765625, 246.72222900390625, 90.71622467041016], "dref": "#/footnotes/10", "name": "footnote", "orig-order": 30, "page": 2, "span": [0, 68], "text-order": 30, "type": "footnote"}, {"bbox": [317.69781494140625, 416.852783203125, 560.5628051757812, 468.70623779296875], "dref": "#/texts/18", "name": "text", "orig-order": 31, "page": 2, "span": [0, 325], "text-order": 31, "type": "paragraph"}, {"bbox": [317.9549865722656, 392.1359558105469, 440.25689697265625, 402.8952941894531], "dref": "#/texts/19", "name": "subtitle-level-1", "orig-order": 32, "page": 2, "span": [0, 17], "text-order": 32, "type": "subtitle-level-1"}, {"bbox": [317.9549865722656, 357.8152770996094, 559.0849609375, 387.74822998046875], "dref": "#/texts/20", "name": "text", "orig-order": 33, "page": 2, "span": [0, 174], "text-order": 33, "type": "paragraph"}, {"bbox": [317.6319885253906, 248.01971435546875, 559.85888671875, 354.8722229003906], "dref": "#/texts/21", "name": "text", "orig-order": 34, "page": 2, "span": [0, 594], "text-order": 34, "type": "paragraph"}, {"bbox": [317.9549865722656, 83.84225463867188, 559.7321166992188, 245.28321838378906], "dref": "#/texts/22", "name": "text", "orig-order": 35, "page": 2, "span": [0, 983], "text-order": 35, "type": "paragraph"}, {"bbox": [56.01094436645508, 558.7518920898438, 290.9949645996094, 709.7254028320312], "dref": "#/figures/1", "name": "picture", "orig-order": 49, "page": 3, "span": [0, 0], "text-order": 36, "type": "figure"}, {"bbox": [321.3935546875, 558.6827392578125, 554.2520751953125, 709.9332885742188], "dref": "#/figures/2", "name": "picture", "orig-order": 50, "page": 3, "span": [0, 0], "text-order": 37, "type": "figure"}, {"bbox": [53.79798889160156, 472.7510986328125, 295.5321960449219, 546.5648193359375], "dref": "#/figures/2/captions/0", "name": "caption", "orig-order": 36, "page": 3, "span": [0, 389], "text-order": 38, "type": "caption"}, {"bbox": [53.79800033569336, 294.82501220703125, 295.1091003417969, 445.8912353515625], "dref": "#/texts/23", "name": "text", "orig-order": 37, "page": 3, "span": [0, 916], "text-order": 39, "type": "paragraph"}, {"bbox": [53.79800033569336, 272.23712158203125, 137.17259216308594, 282.3864440917969], "dref": "#/texts/24", "name": "subtitle-level-1", "orig-order": 38, "page": 3, "span": [0, 14], "text-order": 40, "type": "subtitle-level-1"}, {"bbox": [53.62799835205078, 214.8350830078125, 295.6110534667969, 266.9012145996094], "dref": "#/texts/25", "name": "text", "orig-order": 39, "page": 3, "span": [0, 280], "text-order": 41, "type": "paragraph"}, {"bbox": [53.50199890136719, 82.777099609375, 295.5345458984375, 212.1072235107422], "dref": "#/texts/26", "name": "text", "orig-order": 40, "page": 3, "span": [0, 799], "text-order": 42, "type": "paragraph"}, {"bbox": [317.9456481933594, 494.0677185058594, 559.70654296875, 546.4738159179688], "dref": "#/figures/1/captions/0", "name": "caption", "orig-order": 41, "page": 3, "span": [0, 272], "text-order": 43, "type": "caption"}, {"bbox": [317.2680969238281, 451.83447265625, 558.4124755859375, 470.8982238769531], "dref": "#/texts/27", "name": "text", "orig-order": 42, "page": 3, "span": [0, 93], "text-order": 44, "type": "paragraph"}, {"bbox": [317.9549865722656, 429.5016174316406, 445.8941345214844, 439.35626220703125], "dref": "#/texts/28", "name": "subtitle-level-1", "orig-order": 43, "page": 3, "span": [0, 24], "text-order": 45, "type": "subtitle-level-1"}, {"bbox": [317.6319885253906, 306.6052551269531, 559.0227661132812, 424.2102355957031], "dref": "#/texts/29", "name": "text", "orig-order": 44, "page": 3, "span": [0, 669], "text-order": 46, "type": "paragraph"}, {"bbox": [317.9549865722656, 152.94097900390625, 559.0300903320312, 303.6622314453125], "dref": "#/texts/30", "name": "text", "orig-order": 45, "page": 3, "span": [0, 900], "text-order": 47, "type": "paragraph"}, {"bbox": [317.542236328125, 119.9617919921875, 560.2256469726562, 150.07322692871094], "dref": "#/texts/31", "name": "text", "orig-order": 46, "page": 3, "span": [0, 199], "text-order": 48, "type": "paragraph"}, {"bbox": [317.8511962890625, 91.70623779296875, 558.1990356445312, 106.6500244140625], "dref": "#/footnotes/11", "name": "footnote", "orig-order": 47, "page": 3, "span": [0, 102], "text-order": 49, "type": "footnote"}, {"bbox": [317.9549865722656, 83.3656005859375, 397.3962707519531, 89.81023406982422], "dref": "#/footnotes/12", "name": "footnote", "orig-order": 48, "page": 3, "span": [0, 34], "text-order": 50, "type": "footnote"}, {"bbox": [53.79800033569336, 608.7432250976562, 295.53790283203125, 704.4302368164062], "dref": "#/texts/32", "name": "text", "orig-order": 51, "page": 4, "span": [0, 542], "text-order": 51, "type": "paragraph"}, {"bbox": [53.79800033569336, 574.2219848632812, 231.56687927246094, 596.9802856445312], "dref": "#/texts/33", "name": "subtitle-level-1", "orig-order": 52, "page": 4, "span": [0, 51], "text-order": 52, "type": "subtitle-level-1"}, {"bbox": [53.79800033569336, 473.106689453125, 295.5303955078125, 568.8822021484375], "dref": "#/texts/34", "name": "text", "orig-order": 53, "page": 4, "span": [0, 557], "text-order": 53, "type": "paragraph"}, {"bbox": [53.250999450683594, 319.60626220703125, 295.61322021484375, 470.2522277832031], "dref": "#/texts/35", "name": "text", "orig-order": 54, "page": 4, "span": [0, 919], "text-order": 54, "type": "paragraph"}, {"bbox": [53.474998474121094, 154.744140625, 296.03668212890625, 316.6632385253906], "dref": "#/texts/36", "name": "text", "orig-order": 55, "page": 4, "span": [0, 1011], "text-order": 55, "type": "paragraph"}, {"bbox": [53.79800033569336, 121.91156005859375, 295.533203125, 152.2802276611328], "dref": "#/texts/37", "name": "text", "orig-order": 56, "page": 4, "span": [0, 195], "text-order": 56, "type": "paragraph"}, {"bbox": [53.387001037597656, 83.17366027832031, 294.92218017578125, 113.5859375], "dref": "#/footnotes/13", "name": "footnote", "orig-order": 57, "page": 4, "span": [0, 290], "text-order": 57, "type": "footnote"}, {"bbox": [326.25421142578125, 539.8611450195312, 548.1567993164062, 703.5318603515625], "dref": "#/figures/3", "name": "picture", "orig-order": 58, "page": 4, "span": [0, 0], "text-order": 58, "type": "figure"}, {"bbox": [317.6319885253906, 415.019287109375, 560.175537109375, 522.0748291015625], "dref": "#/figures/3/captions/0", "name": "caption", "orig-order": 59, "page": 4, "span": [0, 576], "text-order": 59, "type": "caption"}, {"bbox": [317.9549865722656, 304.00390625, 559.1529541015625, 388.9792175292969], "dref": "#/texts/38", "name": "text", "orig-order": 60, "page": 4, "span": [0, 539], "text-order": 60, "type": "paragraph"}, {"bbox": [317.9549865722656, 268.2449951171875, 522.75146484375, 291.0042724609375], "dref": "#/texts/39", "name": "subtitle-level-1", "orig-order": 61, "page": 4, "span": [0, 55], "text-order": 61, "type": "subtitle-level-1"}, {"bbox": [317.9437561035156, 166.98492431640625, 559.7679443359375, 263.0128173828125], "dref": "#/texts/40", "name": "text", "orig-order": 62, "page": 4, "span": [0, 605], "text-order": 62, "type": "paragraph"}, {"bbox": [317.9549865722656, 83.1304931640625, 560.1549682617188, 157.56007385253906], "dref": "#/texts/41", "name": "text", "orig-order": 63, "page": 4, "span": [0, 466], "text-order": 63, "type": "paragraph"}, {"bbox": [55.4039421081543, 459.4396667480469, 294.0187072753906, 709.196533203125], "dref": "#/figures/4", "name": "picture", "orig-order": 79, "page": 5, "span": [0, 0], "text-order": 64, "type": "figure"}, {"bbox": [53.76737594604492, 404.8351745605469, 296.919189453125, 446.1678161621094], "dref": "#/figures/4/captions/0", "name": "caption", "orig-order": 64, "page": 5, "span": [0, 228], "text-order": 65, "type": "caption"}, {"bbox": [53.79800033569336, 353.96826171875, 295.1701354980469, 383.9022216796875], "dref": "#/texts/42", "name": "text", "orig-order": 65, "page": 5, "span": [0, 199], "text-order": 66, "type": "paragraph"}, {"bbox": [53.79800033569336, 332.0502624511719, 294.4319152832031, 351.070068359375], "dref": "#/texts/43", "name": "text", "orig-order": 66, "page": 5, "span": [0, 105], "text-order": 67, "type": "paragraph"}, {"bbox": [117.81383514404297, 304.81182861328125, 294.531494140625, 327.9595642089844], "dref": "#/texts/44", "name": "formula", "orig-order": 67, "page": 5, "span": [0, 73], "text-order": 68, "type": "equation"}, {"bbox": [53.474998474121094, 280.8752746582031, 294.5132751464844, 300.0321044921875], "dref": "#/texts/45", "name": "text", "orig-order": 68, "page": 5, "span": [0, 124], "text-order": 69, "type": "paragraph"}, {"bbox": [53.79800033569336, 154.57626342773438, 295.1409912109375, 272.4860534667969], "dref": "#/texts/46", "name": "text", "orig-order": 69, "page": 5, "span": [0, 715], "text-order": 70, "type": "paragraph"}, {"bbox": [53.79800033569336, 121.69925689697266, 295.1619567871094, 151.6332244873047], "dref": "#/texts/47", "name": "text", "orig-order": 70, "page": 5, "span": [0, 172], "text-order": 71, "type": "paragraph"}, {"bbox": [53.79800033569336, 99.61725616455078, 294.5709533691406, 118.8629150390625], "dref": "#/texts/48", "name": "text", "orig-order": 71, "page": 5, "span": [0, 125], "text-order": 72, "type": "paragraph"}, {"bbox": [53.387001037597656, 83.28266143798828, 294.561279296875, 89.6395263671875], "dref": "#/footnotes/14", "name": "footnote", "orig-order": 72, "page": 5, "span": [0, 93], "text-order": 73, "type": "footnote"}, {"bbox": [316.9908142089844, 622.02099609375, 560.0983276367188, 706.829833984375], "dref": "#/tables/0/captions/0", "name": "text", "orig-order": 73, "page": 5, "span": [0, 461], "text-order": 74, "type": "paragraph"}, {"bbox": [334.4774475097656, 554.5862426757812, 541.1703491210938, 609.4986572265625], "dref": "#/tables/0", "name": "table", "orig-order": 74, "page": 5, "span": [0, 0], "text-order": 75, "type": "table"}, {"bbox": [317.37548828125, 468.0936279296875, 559.939453125, 520.1222534179688], "dref": "#/texts/49", "name": "text", "orig-order": 75, "page": 5, "span": [0, 337], "text-order": 76, "type": "paragraph"}, {"bbox": [317.6319885253906, 303.8862609863281, 561.6922607421875, 465.32720947265625], "dref": "#/texts/50", "name": "text", "orig-order": 76, "page": 5, "span": [0, 955], "text-order": 77, "type": "paragraph"}, {"bbox": [317.6319885253906, 149.8055419921875, 560.1611328125, 300.9432373046875], "dref": "#/texts/51", "name": "text", "orig-order": 77, "page": 5, "span": [0, 913], "text-order": 78, "type": "paragraph"}, {"bbox": [317.6319885253906, 84.708251953125, 559.6876831054688, 147.51922607421875], "dref": "#/texts/52", "name": "text", "orig-order": 78, "page": 5, "span": [0, 398], "text-order": 79, "type": "paragraph"}, {"bbox": [53.50199890136719, 654.8878173828125, 295.74688720703125, 706.829833984375], "dref": "#/texts/53", "name": "text", "orig-order": 80, "page": 6, "span": [0, 310], "text-order": 80, "type": "paragraph"}, {"bbox": [54.41073989868164, 497.82928466796875, 294.0743103027344, 642.206787109375], "dref": "#/tables/1", "name": "table", "orig-order": 81, "page": 6, "span": [0, 0], "text-order": 81, "type": "table"}, {"bbox": [53.474998474121094, 321.0742492675781, 295.6167907714844, 471.55621337890625], "dref": "#/texts/54", "name": "text", "orig-order": 82, "page": 6, "span": [0, 867], "text-order": 82, "type": "paragraph"}, {"bbox": [53.79800033569336, 236.98883056640625, 295.53466796875, 311.1290588378906], "dref": "#/texts/55", "name": "text", "orig-order": 83, "page": 6, "span": [0, 460], "text-order": 83, "type": "paragraph"}, {"bbox": [53.79800033569336, 127.0894775390625, 295.61102294921875, 234.11122131347656], "dref": "#/texts/56", "name": "text", "orig-order": 84, "page": 6, "span": [0, 635], "text-order": 84, "type": "paragraph"}, {"bbox": [53.79800033569336, 83.63025665283203, 295.5378723144531, 124.522216796875], "dref": "#/texts/57", "name": "text", "orig-order": 85, "page": 6, "span": [0, 256], "text-order": 85, "type": "paragraph"}, {"bbox": [317.49591064453125, 643.8729248046875, 560.350341796875, 706.829833984375], "dref": "#/tables/1/captions/0", "name": "text", "orig-order": 86, "page": 6, "span": [0, 356], "text-order": 86, "type": "paragraph"}, {"bbox": [369.7939453125, 587.8507080078125, 506.9258117675781, 631.5213012695312], "dref": "#/tables/2", "name": "table", "orig-order": 87, "page": 6, "span": [0, 0], "text-order": 87, "type": "table"}, {"bbox": [317.9549865722656, 505.94525146484375, 559.6949462890625, 568.7562255859375], "dref": "#/texts/58", "name": "text", "orig-order": 88, "page": 6, "span": [0, 346], "text-order": 88, "type": "paragraph"}, {"bbox": [317.9183349609375, 384.9764404296875, 559.3198852539062, 503.0022277832031], "dref": "#/texts/59", "name": "text", "orig-order": 89, "page": 6, "span": [0, 689], "text-order": 89, "type": "paragraph"}, {"bbox": [317.87957763671875, 351.9661865234375, 559.6873168945312, 382.4542236328125], "dref": "#/texts/60", "name": "text", "orig-order": 90, "page": 6, "span": [0, 198], "text-order": 90, "type": "paragraph"}, {"bbox": [317.9089660644531, 253.72625732421875, 558.7941284179688, 349.57720947265625], "dref": "#/texts/61", "name": "text", "orig-order": 91, "page": 6, "span": [0, 558], "text-order": 91, "type": "paragraph"}, {"bbox": [317.6319885253906, 165.80517578125, 558.5486450195312, 250.78321838378906], "dref": "#/texts/62", "name": "text", "orig-order": 92, "page": 6, "span": [0, 531], "text-order": 92, "type": "paragraph"}, {"bbox": [317.9549865722656, 144.355712890625, 388.1922607421875, 154.5562744140625], "dref": "#/texts/63", "name": "subtitle-level-1", "orig-order": 93, "page": 6, "span": [0, 12], "text-order": 93, "type": "subtitle-level-1"}, {"bbox": [317.9549865722656, 97.82269287109375, 558.6990966796875, 139.1552276611328], "dref": "#/texts/64", "name": "text", "orig-order": 94, "page": 6, "span": [0, 277], "text-order": 94, "type": "paragraph"}, {"bbox": [317.54400634765625, 83.16966247558594, 398.95098876953125, 89.63201904296875], "dref": "#/footnotes/15", "name": "footnote", "orig-order": 95, "page": 6, "span": [0, 35], "text-order": 95, "type": "footnote"}, {"bbox": [53.79800033569336, 687.81005859375, 296.0726318359375, 706.829833984375], "dref": "#/texts/65", "name": "text", "orig-order": 96, "page": 7, "span": [0, 104], "text-order": 96, "type": "paragraph"}, {"bbox": [52.97157669067383, 452.9112548828125, 291.5167236328125, 672.6514282226562], "dref": "#/texts/66", "name": "text", "orig-order": 97, "page": 7, "span": [0, 723], "text-order": 97, "type": "paragraph"}, {"bbox": [53.79800033569336, 388.8714294433594, 295.0650634765625, 430.3962097167969], "dref": "#/texts/67", "name": "text", "orig-order": 98, "page": 7, "span": [0, 226], "text-order": 98, "type": "paragraph"}, {"bbox": [53.474998474121094, 301.7305908203125, 295.7899169921875, 386.56121826171875], "dref": "#/texts/68", "name": "text", "orig-order": 99, "page": 7, "span": [0, 530], "text-order": 99, "type": "paragraph"}, {"bbox": [53.79800033569336, 265.370849609375, 287.7526550292969, 288.25628662109375], "dref": "#/texts/69", "name": "subtitle-level-1", "orig-order": 100, "page": 7, "span": [0, 61], "text-order": 100, "type": "subtitle-level-1"}, {"bbox": [53.474998474121094, 130.9022216796875, 295.88214111328125, 260.0602111816406], "dref": "#/texts/70", "name": "text", "orig-order": 101, "page": 7, "span": [0, 777], "text-order": 101, "type": "paragraph"}, {"bbox": [53.79800033569336, 107.44329833984375, 150.55332946777344, 117.82127380371094], "dref": "#/texts/71", "name": "subtitle-level-1", "orig-order": 102, "page": 7, "span": [0, 19], "text-order": 102, "type": "subtitle-level-1"}, {"bbox": [53.79800033569336, 83.70025634765625, 295.5948791503906, 102.80303955078125], "dref": "#/texts/72", "name": "text", "orig-order": 103, "page": 7, "span": [0, 127], "text-order": 103, "type": "paragraph"}, {"bbox": [319.4678649902344, 591.0667114257812, 563.418212890625, 707.4041137695312], "dref": "#/figures/5", "name": "picture", "orig-order": 104, "page": 7, "span": [0, 0], "text-order": 104, "type": "figure"}, {"bbox": [317.9549865722656, 491.0215148925781, 561.2398681640625, 575.8748168945312], "dref": "#/figures/5/captions/0", "name": "text", "orig-order": 105, "page": 7, "span": [0, 462], "text-order": 105, "type": "paragraph"}, {"bbox": [317.9549865722656, 444.9665222167969, 558.4959106445312, 464.1462097167969], "dref": "#/texts/73", "name": "text", "orig-order": 106, "page": 7, "span": [0, 97], "text-order": 106, "type": "paragraph"}, {"bbox": [327.9239807128906, 375.72027587890625, 560.4287719726562, 438.79620361328125], "dref": "#/texts/74", "name": "list-item", "orig-order": 107, "page": 7, "span": [0, 307], "text-order": 107, "type": "paragraph"}, {"bbox": [326.89788818359375, 244.04925537109375, 561.5510864257812, 372.7772216796875], "dref": "#/texts/75", "name": "list-item", "orig-order": 108, "page": 7, "span": [0, 702], "text-order": 108, "type": "paragraph"}, {"bbox": [327.4911804199219, 133.84515380859375, 560.5987548828125, 241.10621643066406], "dref": "#/texts/76", "name": "list-item", "orig-order": 109, "page": 7, "span": [0, 613], "text-order": 109, "type": "paragraph"}, {"bbox": [317.54400634765625, 110.22366333007812, 398.9919738769531, 117.16583251953125], "dref": "#/footnotes/16", "name": "footnote", "orig-order": 110, "page": 7, "span": [0, 32], "text-order": 110, "type": "footnote"}, {"bbox": [317.54400634765625, 101.18707275390625, 400.1710205078125, 108.2861328125], "dref": "#/footnotes/17", "name": "footnote", "orig-order": 111, "page": 7, "span": [0, 32], "text-order": 111, "type": "footnote"}, {"bbox": [317.54400634765625, 92.2611083984375, 382.0435791015625, 99.16375732421875], "dref": "#/footnotes/18", "name": "footnote", "orig-order": 112, "page": 7, "span": [0, 28], "text-order": 112, "type": "footnote"}, {"bbox": [317.54400634765625, 83.07232666015625, 407.5936279296875, 90.22023010253906], "dref": "#/footnotes/19", "name": "footnote", "orig-order": 113, "page": 7, "span": [0, 36], "text-order": 113, "type": "footnote"}, {"bbox": [58.86375045776367, 545.35546875, 300.35174560546875, 702.7379760742188], "dref": "#/figures/6", "name": "picture", "orig-order": 114, "page": 8, "span": [0, 0], "text-order": 114, "type": "figure"}, {"bbox": [53.79800033569336, 474.97509765625, 297.0106506347656, 526.871826171875], "dref": "#/figures/6/captions/0", "name": "caption", "orig-order": 115, "page": 8, "span": [0, 281], "text-order": 115, "type": "caption"}, {"bbox": [78.20700073242188, 422.1390686035156, 295.529052734375, 452.20721435546875], "dref": "#/texts/77", "name": "text", "orig-order": 116, "page": 8, "span": [0, 125], "text-order": 116, "type": "paragraph"}, {"bbox": [63.44164276123047, 300.48040771484375, 295.35687255859375, 419.33123779296875], "dref": "#/texts/78", "name": "list-item", "orig-order": 117, "page": 8, "span": [0, 633], "text-order": 117, "type": "paragraph"}, {"bbox": [53.79800033569336, 199.9788818359375, 295.3085021972656, 296.0932312011719], "dref": "#/texts/79", "name": "text", "orig-order": 118, "page": 8, "span": [0, 565], "text-order": 118, "type": "paragraph"}, {"bbox": [53.79800033569336, 101.77525329589844, 295.5303649902344, 197.46322631835938], "dref": "#/texts/80", "name": "text", "orig-order": 119, "page": 8, "span": [0, 605], "text-order": 119, "type": "paragraph"}, {"bbox": [53.387001037597656, 83.2796630859375, 137.4241180419922, 89.7896728515625], "dref": "#/footnotes/20", "name": "footnote", "orig-order": 120, "page": 8, "span": [0, 31], "text-order": 120, "type": "footnote"}, {"bbox": [321.94073486328125, 587.7708740234375, 563.5105590820312, 702.5103149414062], "dref": "#/figures/7", "name": "picture", "orig-order": 121, "page": 8, "span": [0, 0], "text-order": 121, "type": "figure"}, {"bbox": [317.9549865722656, 538.82373046875, 558.39794921875, 568.872802734375], "dref": "#/figures/7/captions/0", "name": "caption", "orig-order": 122, "page": 8, "span": [0, 149], "text-order": 122, "type": "caption"}, {"bbox": [317.9549865722656, 490.72216796875, 558.5828857421875, 509.7472229003906], "dref": "#/texts/81", "name": "text", "orig-order": 123, "page": 8, "span": [0, 87], "text-order": 123, "type": "paragraph"}, {"bbox": [317.95361328125, 466.65576171875, 398.97723388671875, 476.83929443359375], "dref": "#/texts/82", "name": "subtitle-level-1", "orig-order": 124, "page": 8, "span": [0, 14], "text-order": 124, "type": "subtitle-level-1"}, {"bbox": [317.9549865722656, 408.89727783203125, 559.3217163085938, 463.36187744140625], "dref": "#/texts/83", "name": "text", "orig-order": 125, "page": 8, "span": [0, 302], "text-order": 125, "type": "paragraph"}, {"bbox": [317.9549865722656, 332.0350341796875, 559.6873779296875, 406.2972106933594], "dref": "#/texts/84", "name": "text", "orig-order": 126, "page": 8, "span": [0, 445], "text-order": 126, "type": "paragraph"}, {"bbox": [317.9181823730469, 277.7332763671875, 559.6890869140625, 329.584228515625], "dref": "#/texts/85", "name": "text", "orig-order": 127, "page": 8, "span": [0, 307], "text-order": 127, "type": "paragraph"}, {"bbox": [317.6409912109375, 200.8416748046875, 559.6902465820312, 274.79022216796875], "dref": "#/texts/86", "name": "text", "orig-order": 128, "page": 8, "span": [0, 438], "text-order": 128, "type": "paragraph"}, {"bbox": [317.9549865722656, 177.281005859375, 438.01214599609375, 187.37762451171875], "dref": "#/texts/87", "name": "subtitle-level-1", "orig-order": 129, "page": 8, "span": [0, 22], "text-order": 129, "type": "subtitle-level-1"}, {"bbox": [317.7309875488281, 119.96771240234375, 558.4498291015625, 171.94122314453125], "dref": "#/texts/88", "name": "text", "orig-order": 130, "page": 8, "span": [0, 320], "text-order": 130, "type": "paragraph"}, {"bbox": [317.54400634765625, 93.00566101074219, 382.23095703125, 99.5784912109375], "dref": "#/footnotes/21", "name": "footnote", "orig-order": 131, "page": 8, "span": [0, 29], "text-order": 131, "type": "footnote"}, {"bbox": [317.54400634765625, 84.25965881347656, 382.0310363769531, 90.77301025390625], "dref": "#/footnotes/22", "name": "footnote", "orig-order": 132, "page": 8, "span": [0, 27], "text-order": 132, "type": "footnote"}, {"bbox": [53.79800033569336, 619.7022094726562, 295.08966064453125, 706.5369262695312], "dref": "#/texts/89", "name": "text", "orig-order": 133, "page": 9, "span": [0, 504], "text-order": 133, "type": "paragraph"}, {"bbox": [53.474998474121094, 421.4378662109375, 295.7029724121094, 616.7592163085938], "dref": "#/texts/90", "name": "text", "orig-order": 134, "page": 9, "span": [0, 1164], "text-order": 134, "type": "paragraph"}, {"bbox": [53.71706771850586, 396.8446350097656, 144.1709442138672, 407.8390808105469], "dref": "#/texts/91", "name": "subtitle-level-1", "orig-order": 135, "page": 9, "span": [0, 12], "text-order": 135, "type": "subtitle-level-1"}, {"bbox": [53.37699890136719, 340.66925048828125, 295.537841796875, 392.5202331542969], "dref": "#/texts/92", "name": "text", "orig-order": 136, "page": 9, "span": [0, 276], "text-order": 136, "type": "paragraph"}, {"bbox": [53.79800033569336, 263.8846435546875, 295.6170654296875, 337.7262268066406], "dref": "#/texts/93", "name": "text", "orig-order": 137, "page": 9, "span": [0, 468], "text-order": 137, "type": "paragraph"}, {"bbox": [53.79800033569336, 131.818603515625, 295.61065673828125, 261.0132141113281], "dref": "#/texts/94", "name": "text", "orig-order": 138, "page": 9, "span": [0, 808], "text-order": 138, "type": "paragraph"}, {"bbox": [53.79800033569336, 84.15838623046875, 295.2939147949219, 106.9937744140625], "dref": "#/footnotes/23", "name": "footnote", "orig-order": 139, "page": 9, "span": [0, 237], "text-order": 139, "type": "footnote"}, {"bbox": [317.6944274902344, 684.9768676757812, 559.6907348632812, 704.4302368164062], "dref": "#/texts/95", "name": "text", "orig-order": 140, "page": 9, "span": [0, 119], "text-order": 140, "type": "paragraph"}, {"bbox": [317.9549865722656, 663.3440551757812, 438.23162841796875, 673.7493286132812], "dref": "#/texts/96", "name": "subtitle-level-1", "orig-order": 141, "page": 9, "span": [0, 15], "text-order": 141, "type": "subtitle-level-1"}, {"bbox": [317.677001953125, 639.6272583007812, 564.3941040039062, 658.7048950195312], "dref": "#/texts/97", "name": "text", "orig-order": 142, "page": 9, "span": [0, 127], "text-order": 142, "type": "paragraph"}, {"bbox": [317.45098876953125, 584.833251953125, 571.9612426757812, 636.8023071289062], "dref": "#/texts/98", "name": "text", "orig-order": 143, "page": 9, "span": [0, 269], "text-order": 143, "type": "paragraph"}, {"bbox": [317.9549865722656, 563.3189697265625, 391.296875, 573.2864990234375], "dref": "#/texts/99", "name": "subtitle-level-1", "orig-order": 144, "page": 9, "span": [0, 10], "text-order": 144, "type": "subtitle-level-1"}, {"bbox": [320.02252197265625, 529.3375854492188, 560.26220703125, 559.6171875], "dref": "#/texts/100", "name": "list-item", "orig-order": 145, "page": 9, "span": [0, 280], "text-order": 145, "type": "paragraph"}, {"bbox": [321.42291259765625, 513.4962158203125, 559.0736694335938, 527.7372436523438], "dref": "#/texts/101", "name": "list-item", "orig-order": 146, "page": 9, "span": [0, 122], "text-order": 146, "type": "paragraph"}, {"bbox": [321.3348388671875, 489.4542236328125, 559.13916015625, 511.7962341308594], "dref": "#/texts/102", "name": "list-item", "orig-order": 147, "page": 9, "span": [0, 164], "text-order": 147, "type": "paragraph"}, {"bbox": [321.1488342285156, 457.43511962890625, 559.233154296875, 487.88623046875], "dref": "#/texts/103", "name": "list-item", "orig-order": 148, "page": 9, "span": [0, 282], "text-order": 148, "type": "paragraph"}, {"bbox": [321.2120056152344, 433.30517578125, 559.0735473632812, 456.0062255859375], "dref": "#/texts/104", "name": "list-item", "orig-order": 149, "page": 9, "span": [0, 224], "text-order": 149, "type": "paragraph"}, {"bbox": [321.4419860839844, 409.4002685546875, 558.4588012695312, 432.0952453613281], "dref": "#/texts/105", "name": "list-item", "orig-order": 150, "page": 9, "span": [0, 233], "text-order": 150, "type": "paragraph"}, {"bbox": [320.9912109375, 378.0406494140625, 559.8010864257812, 408.18524169921875], "dref": "#/texts/106", "name": "list-item", "orig-order": 151, "page": 9, "span": [0, 250], "text-order": 151, "type": "paragraph"}, {"bbox": [321.1594543457031, 346.1596374511719, 560.1024780273438, 376.44775390625], "dref": "#/texts/107", "name": "list-item", "orig-order": 152, "page": 9, "span": [0, 227], "text-order": 152, "type": "paragraph"}, {"bbox": [320.2006530761719, 330.21966552734375, 558.261474609375, 344.5801696777344], "dref": "#/texts/108", "name": "list-item", "orig-order": 153, "page": 9, "span": [0, 116], "text-order": 153, "type": "paragraph"}, {"bbox": [317.95501708984375, 274.36358642578125, 572.77392578125, 328.4842529296875], "dref": "#/texts/109", "name": "list-item", "orig-order": 154, "page": 9, "span": [0, 425], "text-order": 154, "type": "paragraph"}, {"bbox": [317.0665588378906, 250.09552001953125, 560.9763793945312, 272.6932373046875], "dref": "#/texts/110", "name": "list-item", "orig-order": 155, "page": 9, "span": [0, 166], "text-order": 155, "type": "paragraph"}], "page-footers": [], "page-headers": [], "properties": {"data": [["language", "en", 0.8799999952316284], ["semantic", "text", 0.9200000166893005]], "headers": ["type", "label", "confidence"]}, "tables": [{"#-cols": 5, "#-rows": 4, "captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/54", "hash": 9160199179916979172, "orig": "Table 1: Time-to-solution and performance results for the Faster RCNN and YOLOv2 models. The training of the models was done on 25000 PDF pages. The prediction (per page) and performance numbers (Recall= \u211b and Precision= \ud835\udcab) were obtained on 5000 page images, where the prediction confidence cutoff was tuned to yield the maximum F1 score for each. All time-to-solution measurements for training were obtained on a POWER8 node with a single Pascal P100 GPU.", "prov": [{"$ref": "#/page-elements/74"}], "text": "Table 1: Time-to-solution and performance results for the Faster RCNN and YOLOv2 models. The training of the models was done on 25000 PDF pages. The prediction (per page) and performance numbers (Recall= \u211b and Precision= \ud835\udcab) were obtained on 5000 page images, where the prediction confidence cutoff was tuned to yield the maximum F1 score for each. All time-to-solution measurements for training were obtained on a POWER8 node with a single Pascal P100 GPU.", "text-hash": 17279509228359814482, "type": "paragraph"}], "data": [[{"col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 0]], "text": ""}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 1]], "text": "Time to solution"}, {"col": 2, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 1], [0, 2]], "text": "Time to solution"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 3]], "text": "Performance"}, {"col": 4, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 3], [0, 4]], "text": "Performance"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 0]], "text": ""}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 1]], "text": "Training"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 2]], "text": "Prediction"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 3]], "text": "\ud835\udcab"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 4]], "text": "\u211b"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 0]], "text": "Faster-RCNN"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 1]], "text": "72 hours"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 2]], "text": "4 sec"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 3]], "text": "0.97"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 4]], "text": "0.98"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 0]], "text": "YOLOv2"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 1]], "text": "9 hours"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 2]], "text": "0.1 sec"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 3]], "text": "0 . 99"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 4]], "text": "0 . 98"}]], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/tables/0", "footnotes": [], "hash": 16709517892596982787, "mentions": [], "properties": {"data": [["language", "en", 0.75]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/75"}], "type": "table"}, {"#-cols": 8, "#-rows": 10, "captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/64", "hash": 18354136439820865774, "orig": "Table 3: Comparison for two different journal templates showing the aggregated precision and recall averaged over all labels. Each model has been independently trained on a dataset of 400 pages each. The results show that the ML algorithm proves to perform very well for the multiple document templates, simply by providing a different dataset to train on.", "prov": [{"$ref": "#/page-elements/86"}], "text": "Table 3: Comparison for two different journal templates showing the aggregated precision and recall averaged over all labels. Each model has been independently trained on a dataset of 400 pages each. The results show that the ML algorithm proves to perform very well for the multiple document templates, simply by providing a different dataset to train on.", "text-hash": 8085176655901164108, "type": "paragraph"}], "data": [[{"col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 0]], "text": ""}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 1]], "text": ""}, {"col": 2, "col-header": false, "col-span": [2, 7], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6]], "text": "predicted label"}, {"col": 3, "col-header": false, "col-span": [2, 7], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6]], "text": "predicted label"}, {"col": 4, "col-header": false, "col-span": [2, 7], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6]], "text": "predicted label"}, {"col": 5, "col-header": false, "col-span": [2, 7], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6]], "text": "predicted label"}, {"col": 6, "col-header": false, "col-span": [2, 7], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6]], "text": "predicted label"}, {"col": 7, "col-header": false, "col-span": [2, 8], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6], [0, 7]], "text": "predicted label"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 0]], "text": ""}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 1]], "text": "T itle"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 2]], "text": ""}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 3]], "text": "Author"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 4]], "text": "Subtitle"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 5]], "text": "Te xt"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 6]], "text": "Picture"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 7]], "text": "T able"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 1]], "text": "Title"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 2]], "text": "75"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 3]], "text": "0"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 4]], "text": "0"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 5]], "text": "0"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 6]], "text": "0"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 7]], "text": "0"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Author"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 1]], "text": "1"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 2]], "text": "670"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 3]], "text": "0"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 4]], "text": "0"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 5]], "text": "0"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 6]], "text": "0"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Subtitle"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 1]], "text": "0"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 2]], "text": "0"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 3]], "text": "325"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 4]], "text": "0"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 5]], "text": "0"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 6]], "text": "0"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Text"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 1]], "text": "1"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 2]], "text": "17"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 3]], "text": "0"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 4]], "text": "56460"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 5]], "text": "14"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 6]], "text": "0"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Picture"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 1]], "text": "0"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 2]], "text": "0"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 3]], "text": "0"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 4]], "text": "4"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 5]], "text": "4223"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 6]], "text": "26"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Table"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 1]], "text": "0"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 2]], "text": "0"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 3]], "text": "0"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 4]], "text": "0"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 5]], "text": "1"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 6]], "text": "3418"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Recall"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 1]], "text": "100"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 2]], "text": "99.85"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 3]], "text": "100"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 4]], "text": "99.94"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 5]], "text": "99.24"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 6]], "text": "99.97"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [2, 10], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0], [9, 0]], "text": "true label Precision"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 1]], "text": "97.40"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 2]], "text": "97.52"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 3]], "text": "100"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 4]], "text": "99.99"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 5]], "text": "99.64"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 6]], "text": "99.24"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 7]], "text": ""}]], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/tables/1", "footnotes": [], "hash": 16041588621504517180, "mentions": [], "properties": {"data": [["language", "en", 0.18000000715255737]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/81"}], "type": "table"}, {"#-cols": 3, "#-rows": 3, "captions": [], "data": [[{"col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 0]], "text": "Journal template"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 1]], "text": "\ud835\udcab"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2]], "text": "\u211b"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 0]], "text": "Physical Review B"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 1]], "text": "98 . 96"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 2]], "text": "99 . 83"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 0]], "text": "Elsevier"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 1]], "text": "99 . 46"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 2]], "text": "99 . 58"}]], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/tables/2", "footnotes": [], "hash": 14817357053216629605, "mentions": [], "properties": {"data": [["language", "ru", 0.18000000715255737]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/87"}], "type": "table"}], "texts": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/0", "hash": 7377574370756688828, "orig": "arXiv:1806.02284v1 [cs.DL] 24 May 2018", "properties": {"data": [["language", "en", 0.7799999713897705], ["semantic", "text", 0.9399999976158142]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/0"}], "text": "arXiv:1806.02284v1 [cs.DL] 24 May 2018", "text-hash": 605943372629925146, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/1", "hash": 10227328696767902037, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["language", "en", 0.699999988079071], ["semantic", "header", 0.7099999785423279]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/1"}], "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "title"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/2", "hash": 8770494724746327817, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["language", "en", 0.25999999046325684], ["semantic", "meta-data", 0.800000011920929]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/2"}], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/3", "hash": 18258237174351515285, "orig": "taa,dol,cau,bek@zurich.ibm.com", "properties": {"data": [["language", "zh", 0.09000000357627869], ["semantic", "text", 0.7900000214576721]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/3"}], "text": "taa,dol,cau,bek@zurich.ibm.com", "text-hash": 7883794643982446593, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/4", "hash": 5704354110496947297, "orig": "IBM Research", "properties": {"data": [["language", "en", 0.5299999713897705], ["semantic", "meta-data", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/4"}], "text": "IBM Research", "text-hash": 16114797969310195405, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/5", "hash": 11056873211244709904, "orig": "Rueschlikon, Switzerland", "properties": {"data": [["language", "en", 0.49000000953674316], ["semantic", "meta-data", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/5"}], "text": "Rueschlikon, Switzerland", "text-hash": 10483037511456664190, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/6", "hash": 11788868678004267702, "orig": "ABSTRACT", "properties": {"data": [["language", "en", 0.6499999761581421], ["semantic", "meta-data", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/6"}], "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/7", "hash": 3624246356859711021, "orig": "1 INTRODUCTION", "properties": {"data": [["language", "en", 0.550000011920929], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/7"}], "text": "1 INTRODUCTION", "text-hash": 4359834464932974729, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/8", "hash": 17999848460847860039, "orig": "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size. Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable. Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging. In this paper, we present a modular, cloud-based platform to ingest documents at scale. This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format. We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["language", "en", 0.9200000166893005], ["semantic", "text", 0.9700000286102295]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/8"}], "text": "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size. Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable. Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging. In this paper, we present a modular, cloud-based platform to ingest documents at scale. This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format. We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 8142196169563728819, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/9", "hash": 14387482728083328702, "orig": "ACM Reference Format:", "properties": {"data": [["language", "en", 0.20999999344348907], ["semantic", "header", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/9"}], "text": "ACM Reference Format:", "text-hash": 7430992009485070364, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/10", "hash": 11222145795862225841, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas. 2018. Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.. In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom. ACM, New York, NY, USA, 9 pages. https://doi.org/10. 1145/3219819.3219834", "properties": {"data": [["language", "en", 0.49000000953674316], ["semantic", "text", 0.8600000143051147]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/10"}], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas. 2018. Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.. In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom. ACM, New York, NY, USA, 9 pages. https://doi.org/10. 1145/3219819.3219834", "text-hash": 10605881125688857885, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/11", "hash": 16923207262044929933, "orig": "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$. These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery. It is needless to say that valuable qualitative and quantitative information is contained in many of them. However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout. Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery. In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$. This poses a real problem, since more and more information published in the PDF documents is going dark. In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components. First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML. Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 0.9700000286102295]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/15"}], "text": "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$. These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery. It is needless to say that valuable qualitative and quantitative information is contained in many of them. However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout. Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery. In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$. This poses a real problem, since more and more information published in the PDF documents is going dark. In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components. First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML. Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", "text-hash": 9516638039579926761, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/12", "hash": 3749305213430885773, "orig": "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files. The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms. This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components. Each of these microservices can be consumed by its own REST API. This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform. In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", "properties": {"data": [["language", "en", 0.949999988079071], ["semantic", "text", 0.9800000190734863]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/16"}], "text": "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files. The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms. This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components. Each of these microservices can be consumed by its own REST API. This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform. In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", "text-hash": 3945867624210419433, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/13", "hash": 3409470577915009676, "orig": "To obtain a thorough understanding of what our platform can do and how well it performs, we have structured this paper as follows: In Section 2, we briefly review the current state-of-the-art document", "properties": {"data": [["language", "en", 0.949999988079071], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/17"}], "text": "To obtain a thorough understanding of what our platform can do and how well it performs, we have structured this paper as follows: In Section 2, we briefly review the current state-of-the-art document", "text-hash": 4583103017707584490, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/15", "hash": 17187299362680072378, "orig": "processing solutions. In Section 3, we present the design of the platform and its components. In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively. Finally, in Section 5, we discuss the open questions w.r.t. research and possible next steps in the development of the platform.", "properties": {"data": [["language", "en", 0.9200000166893005], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/22"}], "text": "processing solutions. In Section 3, we present the design of the platform and its components. In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively. Finally, in Section 5, we discuss the open questions w.r.t. research and possible next steps in the development of the platform.", "text-hash": 9243393324994873880, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/16", "hash": 697648145931166262, "orig": "2 STATE OF THE ART", "properties": {"data": [["language", "en", 0.47999998927116394], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/23"}], "text": "2 STATE OF THE ART", "text-hash": 2385816824895853732, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/17", "hash": 7935233310532930917, "orig": "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]. Broadly speaking, there are two types of approaches to this problem. In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document. This can be done through a conversion from PDF towards HTML or MS Word for example. The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format. For example, this could be a JSON/XML file with a particular schema. Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", "properties": {"data": [["language", "en", 0.9200000166893005], ["semantic", "text", 0.9800000190734863]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/24"}], "text": "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]. Broadly speaking, there are two types of approaches to this problem. In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document. This can be done through a conversion from PDF towards HTML or MS Word for example. The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format. For example, this could be a JSON/XML file with a particular schema. Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", "text-hash": 57757550267838417, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/18", "hash": 2762070725424637531, "orig": "Many solutions have already been developed that tackle the problem of document conversion. There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$. There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$. In contrast to the open-source solutions, all three proprietary solutions support also", "properties": {"data": [["language", "en", 0.9700000286102295], ["semantic", "text", 0.9800000190734863]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/25"}], "text": "Many solutions have already been developed that tackle the problem of document conversion. There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$. There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$. In contrast to the open-source solutions, all three proprietary solutions support also", "text-hash": 5230489225511983287, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/19", "hash": 7536915191196259776, "orig": "extraction from scanned documents. Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries. For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref. [1] and previous editions.", "properties": {"data": [["language", "en", 0.9900000095367432], ["semantic", "text", 0.949999988079071]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/31"}], "text": "extraction from scanned documents. Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries. For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref. [1] and previous editions.", "text-hash": 167221319977518894, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/20", "hash": 11495493007651807568, "orig": "3 PLATFORM DESIGN", "properties": {"data": [["language", "en", 0.3100000023841858], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/32"}], "text": "3 PLATFORM DESIGN", "text-hash": 10322960049580053438, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/21", "hash": 7650015170039242996, "orig": "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/33"}], "text": "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way.", "text-hash": 333520156392116834, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/22", "hash": 14959508657858158650, "orig": "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation. This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms. This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased.", "properties": {"data": [["language", "en", 0.9599999785423279], ["semantic", "text", 0.9700000286102295]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/34"}], "text": "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation. This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms. This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased.", "text-hash": 6868109665737773720, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/23", "hash": 10379300903412882972, "orig": "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design. First of all, one can not think anymore at the level of a single document. Rather, one should think at the level of a collection of documents (or a corpus of documents). A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is. This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format. Our solution can ingest an entire collection of documents and build machine learned models on top of that. Of course, once the the model is trained, one can convert documents one at a time, too.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 0.949999988079071]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/35"}], "text": "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design. First of all, one can not think anymore at the level of a single document. Rather, one should think at the level of a collection of documents (or a corpus of documents). A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is. This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format. Our solution can ingest an entire collection of documents and build machine learned models on top of that. Of course, once the the model is trained, one can convert documents one at a time, too.", "text-hash": 11150916691880738938, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/25", "hash": 4994395008195818594, "orig": "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it. Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way. These annotations are then used as ground-truth data to train models. It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents. For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application. It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way.", "properties": {"data": [["language", "en", 0.9599999785423279], ["semantic", "text", 0.9700000286102295]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/39"}], "text": "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it. Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way. These annotations are then used as ground-truth data to train models. It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents. For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application. It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way.", "text-hash": 16536368219630364368, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/26", "hash": 4203835122307823579, "orig": "3.1 Components", "properties": {"data": [["language", "en", 0.23999999463558197], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/40"}], "text": "3.1 Components", "text-hash": 3789103236857293111, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/27", "hash": 13520362244078084911, "orig": "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML).", "properties": {"data": [["language", "en", 0.75], ["semantic", "text", 0.9800000190734863]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/41"}], "text": "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML).", "text-hash": 12910497814715733387, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/28", "hash": 1749622367305947670, "orig": "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format. If a trained model is available, only components 1, 4 and 5 are needed to convert the documents. If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models. It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional.", "properties": {"data": [["language", "en", 0.8999999761581421], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/42"}], "text": "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format. If a trained model is available, only components 1, 4 and 5 are needed to convert the documents. If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models. It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional.", "text-hash": 1334541935326461060, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/30", "hash": 11083736481641202939, "orig": "Let us now elaborate on what each of the five components deliver in the rest of this section.", "properties": {"data": [["language", "en", 0.9200000166893005], ["semantic", "text", 0.9599999785423279]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/44"}], "text": "Let us now elaborate on what each of the five components deliver in the rest of this section.", "text-hash": 10456209429844276823, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/31", "hash": 15403141463083979171, "orig": "3.2 Parsing of Documents", "properties": {"data": [["language", "en", 0.6800000071525574], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/45"}], "text": "3.2 Parsing of Documents", "text-hash": 6127225399482532623, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/32", "hash": 12234429517419341922, "orig": "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page. For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper. There are two reasons why we are interested in these cells. First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label. Second, the concept of a cell can be easily transferred to scanned documents. In Figure 2, we show the cells obtained from an example PDF page after the parsing stage.", "properties": {"data": [["language", "en", 0.9300000071525574], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/46"}], "text": "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page. For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper. There are two reasons why we are interested in these cells. First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label. Second, the concept of a cell can be easily transferred to scanned documents. In Figure 2, we show the cells obtained from an example PDF page after the parsing stage.", "text-hash": 13908173772261346000, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/33", "hash": 16957857111665886816, "orig": "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells. This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs. Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines. This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models. As a consequence, we reduce the variability of the geometric features as much as possible. The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/47"}], "text": "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells. This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs. Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines. This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models. As a consequence, we reduce the variability of the geometric features as much as possible. The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions.", "text-hash": 9481411723883903182, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/34", "hash": 10390915169360946497, "orig": "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document. This operation relies on the iterators provided by the QPDF library$^{9}$.", "properties": {"data": [["language", "en", 0.8500000238418579], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/48"}], "text": "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document. This operation relies on the iterators provided by the QPDF library$^{9}$.", "text-hash": 11149022357700220845, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/35", "hash": 15254383206256494278, "orig": "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content. Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document. From this point, all further processing does not need to distinguish between scanned or programmatic sources.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/51"}], "text": "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content. Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document. From this point, all further processing does not need to distinguish between scanned or programmatic sources.", "text-hash": 6573226034038831156, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/36", "hash": 17759618186065566858, "orig": "3.3 Ground-truth gathering through human-annotation", "properties": {"data": [["language", "en", 0.8299999833106995], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/52"}], "text": "3.3 Ground-truth gathering through human-annotation", "text-hash": 8679681341332585960, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/37", "hash": 11638821473906997927, "orig": "In this component, we collect ground-truth for the custom machine learning models to be trained on. Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision. Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents. As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning. The purpose of these annotators is two-fold.", "properties": {"data": [["language", "en", 0.9700000286102295], ["semantic", "text", 0.9800000190734863]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/53"}], "text": "In this component, we collect ground-truth for the custom machine learning models to be trained on. Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision. Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents. As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning. The purpose of these annotators is two-fold.", "text-hash": 14503768930839698451, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/38", "hash": 13020065077657899116, "orig": "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach. In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2). We then ask the (human) annotator to assign each cell a layout semantic label. Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$. In the annotator tool, each layout semantic label is visually represented by a colour. By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3. Since humans are very efficient in visual recognition, this task comes very natural to us. The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", "properties": {"data": [["language", "en", 0.8899999856948853], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/54"}], "text": "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach. In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2). We then ask the (human) annotator to assign each cell a layout semantic label. Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$. In the annotator tool, each layout semantic label is visually represented by a colour. By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3. Since humans are very efficient in visual recognition, this task comes very natural to us. The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", "text-hash": 13130850271187616458, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/39", "hash": 10103841011442966464, "orig": "The second purpose of the annotators is to visually inspect the quality of our machine learned models. The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell. Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page. This allows the users to directly inspect the results of the models on unseen pages. A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels. Of course, as the models become better over time, the number of corrections needed to be made become less and less. This allows us to significantly reduce the annotation time per document. Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/55"}], "text": "The second purpose of the annotators is to visually inspect the quality of our machine learned models. The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell. Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page. This allows the users to directly inspect the results of the models on unseen pages. A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels. Of course, as the models become better over time, the number of corrections needed to be made become less and less. This allows us to significantly reduce the annotation time per document. Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering.", "text-hash": 11435379797753757998, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/40", "hash": 10982401368140758581, "orig": "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute. The vertical red lines indicate that a training was performed on the annotated pages, and a new, improved model is", "properties": {"data": [["language", "en", 0.9599999785423279], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/56"}], "text": "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute. The vertical red lines indicate that a training was performed on the annotated pages, and a new, improved model is", "text-hash": 10548529097098469537, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/42", "hash": 887751753527930563, "orig": "used from that point to predict the labels. Since the corrections become less and less, the rate of annotation goes up. It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice. The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", "properties": {"data": [["language", "en", 0.949999988079071], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/60"}], "text": "used from that point to predict the labels. Since the corrections become less and less, the rate of annotation goes up. It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice. The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", "text-hash": 2205427981859754031, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/43", "hash": 4695688617288377564, "orig": "3.4 Machine Learning: Training models & Applying models", "properties": {"data": [["language", "en", 0.800000011920929], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/61"}], "text": "3.4 Machine Learning: Training models & Applying models", "text-hash": 16834670239362291258, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/44", "hash": 3275001812318455279, "orig": "In the CCS, there are essentially two types of machine-learning models. On the one hand, we have default models, which are designed to be layout independent. They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall. They will classify each cell in the page with regard to their layout semantic label.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/62"}], "text": "In the CCS, there are essentially two types of machine-learning models. On the one hand, we have default models, which are designed to be layout independent. They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall. They will classify each cell in the page with regard to their layout semantic label.", "text-hash": 4429706140044408651, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/45", "hash": 15354930767839681193, "orig": "3.4.1 Metrics. Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results. The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label. The correctness of this label is what we aim to measure with the recall and precision metrics. The second observation is that we deal with a", "properties": {"data": [["language", "en", 0.8999999761581421], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/63"}], "text": "3.4.1 Metrics. Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results. The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label. The correctness of this label is what we aim to measure with the recall and precision metrics. The second observation is that we deal with a", "text-hash": 6184852591532473349, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/47", "hash": 6337233386759158728, "orig": "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label.", "properties": {"data": [["language", "en", 0.8999999761581421], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/66"}], "text": "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label.", "text-hash": 15490331838172880166, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/48", "hash": 2249972239307071508, "orig": "The recall (= \u211b) and precision (= \ud835\udcab) for a given label on a page is defined by the standard formulas", "properties": {"data": [["language", "en", 0.8199999928474426], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/67"}], "text": "The recall (= \u211b) and precision (= \ud835\udcab) for a given label on a page is defined by the standard formulas", "text-hash": 1131271437908497026, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/49", "hash": 12383805870947794174, "orig": "\u211b = t$_{p}$ t$_{p}$ + f$_{p}$ , \ud835\udcab = t$_{p}$ t$_{p}$ + f$_{n}$ , (1)", "properties": {"data": [["language", "en", 0.27000001072883606], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/68"}], "text": "\u211b = t$_{p}$ t$_{p}$ + f$_{p}$ , \ud835\udcab = t$_{p}$ t$_{p}$ + f$_{n}$ , (1)", "text-hash": 14055366495763095132, "type": "equation"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/50", "hash": 7053654953998543393, "orig": "where t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels.", "properties": {"data": [["language", "en", 0.5799999833106995], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/69"}], "text": "where t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels.", "text-hash": 642098605774556301, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/51", "hash": 15921044595687116426, "orig": "3.4.2 Default Models. The aim of the default models is to identify specific, ubiquitous objects in documents. Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods. Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]. On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", "properties": {"data": [["language", "en", 0.949999988079071], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/70"}], "text": "3.4.2 Default Models. The aim of the default models is to identify specific, ubiquitous objects in documents. Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods. Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]. On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", "text-hash": 5618307884355612648, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/52", "hash": 12234068400463628788, "orig": "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects.", "properties": {"data": [["language", "en", 0.9700000286102295], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/71"}], "text": "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects.", "text-hash": 13907813772802190178, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/53", "hash": 4628466594790006384, "orig": "The networks available on our platform have been trained on arXiv data$^{11}$. We have annotated 30000 PDF pages and know the", "properties": {"data": [["language", "en", 0.9200000166893005], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/72"}], "text": "The networks available on our platform have been trained on arXiv data$^{11}$. We have annotated 30000 PDF pages and know the", "text-hash": 16911352314006995166, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/55", "hash": 9651706913678711778, "orig": "location of at least one table on each page. From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation. Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", "properties": {"data": [["language", "en", 0.9200000166893005], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/76"}], "text": "location of at least one table on each page. From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation. Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", "text-hash": 11888191065829014864, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/56", "hash": 1363251178266051349, "orig": "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes. The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks. An example of such an image can be seen in Figure 5. The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts. Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition. This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet. We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page.", "properties": {"data": [["language", "en", 0.9100000262260437], ["semantic", "text", 0.9800000190734863]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/77"}], "text": "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes. The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks. An example of such an image can be seen in Figure 5. The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts. Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition. This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet. We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page.", "text-hash": 2009046567395259777, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/57", "hash": 18259197018396996238, "orig": "Let us now discuss both deep neural network training microservices on the platform. In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision. In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times. We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied. We believe that this is the main origin for the discrepancy of time-to-solution for the training phase. The same holds true for the prediction. Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", "properties": {"data": [["language", "en", 0.9599999785423279], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/78"}], "text": "Let us now discuss both deep neural network training microservices on the platform. In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision. In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times. We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied. We believe that this is the main origin for the discrepancy of time-to-solution for the training phase. The same holds true for the prediction. Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", "text-hash": 7883278994224882668, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/58", "hash": 14663676516964431047, "orig": "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously. The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1. We use these bounding boxes to associate with each cell a label, which is in this particular case either Table or Not-Table, depending on whether they overlap", "properties": {"data": [["language", "en", 0.949999988079071], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/79"}], "text": "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously. The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1. We use these bounding boxes to associate with each cell a label, which is in this particular case either Table or Not-Table, depending on whether they overlap", "text-hash": 7164504172498806323, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/59", "hash": 4577067829072175096, "orig": "Table 2: Performance results for the template specific model of the Physical Review B journals. The confusion matrix highlights the huge imbalance between the number of text cells with different labels. The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", "properties": {"data": [["language", "en", 0.8600000143051147], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/80"}], "text": "Table 2: Performance results for the template specific model of the Physical Review B journals. The confusion matrix highlights the huge imbalance between the number of text cells with different labels. The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", "text-hash": 3406859306294395222, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/60", "hash": 2569392033451362672, "orig": "with the predicted bounding box. The corresponding recall and precision are then computed for this dual-class classification problem. In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence. For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0. 5. The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers. We believe this originates from the selective search algorithm which is used to determine regions of interest. The images we feed it are not typical photographic images (made with a camera) but layout visualisations. The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", "properties": {"data": [["language", "en", 0.9200000166893005], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/82"}], "text": "with the predicted bounding box. The corresponding recall and precision are then computed for this dual-class classification problem. In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence. For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0. 5. The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers. We believe this originates from the selective search algorithm which is used to determine regions of interest. The images we feed it are not typical photographic images (made with a camera) but layout visualisations. The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", "text-hash": 5414143675771382750, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/61", "hash": 14539041145469267811, "orig": "3.4.3 Template specific Models. The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template. This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance. Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", "properties": {"data": [["language", "en", 0.9200000166893005], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/83"}], "text": "3.4.3 Template specific Models. The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template. This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance. Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", "text-hash": 6991735551340401103, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/62", "hash": 8607014065143641201, "orig": "For an algorithm to fit in the interactive platform design we identified a few key requirements. First, it is crucial that the model can generate good results with a limited set of pages. In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation. Second it must be robust against extreme imbalance of the labeled data. It is clear that cells of the label Title will be much more uncommon than cells with the label of Text. Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", "properties": {"data": [["language", "en", 0.949999988079071], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/84"}], "text": "For an algorithm to fit in the interactive platform design we identified a few key requirements. First, it is crucial that the model can generate good results with a limited set of pages. In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation. Second it must be robust against extreme imbalance of the labeled data. It is clear that cells of the label Title will be much more uncommon than cells with the label of Text. Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", "text-hash": 17832237182951286493, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/63", "hash": 1994904537764312371, "orig": "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models. Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data. In our case,", "properties": {"data": [["language", "en", 0.949999988079071], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/85"}], "text": "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models. Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data. In our case,", "text-hash": 1377511684573734815, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/65", "hash": 7742256726079628058, "orig": "this structure originates of course from the template. Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements. As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", "properties": {"data": [["language", "en", 0.9200000166893005], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/88"}], "text": "this structure originates of course from the template. Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements. As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", "text-hash": 250119056806139256, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/66", "hash": 8810233123818174294, "orig": "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties. For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells. Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters. We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell.", "properties": {"data": [["language", "en", 0.9599999785423279], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/89"}], "text": "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties. For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells. Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters. We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell.", "text-hash": 17619932035192809924, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/67", "hash": 16446711449286912460, "orig": "It is important to realize that almost all of these features are purely geometrical. This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 0.9800000190734863]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/90"}], "text": "It is important to realize that almost all of these features are purely geometrical. This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents.", "text-hash": 9704353849744984874, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/68", "hash": 9558434107504657973, "orig": "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$. We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels. Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label. We observe that the recall and precision numbers are excellent, with most of them above 99%. This is not surprising, since we are building models that specialise for a particular template.", "properties": {"data": [["language", "en", 0.9100000262260437], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/91"}], "text": "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$. We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels. Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label. We observe that the recall and precision numbers are excellent, with most of them above 99%. This is not surprising, since we are building models that specialise for a particular template.", "text-hash": 11971893452237256865, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/69", "hash": 18349896906192842040, "orig": "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on. The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform. We do not need to define rules and heuristics or update code in order to deal with new types of documents. We only need to gather more data.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/92"}], "text": "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on. The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform. We do not need to define rules and heuristics or update code in order to deal with new types of documents. We only need to gather more data.", "text-hash": 8080940474762743702, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/70", "hash": 10082834006373808153, "orig": "3.5 Assembly", "properties": {"data": [["language", "en", 0.8199999928474426], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/93"}], "text": "3.5 Assembly", "text-hash": 11736313095563614837, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/71", "hash": 15253541252152665681, "orig": "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics. This structured data file is constructed by assembling all the cells from the parsed file", "properties": {"data": [["language", "en", 0.8899999856948853], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/94"}], "text": "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics. This structured data file is constructed by assembling all the cells from the parsed file", "text-hash": 6565628665194191037, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/72", "hash": 3904142170608486950, "orig": "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper.", "properties": {"data": [["language", "en", 0.7799999713897705], ["semantic", "text", 0.5199999809265137]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/96"}], "text": "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper.", "text-hash": 4079383948124449940, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/73", "hash": 6410818076508661508, "orig": "{ 'description ': { 'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale. ',, \u2192 'abstract ': 'Over the past few decades, the amount of scientific articles [...] ',, \u2192 'affiliations ': 'IBM Research Rueschlikon, Switzerland ', 'authors ': 'Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas ', \u2192 }, 'main-text ': [{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...], 'tables ': [{...},...], 'images ': [{...},...] }", "properties": {"data": [["language", "en", 0.3499999940395355], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/97"}], "text": "{ 'description ': { 'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale. ',, \u2192 'abstract ': 'Over the past few decades, the amount of scientific articles [...] ',, \u2192 'affiliations ': 'IBM Research Rueschlikon, Switzerland ', 'authors ': 'Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas ', \u2192 }, 'main-text ': [{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...], 'tables ': [{...},...], 'images ': [{...},...] }", "text-hash": 15129105844666734962, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/74", "hash": 12813875992986832439, "orig": "in combination with their associated predicted (or human-annotated) layout semantic labels. It should be noted that no machine learning is used in this component. It is purely rule based and therefore completely deterministic.", "properties": {"data": [["language", "en", 0.9800000190734863], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/98"}], "text": "in combination with their associated predicted (or human-annotated) layout semantic labels. It should be noted that no machine learning is used in this component. It is purely rule based and therefore completely deterministic.", "text-hash": 13337022012432085155, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/75", "hash": 11030869010407626539, "orig": "The assembly phase is a two step process. First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order. Then, the text of all cells that have the same label is contracted into a temporary document objects. Third, we build the internal structure of the temporary document objects, based on the information provided by the models. The latter is only applicable for internally structured objects, such as tables. An example of the generated JSON output is shown in Listing 1.", "properties": {"data": [["language", "en", 0.949999988079071], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/99"}], "text": "The assembly phase is a two step process. First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order. Then, the text of all cells that have the same label is contracted into a temporary document objects. Third, we build the internal structure of the temporary document objects, based on the information provided by the models. The latter is only applicable for internally structured objects, such as tables. An example of the generated JSON output is shown in Listing 1.", "text-hash": 10508897272021404039, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/76", "hash": 2142320548375900929, "orig": "4 ARCHITECTURE AND ORCHESTRATION OF CLOUD BASED MICROSERVICES", "properties": {"data": [["language", "en", 0.33000001311302185], ["semantic", "header", 0.8700000047683716]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/100"}], "text": "4 ARCHITECTURE AND ORCHESTRATION OF CLOUD BASED MICROSERVICES", "text-hash": 950718827856471405, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/77", "hash": 12747011194397783283, "orig": "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated. Before discussing the technical details, we would like to point out our requirements for the architecture of the platform. These requirements are all related to scaling. Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources. In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation. It is clear that the architecture of such a service is heavily influenced by these requirements.", "properties": {"data": [["language", "en", 0.9599999785423279], ["semantic", "text", 0.9700000286102295]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/101"}], "text": "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated. Before discussing the technical details, we would like to point out our requirements for the architecture of the platform. These requirements are all related to scaling. Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources. In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation. It is clear that the architecture of such a service is heavily influenced by these requirements.", "text-hash": 13395059553653450335, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/78", "hash": 174789262945188010, "orig": "4.1 Platform layers", "properties": {"data": [["language", "en", 0.6200000047683716], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/102"}], "text": "4.1 Platform layers", "text-hash": 3197077882590976520, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/79", "hash": 7228893318503650455, "orig": "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents. In Figure 6, we show a sketch of its", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/103"}], "text": "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents. In Figure 6, we show a sketch of its", "text-hash": 475277818666452483, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/81", "hash": 9230667184712205690, "orig": "architecture. As one can observe, we have grouped the service into four layers. These layers are:", "properties": {"data": [["language", "en", 0.9599999785423279], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/106"}], "text": "architecture. As one can observe, we have grouped the service into four layers. These layers are:", "text-hash": 12309253064221915096, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/82", "hash": 17419815751432442882, "orig": "(1) An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering. The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", "properties": {"data": [["language", "en", 0.8600000143051147], ["semantic", "text", 0.9200000166893005]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/107"}], "text": "(1) An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering. The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", "text-hash": 8731693174932948592, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/83", "hash": 11194226403360998426, "orig": "(2) An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result. The task scheduling is done with the Message Broker RabbitMQ$^{14}$. The results are stored in the in-memory data store Redis$^{15}$. In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully. This approach allows for a very robust, fault-tolerant service with very little downtime.", "properties": {"data": [["language", "en", 0.8899999856948853], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/108"}], "text": "(2) An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result. The task scheduling is done with the Message Broker RabbitMQ$^{14}$. The results are stored in the in-memory data store Redis$^{15}$. In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully. This approach allows for a very robust, fault-tolerant service with very little downtime.", "text-hash": 10633901501381588600, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/84", "hash": 9005324696118733701, "orig": "(3) A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc). In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$. This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker. The workers are not only consumers of tasks, but may also produce new ones. This is the case for the requests", "properties": {"data": [["language", "en", 0.8799999952316284], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/109"}], "text": "(3) A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc). In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$. This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker. The workers are not only consumers of tasks, but may also produce new ones. This is the case for the requests", "text-hash": 17146307233289309425, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/86", "hash": 8082547756621048511, "orig": "operating on the whole corpus. Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", "properties": {"data": [["language", "en", 0.800000011920929], ["semantic", "text", 0.9200000166893005]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/116"}], "text": "operating on the whole corpus. Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", "text-hash": 18059523399368641563, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/87", "hash": 7791113385466815951, "orig": "(4) A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store. The object-store allows us to easily scale the storage with regard to the number of processed documents. However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", "properties": {"data": [["language", "en", 0.9200000166893005], ["semantic", "text", 0.949999988079071]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/117"}], "text": "(4) A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store. The object-store allows us to easily scale the storage with regard to the number of processed documents. However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", "text-hash": 18360382746077681451, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/88", "hash": 2845012065511066307, "orig": "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it. This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ. Being a cloud-based platform, our solution allows for these software assets to be detached from the main deployment and to be served by specialised vendors services which are certified to the latest industry requirements such as data-at-rest encryption, high availability, etc.", "properties": {"data": [["language", "en", 0.9599999785423279], ["semantic", "text", 0.9700000286102295]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/118"}], "text": "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it. This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ. Being a cloud-based platform, our solution allows for these software assets to be detached from the main deployment and to be served by specialised vendors services which are certified to the latest industry requirements such as data-at-rest encryption, high availability, etc.", "text-hash": 5147922161190726703, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/89", "hash": 15072914837937068796, "orig": "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform. From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services. During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks. For example other result-backends didn't offer the auto-cleaning functionality offered by Redis and, before opting for a custom solution mixing MongoDB with an object storage, we evaluated other solutions as", "properties": {"data": [["language", "en", 0.949999988079071], ["semantic", "text", 0.9599999785423279]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/119"}], "text": "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform. From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services. During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks. For example other result-backends didn't offer the auto-cleaning functionality offered by Redis and, before opting for a custom solution mixing MongoDB with an object storage, we evaluated other solutions as", "text-hash": 6457975667604208730, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/91", "hash": 15263283599394646155, "orig": "the GridFS storage, but it didn't fit to the constraints of typical cloud environments.", "properties": {"data": [["language", "en", 0.9800000190734863], ["semantic", "text", 0.9700000286102295]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/123"}], "text": "the GridFS storage, but it didn't fit to the constraints of typical cloud environments.", "text-hash": 6564180200469858791, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/92", "hash": 11417717357379295278, "orig": "4.2 Deployment", "properties": {"data": [["language", "en", 0.8399999737739563], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/124"}], "text": "4.2 Deployment", "text-hash": 10410411375713696396, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/93", "hash": 9031137420247852045, "orig": "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution. Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", "properties": {"data": [["language", "en", 0.8500000238418579], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/125"}], "text": "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution. Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", "text-hash": 17120327512656828009, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/94", "hash": 18436578077535696718, "orig": "The common parts of all deployments are the interface and the compute layer. The compute layer is designed for dynamically adapt the number of resources on the current load. For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 0.9399999976158142]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/126"}], "text": "The common parts of all deployments are the interface and the compute layer. The compute layer is designed for dynamically adapt the number of resources on the current load. For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", "text-hash": 8003240278028347820, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/95", "hash": 11734907767490759865, "orig": "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements. The parse component is indeed more demanding than the simple annotation components.", "properties": {"data": [["language", "en", 0.9100000262260437], ["semantic", "text", 0.9800000190734863]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/127"}], "text": "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements. The parse component is indeed more demanding than the simple annotation components.", "text-hash": 14704352826439757333, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/96", "hash": 7845460979782401889, "orig": "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks. Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment. Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/128"}], "text": "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks. Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment. Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", "text-hash": 18296438351865061837, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/97", "hash": 17769988780693768120, "orig": "4.3 Scaling benchmarks", "properties": {"data": [["language", "en", 0.38999998569488525], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/129"}], "text": "4.3 Scaling benchmarks", "text-hash": 8669715371308316950, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/98", "hash": 12387489643011067991, "orig": "Let us now discuss some scaling results on our platform. As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources. In Figure 7, we show the number of users and the number of processed PDF", "properties": {"data": [["language", "en", 0.9300000071525574], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/130"}], "text": "Let us now discuss some scaling results on our platform. As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources. In Figure 7, we show the number of users and the number of processed PDF", "text-hash": 14043220598855238339, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/99", "hash": 10375772475809458895, "orig": "pages 20 as a function of time. As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017. It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time. Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", "properties": {"data": [["language", "en", 0.9900000095367432], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/133"}], "text": "pages 20 as a function of time. As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017. It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time. Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", "text-hash": 11451664978555915307, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/100", "hash": 7054726458191881751, "orig": "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources. We show this scaling by displaying the speedup versus the number of worker nodes available. Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores. As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes. Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker. The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level. The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes. Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", "properties": {"data": [["language", "en", 0.9399999976158142], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/134"}], "text": "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources. We show this scaling by displaying the speedup versus the number of worker nodes available. Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores. As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes. Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker. The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level. The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes. Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", "text-hash": 641132783909312643, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/101", "hash": 7794115281016062068, "orig": "5 CONCLUSION", "properties": {"data": [["language", "en", 0.38999998569488525], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/135"}], "text": "5 CONCLUSION", "text-hash": 18347902420476900066, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/102", "hash": 7038163015905900647, "orig": "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", "properties": {"data": [["language", "en", 0.9200000166893005], ["semantic", "text", 0.9800000190734863]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/136"}], "text": "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", "text-hash": 657005981473069779, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/103", "hash": 1508626318915838319, "orig": "The fundamental design choices in our solution have proven to enable scaling in three elementary ways. First, it can service multiple users concurrently. Second, it can ingest, parse and apply machine learned models on many documents at the same time. Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", "properties": {"data": [["language", "en", 0.949999988079071], ["semantic", "text", 0.9399999976158142]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/137"}], "text": "The fundamental design choices in our solution have proven to enable scaling in three elementary ways. First, it can service multiple users concurrently. Second, it can ingest, parse and apply machine learned models on many documents at the same time. Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", "text-hash": 1575427749670982603, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/104", "hash": 17247086344435786796, "orig": "In the future, we plan to extend the platform in two major areas. First, we would like to extend the number of microservices, especially with regard to image understanding. The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc). The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier. Second, we would like to improve the quality and performance of our default models. We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5). To leverage this growing use of deep learning models, we will additionally introduce", "properties": {"data": [["language", "en", 0.9300000071525574], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/138"}], "text": "In the future, we plan to extend the platform in two major areas. First, we would like to extend the number of microservices, especially with regard to image understanding. The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc). The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier. Second, we would like to improve the quality and performance of our default models. We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5). To leverage this growing use of deep learning models, we will additionally introduce", "text-hash": 9192771730962863754, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/105", "hash": 10287541089279789496, "orig": "specialised data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", "properties": {"data": [["language", "en", 0.8299999833106995], ["semantic", "text", 0.9399999976158142]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/140"}], "text": "specialised data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", "text-hash": 11530911151361059606, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/106", "hash": 7819882792760965882, "orig": "ACKNOWLEDGMENTS", "properties": {"data": [["language", "en", 0.25], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/141"}], "text": "ACKNOWLEDGMENTS", "text-hash": 18322720810464861272, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/107", "hash": 15983582675278266440, "orig": "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", "properties": {"data": [["language", "en", 0.949999988079071], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/142"}], "text": "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", "text-hash": 5556222901900980902, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/108", "hash": 12711351442546714716, "orig": "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation. MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", "properties": {"data": [["language", "en", 0.9300000071525574], ["semantic", "text", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/143"}], "text": "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation. MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", "text-hash": 13431247303555599034, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/109", "hash": 1225384713519841338, "orig": "REFERENCES", "properties": {"data": [["language", "en", 0.33000001311302185], ["semantic", "header", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/144"}], "text": "REFERENCES", "text-hash": 1858797456585454232, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/110", "hash": 1712774266196702392, "orig": "[1] A. Antonacopoulos, C. Clausner, C. Papadopoulos, and S. Pletschacher. 2015. ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015. In Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR2015). Nancy, 1151-1155.", "properties": {"data": [["language", "en", 0.6499999761581421], ["semantic", "reference", 0.9599999785423279]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/145"}], "text": "[1] A. Antonacopoulos, C. Clausner, C. Papadopoulos, and S. Pletschacher. 2015. ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015. In Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR2015). Nancy, 1151-1155.", "text-hash": 1659105420801451542, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/111", "hash": 14718288547983000340, "orig": "[2] Leo Breiman. 2001. Random Forests. Machine Learning 45, 1 (01 Oct 2001), 5-32. https://doi.org/10.1023/A:1010933404324", "properties": {"data": [["language", "en", 0.5799999833106995], ["semantic", "text", 0.6100000143051147]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/146"}], "text": "[2] Leo Breiman. 2001. Random Forests. Machine Learning 45, 1 (01 Oct 2001), 5-32. https://doi.org/10.1023/A:1010933404324", "text-hash": 6812664208788567426, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/112", "hash": 16943780574244090186, "orig": "[3] R. Cattoni, T. Coianiz, S. Messelodi, and C. M. Modena. 1998. Geometric layout analysis techniques for document image understanding: a review. Technical Report.", "properties": {"data": [["language", "en", 0.6700000166893005], ["semantic", "reference", 0.7799999713897705]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/147"}], "text": "[3] R. Cattoni, T. Coianiz, S. Messelodi, and C. M. Modena. 1998. Geometric layout analysis techniques for document image understanding: a review. Technical Report.", "text-hash": 9486476535199015848, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/113", "hash": 8004985786049140169, "orig": "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier. 2005. From Legacy Documents to XML: A Conversion Framework. Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103. https://doi.org/10.1007/11551362_9", "properties": {"data": [["language", "en", 0.3400000035762787], ["semantic", "text", 0.49000000953674316]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/148"}], "text": "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier. 2005. From Legacy Documents to XML: A Conversion Framework. Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103. https://doi.org/10.1007/11551362_9", "text-hash": 18434854666592634661, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/114", "hash": 12744546813104546377, "orig": "[5] Ross Girshick. 2015. Fast R-CNN. In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15). IEEE Computer Society, Washington, DC, USA, 1440-1448. https://doi.org/10.1109/ICCV.2015.169", "properties": {"data": [["language", "en", 0.47999998927116394], ["semantic", "text", 0.6100000143051147]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/149"}], "text": "[5] Ross Girshick. 2015. Fast R-CNN. In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15). IEEE Computer Society, Washington, DC, USA, 1440-1448. https://doi.org/10.1109/ICCV.2015.169", "text-hash": 13406949228208477349, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/115", "hash": 16061746189176848219, "orig": "[6] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. 2013. Rich feature hierarchies for accurate object detection and semantic segmentation. CoRR abs/1311.2524 (2013). arXiv:1311.2524 http://arxiv.org/abs/1311.2524", "properties": {"data": [["language", "en", 0.6299999952316284], ["semantic", "reference", 0.5799999833106995]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/150"}], "text": "[6] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. 2013. Rich feature hierarchies for accurate object detection and semantic segmentation. CoRR abs/1311.2524 (2013). arXiv:1311.2524 http://arxiv.org/abs/1311.2524", "text-hash": 5756829059313082807, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/116", "hash": 11872392946390819176, "orig": "[7] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. 2016. SSD: Single Shot MultiBox Detector. Springer International Publishing, Cham, 21-37. https://doi.org/10. 1007/978-3-319-46448-0_2", "properties": {"data": [["language", "en", 0.38999998569488525], ["semantic", "reference", 0.6000000238418579]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/151"}], "text": "[7] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. 2016. SSD: Single Shot MultiBox Detector. Springer International Publishing, Cham, 21-37. https://doi.org/10. 1007/978-3-319-46448-0_2", "text-hash": 14270091870781297606, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/117", "hash": 2956849475535726296, "orig": "[8] Joseph Redmon, Santosh Kumar Divvala, Ross B. Girshick, and Ali Farhadi. 2016. You Only Look Once: Unified, Real-Time Object Detection. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016), 779-788.", "properties": {"data": [["language", "en", 0.6299999952316284], ["semantic", "reference", 0.7900000214576721]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/152"}], "text": "[8] Joseph Redmon, Santosh Kumar Divvala, Ross B. Girshick, and Ali Farhadi. 2016. You Only Look Once: Unified, Real-Time Object Detection. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016), 779-788.", "text-hash": 4738468948628789302, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/118", "hash": 6623297047995432604, "orig": "[9] Joseph Redmon and Ali Farhadi. 2016. YOLO9000: Better, Faster, Stronger. arXiv preprint arXiv:1612.08242 (2016).", "properties": {"data": [["language", "en", 0.4399999976158142], ["semantic", "reference", 0.6899999976158142]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/153"}], "text": "[9] Joseph Redmon and Ali Farhadi. 2016. YOLO9000: Better, Faster, Stronger. arXiv preprint arXiv:1612.08242 (2016).", "text-hash": 15195146357792776186, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/119", "hash": 2507285765516108280, "orig": "[10] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Advances in Neural Information Processing Systems 28, C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett (Eds.). Curran Associates, Inc., 91-99. http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks. pdf", "properties": {"data": [["language", "en", 0.5899999737739563], ["semantic", "reference", 0.6800000071525574]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/154"}], "text": "[10] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Advances in Neural Information Processing Systems 28, C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett (Eds.). Curran Associates, Inc., 91-99. http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks. pdf", "text-hash": 5476658171803931478, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/120", "hash": 14905276480471286920, "orig": "[11] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. 2018. Corpus Conversion Service poster at the SysML conference. http://www.sysml.cc/doc/ 76.pdf", "properties": {"data": [["language", "en", 0.47999998927116394], ["semantic", "reference", 0.8899999856948853]], "headers": ["type", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/155"}], "text": "[11] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. 2018. Corpus Conversion Service poster at the SysML conference. http://www.sysml.cc/doc/ 76.pdf", "text-hash": 6922174983558886886, "type": "paragraph"}]} +{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "body": [{"$ref": "#/texts/0"}, {"$ref": "#/texts/1"}, {"$ref": "#/texts/2"}, {"$ref": "#/texts/3"}, {"$ref": "#/texts/4"}, {"$ref": "#/texts/5"}, {"$ref": "#/texts/6"}, {"$ref": "#/texts/7"}, {"$ref": "#/texts/8"}, {"$ref": "#/texts/9"}, {"$ref": "#/texts/10"}, {"$ref": "#/texts/11"}, {"$ref": "#/texts/12"}, {"$ref": "#/texts/13"}, {"$ref": "#/figures/0"}, {"$ref": "#/texts/14"}, {"$ref": "#/texts/15"}, {"$ref": "#/texts/16"}, {"$ref": "#/texts/17"}, {"$ref": "#/texts/18"}, {"$ref": "#/texts/19"}, {"$ref": "#/texts/20"}, {"$ref": "#/texts/21"}, {"$ref": "#/texts/22"}, {"$ref": "#/figures/1"}, {"$ref": "#/figures/2"}, {"$ref": "#/texts/23"}, {"$ref": "#/texts/24"}, {"$ref": "#/texts/25"}, {"$ref": "#/texts/26"}, {"$ref": "#/texts/27"}, {"$ref": "#/texts/28"}, {"$ref": "#/texts/29"}, {"$ref": "#/texts/30"}, {"$ref": "#/texts/31"}, {"$ref": "#/texts/32"}, {"$ref": "#/texts/33"}, {"$ref": "#/texts/34"}, {"$ref": "#/texts/35"}, {"$ref": "#/texts/36"}, {"$ref": "#/texts/37"}, {"$ref": "#/figures/3"}, {"$ref": "#/texts/38"}, {"$ref": "#/texts/39"}, {"$ref": "#/texts/40"}, {"$ref": "#/texts/41"}, {"$ref": "#/figures/4"}, {"$ref": "#/texts/42"}, {"$ref": "#/texts/43"}, {"$ref": "#/texts/44"}, {"$ref": "#/texts/45"}, {"$ref": "#/texts/46"}, {"$ref": "#/texts/47"}, {"$ref": "#/texts/48"}, {"$ref": "#/tables/0/captions/0"}, {"$ref": "#/tables/0"}, {"$ref": "#/texts/49"}, {"$ref": "#/texts/50"}, {"$ref": "#/texts/51"}, {"$ref": "#/texts/52"}, {"$ref": "#/texts/53"}, {"$ref": "#/tables/1"}, {"$ref": "#/texts/54"}, {"$ref": "#/texts/55"}, {"$ref": "#/texts/56"}, {"$ref": "#/texts/57"}, {"$ref": "#/tables/1/captions/0"}, {"$ref": "#/tables/2"}, {"$ref": "#/texts/58"}, {"$ref": "#/texts/59"}, {"$ref": "#/texts/60"}, {"$ref": "#/texts/61"}, {"$ref": "#/texts/62"}, {"$ref": "#/texts/63"}, {"$ref": "#/texts/64"}, {"$ref": "#/texts/65"}, {"$ref": "#/texts/66"}, {"$ref": "#/texts/67"}, {"$ref": "#/texts/68"}, {"$ref": "#/texts/69"}, {"$ref": "#/texts/70"}, {"$ref": "#/texts/71"}, {"$ref": "#/texts/72"}, {"$ref": "#/figures/5"}, {"$ref": "#/figures/5/captions/0"}, {"$ref": "#/texts/73"}, {"$ref": "#/texts/74"}, {"$ref": "#/texts/75"}, {"$ref": "#/texts/76"}, {"$ref": "#/figures/6"}, {"$ref": "#/texts/77"}, {"$ref": "#/texts/78"}, {"$ref": "#/texts/79"}, {"$ref": "#/texts/80"}, {"$ref": "#/figures/7"}, {"$ref": "#/texts/81"}, {"$ref": "#/texts/82"}, {"$ref": "#/texts/83"}, {"$ref": "#/texts/84"}, {"$ref": "#/texts/85"}, {"$ref": "#/texts/86"}, {"$ref": "#/texts/87"}, {"$ref": "#/texts/88"}, {"$ref": "#/texts/89"}, {"$ref": "#/texts/90"}, {"$ref": "#/texts/91"}, {"$ref": "#/texts/92"}, {"$ref": "#/texts/93"}, {"$ref": "#/texts/94"}, {"$ref": "#/texts/95"}, {"$ref": "#/texts/96"}, {"$ref": "#/texts/97"}, {"$ref": "#/texts/98"}, {"$ref": "#/texts/99"}, {"$ref": "#/texts/100"}, {"$ref": "#/texts/101"}, {"$ref": "#/texts/102"}, {"$ref": "#/texts/103"}, {"$ref": "#/texts/104"}, {"$ref": "#/texts/105"}, {"$ref": "#/texts/106"}, {"$ref": "#/texts/107"}, {"$ref": "#/texts/108"}, {"$ref": "#/texts/109"}, {"$ref": "#/texts/110"}], "description": {"languages": ["en"], "logs": [{"agent": "CCS", "comment": "CCS v0.0.0-dev parsing of documents", "date": "2023-05-06T03:50:43.616725+00:00", "type": "parsing"}], "title": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale."}, "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#", "figures": [{"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/14", "hash": 16535999405521191333, "orig": "Figure 1: A diagram of the conversion pipeline in the Corpus Conversion Service platform. It consists of 5 components: (1) Parsing of the document and its contained bitmap images, (2) Annotating the text of the parsed documents with layout semantic labels, (3) Training models based on the ground-truth acquired by the annotations, (4) Applying machine learned models on the parsed documents to determine the layout semantic label of each cell and finally (5) Assembling the document into a structured data format (e. g. JSON). The main conversion pipeline is depicted in blue and allows you to process and convert documents at scale into a structured data format. The green and orange sections can be used optionally, in order to process scanned documents (green) or train new models based on human annotation (orange).", "prov": [{"$ref": "#/page-elements/21"}], "text": "Figure 1: A diagram of the conversion pipeline in the Corpus Conversion Service platform. It consists of 5 components: (1) Parsing of the document and its contained bitmap images, (2) Annotating the text of the parsed documents with layout semantic labels, (3) Training models based on the ground-truth acquired by the annotations, (4) Applying machine learned models on the parsed documents to determine the layout semantic label of each cell and finally (5) Assembling the document into a structured data format (e. g. JSON). The main conversion pipeline is depicted in blue and allows you to process and convert documents at scale into a structured data format. The green and orange sections can be used optionally, in order to process scanned documents (green) or train new models based on human annotation (orange).", "text-hash": 9615465947839001361, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/0", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/20"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/29", "hash": 9115121388992506886, "orig": "Figure 3: The labelled cells annotated on the title page of a poster abstract about the CCS [11]. Here, the title, authors, affiliation, subtitle, main-text, caption and picture labels are represented respectively as red, green, purple, dark-red, yellow, orange and ivory.", "prov": [{"$ref": "#/page-elements/43"}], "text": "Figure 3: The labelled cells annotated on the title page of a poster abstract about the CCS [11]. Here, the title, authors, affiliation, subtitle, main-text, caption and picture labels are represented respectively as red, green, purple, dark-red, yellow, orange and ivory.", "text-hash": 17324714532994059892, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/1", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/36"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/24", "hash": 14775249782836392461, "orig": "Figure 2: The cells obtained for the title page of a poster abstract about the CCS [11] after the parsing stage. During the parsing, we extract all bounding boxes of the text (or cells) in such a way that they all have: (1) a maximum width, (2) are only single line and (3) split into multiple cells in case of listidentifiers, multi-columns or crossing vertical lines (such as in tables).", "prov": [{"$ref": "#/page-elements/38"}], "text": "Figure 2: The cells obtained for the title page of a poster abstract about the CCS [11] after the parsing stage. During the parsing, we extract all bounding boxes of the text (or cells) in such a way that they all have: (1) a maximum width, (2) are only single line and (3) split into multiple cells in case of listidentifiers, multi-columns or crossing vertical lines (such as in tables).", "text-hash": 6754994759646241897, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/2", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/37"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/41", "hash": 7479698582664857938, "orig": "Figure 4: The annotation rate of pages for two different collections (Physical Review B and Elsevier papers) as a function of the number of annotated pages. As one can observe, the mean annotation rate is increasing after each training (depicted by a vertical dashed red line). After the first training, the human annotator is presented a pre-annotated page, using the predictions from the latest model. As the predictions become better with increasing size of the ground-truth, less corrections need to be made and hence more pages can be annotated in similar time intervals.", "prov": [{"$ref": "#/page-elements/59"}], "text": "Figure 4: The annotation rate of pages for two different collections (Physical Review B and Elsevier papers) as a function of the number of annotated pages. As one can observe, the mean annotation rate is increasing after each training (depicted by a vertical dashed red line). After the first training, the human annotator is presented a pre-annotated page, using the predictions from the latest model. As the predictions become better with increasing size of the ground-truth, less corrections need to be made and hence more pages can be annotated in similar time intervals.", "text-hash": 504280783932681152, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/3", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/58"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/46", "hash": 17801697261174341699, "orig": "Figure 5: A typical image of a parsed PDF page that is fed to the default models. In red, we show the detection of the tables combined with the confidence of the model. The results displayed here originate from the YOLOv2 model.", "prov": [{"$ref": "#/page-elements/65"}], "text": "Figure 5: A typical image of a parsed PDF page that is fed to the default models. In red, we show the detection of the tables combined with the confidence of the model. The results displayed here originate from the YOLOv2 model.", "text-hash": 8628591081653072559, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/4", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/64"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/80", "hash": 3206590615695639432, "orig": "Figure 6: Diagram of the architecture of our platform. The architecture is composed from 4 layers: an interface layer with REST-API and frontend, an orchestration layer with a message broker and results backend, a compute layer consisting out of a variable number of asynchronous workers and finally a storage layer providing a NoSQL database and an object store. The NoSQL database stores the queryable meta-data of each file that is stored in the object store.", "prov": [{"$ref": "#/page-elements/105"}], "text": "Figure 6: Diagram of the architecture of our platform. The architecture is composed from 4 layers: an interface layer with REST-API and frontend, an orchestration layer with a message broker and results backend, a compute layer consisting out of a variable number of asynchronous workers and finally a storage layer providing a NoSQL database and an object store. The NoSQL database stores the queryable meta-data of each file that is stored in the object store.", "text-hash": 4488590919374042342, "type": "paragraph"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/5", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/104"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/85", "hash": 6667504298804810757, "orig": "Figure 7: Evolution of number of users and number of PDF pages on the platform. The jumps in the number of pages originates from big ingestions of documents performed by some users. This proves that the CCS platform is also able to accomodate these short burst of extreme activity.", "prov": [{"$ref": "#/page-elements/115"}], "text": "Figure 7: Evolution of number of users and number of PDF pages on the platform. The jumps in the number of pages originates from big ingestions of documents performed by some users. This proves that the CCS platform is also able to accomodate these short burst of extreme activity.", "text-hash": 14863303056159196785, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/6", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/114"}], "type": "figure"}, {"captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/90", "hash": 16175086861512378818, "orig": "Figure 8: Speedup in the pipeline components as a function of the number of worker nodes (each with four cores, running four local worker processes).", "prov": [{"$ref": "#/page-elements/122"}], "text": "Figure 8: Speedup in the pipeline components as a function of the number of worker nodes (each with four cores, running four local worker processes).", "text-hash": 9976536719025941296, "type": "caption"}], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/7", "footnotes": [], "hash": 18446744073709551615, "mentions": [], "prov": [{"$ref": "#/page-elements/121"}], "type": "figure"}], "footnotes": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/0", "hash": 13109829297289816265, "orig": "Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for components of this work owned by others than the author(s) must be honored. Abstracting with credit is permitted. To copy otherwise, or republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee. Request permissions from permissions@acm.org.", "prov": [{"$ref": "#/page-elements/11"}], "text": "Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for components of this work owned by others than the author(s) must be honored. Abstracting with credit is permitted. To copy otherwise, or republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee. Request permissions from permissions@acm.org.", "text-hash": 13032800243621120549, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/1", "hash": 6056950725387475159, "orig": "KDD \u201918, August 19-23, 2018, London, United Kingdom", "prov": [{"$ref": "#/page-elements/12"}], "text": "KDD \u201918, August 19-23, 2018, London, United Kingdom", "text-hash": 15473297532078357059, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/2", "hash": 82667377498161992, "orig": "\u00a9 2018 Copyright held by the owner/author(s). Publication rights licensed to ACM. ACM ISBN 978-1-4503-5552-0/18/08...$15.00", "prov": [{"$ref": "#/page-elements/13"}], "text": "\u00a9 2018 Copyright held by the owner/author(s). Publication rights licensed to ACM. ACM ISBN 978-1-4503-5552-0/18/08...$15.00", "text-hash": 3001373187661149606, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/3", "hash": 4157740687705909538, "orig": "https://doi.org/10.1145/3219819.3219834", "prov": [{"$ref": "#/page-elements/14"}], "text": "https://doi.org/10.1145/3219819.3219834", "text-hash": 3547103316902677392, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/4", "hash": 11592315251976452419, "orig": "$^{1}$This number originates from a keynote talk by Phil Ydens, Adobe\u2019s VP Engineering for Document Cloud. A video of the presentation can be found here: https://youtu.be/ 5Axw6OGPYHw", "prov": [{"$ref": "#/page-elements/18"}], "text": "$^{1}$This number originates from a keynote talk by Phil Ydens, Adobe\u2019s VP Engineering for Document Cloud. A video of the presentation can be found here: https://youtu.be/ 5Axw6OGPYHw", "text-hash": 14549584251446631343, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/5", "hash": 14606262418347792388, "orig": "$^{2}$This is clearly the case on the popular arXiv scientific online repository: https://arxiv. org/help/stats/2012_by_area/index", "prov": [{"$ref": "#/page-elements/19"}], "text": "$^{2}$This is clearly the case on the popular arXiv scientific online repository: https://arxiv. org/help/stats/2012_by_area/index", "text-hash": 7221931865252575858, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/6", "hash": 7599391434737032939, "orig": "$^{3}$https://www.xpdfreader.com", "prov": [{"$ref": "#/page-elements/26"}], "text": "$^{3}$https://www.xpdfreader.com", "text-hash": 104933780092600391, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/7", "hash": 9645151231942484724, "orig": "$^{4}$http://tabula.technology/", "prov": [{"$ref": "#/page-elements/27"}], "text": "$^{4}$http://tabula.technology/", "text-hash": 11894228156061308002, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/8", "hash": 4601317523235901886, "orig": "$^{5}$https://www.abbyy.com/", "prov": [{"$ref": "#/page-elements/28"}], "text": "$^{5}$https://www.abbyy.com/", "text-hash": 3391629868238619420, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/9", "hash": 1678429643964197526, "orig": "$^{6}$https://www.nuance.com/", "prov": [{"$ref": "#/page-elements/29"}], "text": "$^{6}$https://www.nuance.com/", "text-hash": 1693441792396921860, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/10", "hash": 9599864648545137978, "orig": "$^{7}$https://www.ibm.com/us-en/marketplace/data-capture-and-imaging", "prov": [{"$ref": "#/page-elements/30"}], "text": "$^{7}$https://www.ibm.com/us-en/marketplace/data-capture-and-imaging", "text-hash": 11939931591922575256, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/11", "hash": 11599600757439696813, "orig": "$^{8}$a line of text might be printed character-by-character, word-by-word or the entire text snippet.", "prov": [{"$ref": "#/page-elements/49"}], "text": "$^{8}$a line of text might be printed character-by-character, word-by-word or the entire text snippet.", "text-hash": 14551310605717713161, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/12", "hash": 8672351490975826115, "orig": "$^{9}$http://qpdf.sourceforge.net/", "prov": [{"$ref": "#/page-elements/50"}], "text": "$^{9}$http://qpdf.sourceforge.net/", "text-hash": 17478669388996915759, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/13", "hash": 13163501967272675186, "orig": "$^{10}$It is important to notice that there is no restriction on the number of labels nor the semantic meaning of these labels. The only limitation one has is that the set of semantic labels needs to be consistent across the dataset, but this is evidently true for any type of ML algorithm.", "prov": [{"$ref": "#/page-elements/57"}], "text": "$^{10}$It is important to notice that there is no restriction on the number of labels nor the semantic meaning of these labels. The only limitation one has is that the set of semantic labels needs to be consistent across the dataset, but this is evidently true for any type of ML algorithm.", "text-hash": 13266614683838167520, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/14", "hash": 16307739621375260129, "orig": "$^{11}$All the data is coming from the bulk data download https://arxiv.org/help/bulk_data_s3", "prov": [{"$ref": "#/page-elements/73"}], "text": "$^{11}$All the data is coming from the bulk data download https://arxiv.org/help/bulk_data_s3", "text-hash": 10131428201408538445, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/15", "hash": 16584453941359713372, "orig": "$^{12}$https://journals.aps.org/prb", "prov": [{"$ref": "#/page-elements/95"}], "text": "$^{12}$https://journals.aps.org/prb", "text-hash": 9846388834475228858, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/16", "hash": 7152618592130781617, "orig": "$^{13}$https://www.openapis.org/", "prov": [{"$ref": "#/page-elements/110"}], "text": "$^{13}$https://www.openapis.org/", "text-hash": 831347610428179229, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/17", "hash": 6593099618554401757, "orig": "$^{14}$https://www.rabbitmq.com/", "prov": [{"$ref": "#/page-elements/111"}], "text": "$^{14}$https://www.rabbitmq.com/", "text-hash": 15235037228412732729, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/18", "hash": 7200807455610600839, "orig": "$^{15}$https://www.redis.io/", "prov": [{"$ref": "#/page-elements/112"}], "text": "$^{15}$https://www.redis.io/", "text-hash": 782710111840296691, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/19", "hash": 1602196689966359724, "orig": "$^{16}$http://www.celeryproject.org/", "prov": [{"$ref": "#/page-elements/113"}], "text": "$^{16}$http://www.celeryproject.org/", "text-hash": 1778492971410642442, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/20", "hash": 4503261997707320357, "orig": "$^{17}$https://www.mongodb.com/", "prov": [{"$ref": "#/page-elements/120"}], "text": "$^{17}$https://www.mongodb.com/", "text-hash": 3489272016069066385, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/21", "hash": 2838531283607966593, "orig": "$^{18}$https://kubernetes.io/", "prov": [{"$ref": "#/page-elements/131"}], "text": "$^{18}$https://kubernetes.io/", "text-hash": 5145030134774826221, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/22", "hash": 3398848297472714606, "orig": "$^{19}$ibm.biz/privatecloud", "prov": [{"$ref": "#/page-elements/132"}], "text": "$^{19}$ibm.biz/privatecloud", "text-hash": 4585077909629360588, "type": "footnote"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/23", "hash": 6724984968154270143, "orig": "$^{20}$We don\u2019t show the number of documents, since the number of pages in a document can range from 1 to well above 1000. Consequently, the number of pages is a more robust metric to measure the scaling with regard to the corpus size.", "prov": [{"$ref": "#/page-elements/139"}], "text": "$^{20}$We don\u2019t show the number of documents, since the number of pages in a document can range from 1 to well above 1000. Consequently, the number of pages is a more robust metric to measure the scaling with regard to the corpus size.", "text-hash": 14814952417700014875, "type": "footnote"}], "hash": 18446744073709551615, "instances": {"data": [["numval", "year", 7377574370756688828, "TEXT", "#/texts/0", 1.0, 389609625548777054, 1345153950666588077, 18446744073709551615, 18446744073709551615, 34, 38, 34, 38, 4, 5, true, "2018", "2018"], ["numval", "ival", 7377574370756688828, "TEXT", "#/texts/0", 1.0, 15441160910541481790, 218889966910406464, 18446744073709551615, 18446744073709551615, 27, 29, 27, 29, 2, 3, true, "24", "24"], ["link", "email", 7377574370756688828, "TEXT", "#/texts/0", 1.0, 5663610854084581987, 12665388994729576179, 18446744073709551615, 18446744073709551615, 0, 38, 0, 38, 0, 5, true, "arXiv:1806.02284v1[cs.DL]24May2018", "arXiv:1806.02284v1 [cs.DL] 24 May 2018"], ["parenthesis", "square brackets", 7377574370756688828, "TEXT", "#/texts/0", 1.0, 8106340136782143757, 305332543809292699, 18446744073709551615, 18446744073709551615, 19, 26, 19, 26, 1, 2, true, "[cs.DL]", "[cs.DL]"], ["expression", "wtoken-concatenation", 7377574370756688828, "TEXT", "#/texts/0", 1.0, 5564484558542728887, 6260400721402515593, 18446744073709551615, 18446744073709551615, 0, 18, 0, 18, 0, 1, true, "arXiv:1806.02284v1", "arXiv:1806.02284v1"], ["expression", "wtoken-concatenation", 7377574370756688828, "TEXT", "#/texts/0", 1.0, 8106340136782143757, 305332543809292699, 18446744073709551615, 18446744073709551615, 19, 26, 19, 26, 1, 2, true, "[cs.DL]", "[cs.DL]"], ["sentence", "", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 11303007895399162817, 11350976242507888924, 18446744073709551615, 18446744073709551615, 0, 84, 0, 84, 0, 14, true, "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale."], ["term", "single-term", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 12638008641667971393, 2808934749433980912, 18446744073709551615, 18446744073709551615, 0, 25, 0, 25, 0, 3, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 3953336115302703444, 3908089371773344302, 18446744073709551615, 18446744073709551615, 29, 54, 29, 54, 5, 8, true, "Machine Learning Platform", "Machine Learning Platform"], ["term", "single-term", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 2543543638813814383, 14974042820297549065, 18446744073709551615, 18446744073709551615, 58, 74, 58, 74, 9, 11, true, "Ingest Documents", "Ingest Documents"], ["term", "single-term", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 329104162321612062, 9665794625919571011, 18446744073709551615, 18446744073709551615, 78, 83, 78, 83, 12, 13, true, "Scale", "Scale"], ["conn", "single-conn", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 15441160910541487054, 1862666054904793840, 18446744073709551615, 18446744073709551615, 75, 77, 75, 77, 11, 12, true, "at", "at"], ["conn", "single-conn", 10227328696767902037, "TEXT", "#/texts/1", 1.0, 15441160910541485865, 1862717525379277583, 18446744073709551615, 18446744073709551615, 55, 57, 55, 57, 8, 9, true, "to", "to"], ["link", "email", 18258237174351515285, "TEXT", "#/texts/3", 1.0, 7883794643982446593, 9473083479424942219, 18446744073709551615, 18446744073709551615, 0, 30, 0, 30, 0, 11, true, "taa,dol,cau,bek@zurich.ibm.com", "taa,dol,cau,bek@zurich.ibm.com"], ["geoloc", "country", 11056873211244709904, "TEXT", "#/texts/5", 1.0, 2664439525053388608, 16906723856094244091, 18446744073709551615, 18446744073709551615, 13, 24, 13, 24, 2, 3, true, "Switzerland", "Switzerland"], ["numval", "ival", 3624246356859711021, "TEXT", "#/texts/7", 1.0, 17767354399704235161, 12573472761345255474, 18446744073709551615, 18446744073709551615, 0, 1, 0, 1, 0, 1, true, "1", "1"], ["numval", "ival", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415896436703, 12968333296314215347, 18446744073709551615, 18446744073709551615, 1491, 1494, 1491, 1494, 249, 250, true, "250", "250"], ["parenthesis", "round brackets", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8624098978506921550, 8067551676911300261, 18446744073709551615, 18446744073709551615, 309, 347, 309, 347, 51, 60, true, "(e.g. the PDF format or bitmap images)", "(e.g. the PDF format or bitmap images)"], ["parenthesis", "round brackets", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 4552190965366435023, 5994729969442454976, 18446744073709551615, 18446744073709551615, 388, 409, 388, 409, 68, 73, true, "(e.g. complex tables)", "(e.g. complex tables)"], ["parenthesis", "round brackets", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104053210116957, 3393895258272698836, 18446744073709551615, 18446744073709551615, 628, 633, 628, 633, 109, 112, true, "(CCS)", "(CCS)"], ["parenthesis", "round brackets", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8912272716224106832, 12227152516026650269, 18446744073709551615, 18446744073709551615, 708, 735, 708, 735, 124, 131, true, "(i.e. collect ground-truth)", "(i.e. collect ground-truth)"], ["expression", "common", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486545, 11606670743807693522, 18446744073709551615, 18446744073709551615, 709, 713, 709, 713, 125, 126, true, "ie", "i.e."], ["expression", "common", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541487324, 11606670863251774055, 18446744073709551615, 18446744073709551615, 310, 314, 310, 314, 52, 53, true, "eg", "e.g."], ["expression", "common", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541487324, 11606670863251791461, 18446744073709551615, 18446744073709551615, 389, 393, 389, 393, 69, 70, true, "eg", "e.g."], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15169931585135175826, 17270979630715224833, 18446744073709551615, 18446744073709551615, 525, 536, 525, 536, 93, 94, true, "cloud-based", "cloud-based"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6307689511527468252, 12199545311202481186, 18446744073709551615, 18446744073709551615, 743, 759, 743, 759, 133, 134, true, "machine-learning", "machine-learning"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3932662928795581219, 3325076288347729928, 18446744073709551615, 18446744073709551615, 828, 844, 828, 844, 144, 145, true, "bitmap-documents", "bitmap-documents"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3753411203337468488, 16756051673090395246, 18446744073709551615, 18446744073709551615, 1102, 1114, 1102, 1114, 187, 188, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6307689511527468252, 12199545311202523423, 18446744073709551615, 18446744073709551615, 1133, 1149, 1133, 1149, 191, 192, true, "machine-learning", "machine-learning"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3753411203337468488, 16756051673090420119, 18446744073709551615, 18446744073709551615, 1244, 1256, 1244, 1256, 210, 211, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 10391722136816057200, 4465071482523967093, 18446744073709551615, 18446744073709551615, 1512, 1533, 1512, 1533, 253, 254, true, "knowledge-engineering", "knowledge-engineering"], ["expression", "word-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11355983594424639335, 375612941360355674, 18446744073709551615, 18446744073709551615, 1298, 1314, 1298, 1314, 219, 220, true, "precision/recall", "precision/recall"], ["expression", "wtoken-concatenation", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415896195376, 12963254028349616217, 18446744073709551615, 18446744073709551615, 1339, 1342, 1339, 1342, 225, 226, true, "99%", "99%"], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8311273775079009361, 18234444390399509646, 18446744073709551615, 18446744073709551615, 0, 122, 0, 122, 0, 20, true, "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size.", "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8652887973149281574, 1544181945594032747, 18446744073709551615, 18446744073709551615, 123, 258, 123, 258, 20, 43, true, "Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable.", "Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5682935857557389413, 3518340224243798686, 18446744073709551615, 18446744073709551615, 259, 487, 259, 487, 43, 84, true, "Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging.", "Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 18403546089192870947, 3375274648488008071, 18446744073709551615, 18446744073709551615, 488, 575, 488, 575, 84, 101, true, "In this paper, we present a modular, cloud-based platform to ingest documents at scale.", "In this paper, we present a modular, cloud-based platform to ingest documents at scale."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15870780009666831983, 2120332988466055117, 18446744073709551615, 18446744073709551615, 576, 891, 576, 891, 101, 152, true, "This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format.", "This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 10285604264132694933, 1782145150804012891, 18446744073709551615, 18446744073709551615, 892, 1045, 892, 1045, 152, 177, true, "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents.", "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 696858082777940132, 6587401266180559184, 18446744073709551615, 18446744073709551615, 1046, 1196, 1046, 1196, 177, 201, true, "Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude.", "Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11949985654620491247, 6433012828858116708, 18446744073709551615, 18446744073709551615, 1197, 1398, 1197, 1398, 201, 235, true, "This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output.", "This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output."], ["sentence", "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11602122462230219692, 9062878903616548976, 18446744073709551615, 18446744073709551615, 1399, 1554, 1399, 1554, 235, 257, true, "The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements."], ["term", "enum-term-mark-1", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 9845754748010686003, 13443808248487347009, 18446744073709551615, 18446744073709551615, 433, 464, 433, 464, 77, 81, true, "qualitative and quantitive data", "qualitative and quantitive data"], ["term", "enum-term-mark-2", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14506873166110432521, 11857803489572599054, 18446744073709551615, 18446744073709551615, 323, 339, 323, 339, 55, 58, true, "format or bitmap", "format or bitmap"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16807436920751143074, 14986987871760575963, 18446744073709551615, 18446744073709551615, 9, 25, 9, 25, 2, 5, true, "past few decades", "past few decades"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 7863808487922385366, 2936430672705644663, 18446744073709551615, 18446744073709551615, 41, 60, 41, 60, 9, 11, true, "scientific articles", "scientific articles"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 7143078508811650826, 1305762834470469664, 18446744073709551615, 18446744073709551615, 65, 85, 65, 85, 12, 14, true, "technical literature", "technical literature"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2831583870146744553, 1311385802074388264, 18446744073709551615, 18446744073709551615, 148, 158, 148, 158, 25, 27, true, "great need", "great need"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 1602384110795404989, 1921537330407092158, 18446744073709551615, 18446744073709551615, 319, 329, 319, 329, 54, 56, true, "PDF format", "PDF format"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 7850715239909526655, 8028877058422980465, 18446744073709551615, 18446744073709551615, 333, 346, 333, 346, 57, 59, true, "bitmap images", "bitmap images"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 1806804053579249155, 8335167387144157878, 18446744073709551615, 18446744073709551615, 389, 408, 389, 408, 69, 72, true, "eg complex tables", "e.g. complex tables"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 13450540556572295481, 4139295332657747437, 18446744073709551615, 18446744073709551615, 449, 464, 449, 464, 79, 81, true, "quantitive data", "quantitive data"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12206009578906402256, 12092500979427102718, 18446744073709551615, 18446744073709551615, 525, 545, 525, 545, 93, 95, true, "cloud-based platform", "cloud-based platform"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12638008641667971393, 6722150771778728224, 18446744073709551615, 18446744073709551615, 602, 627, 602, 627, 106, 109, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3735444463619010795, 10473776487201094119, 18446744073709551615, 18446744073709551615, 709, 728, 709, 728, 125, 128, true, "ie collect ground", "i.e. collect ground"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3416039644310333922, 4934158934704280837, 18446744073709551615, 18446744073709551615, 737, 785, 737, 785, 132, 136, true, "train machine-learning classification algorithms", "train machine-learning classification algorithms"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2954625771153872709, 4652514773317300232, 18446744073709551615, 18446744073709551615, 850, 890, 850, 890, 147, 151, true, "structured content representation format", "structured content representation format"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 7838671148811051201, 3585713728473930092, 18446744073709551615, 18446744073709551615, 952, 990, 952, 990, 165, 168, true, "asynchronous microservice architecture", "asynchronous microservice architecture"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11942859038914222878, 6623027391573465220, 18446744073709551615, 18446744073709551615, 1016, 1031, 1016, 1031, 172, 174, true, "massive amounts", "massive amounts"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5415884051047601374, 4355778428986290778, 18446744073709551615, 18446744073709551615, 1133, 1160, 1133, 1160, 191, 193, true, "machine-learning algorithms", "machine-learning algorithms"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11805639520798919476, 8476511316725219115, 18446744073709551615, 18446744073709551615, 1227, 1240, 1227, 1240, 207, 209, true, "large amounts", "large amounts"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5928632445065269445, 14217942914367810037, 18446744073709551615, 18446744073709551615, 1265, 1276, 1265, 1276, 213, 215, true, "little time", "little time"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 10100743957883477761, 17954790962075745659, 18446744073709551615, 18446744073709551615, 1293, 1322, 1293, 1322, 218, 221, true, "good precision/recall metrics", "good precision/recall metrics"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14630472445500347050, 6260595242788033664, 18446744073709551615, 18446744073709551615, 1380, 1397, 1380, 1397, 232, 234, true, "structured output", "structured output"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 10465443055056631368, 58866334284871721, 18446744073709551615, 18446744073709551615, 1403, 1415, 1403, 1415, 236, 238, true, "CCS platform", "CCS platform"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 168078114375663109, 12852846298920524296, 18446744073709551615, 18446744073709551615, 1441, 1468, 1441, 1468, 242, 245, true, "IBM internal infrastructure", "IBM internal infrastructure"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8462871836886525200, 10493121872431814801, 18446744073709551615, 18446744073709551615, 1495, 1507, 1495, 1507, 250, 252, true, "active users", "active users"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12360325703059227080, 15341633962216548312, 18446744073709551615, 18446744073709551615, 1512, 1553, 1512, 1553, 253, 256, true, "knowledge-engineering project engagements", "knowledge-engineering project engagements"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206569333693762, 10666930667336151813, 18446744073709551615, 18446744073709551615, 31, 37, 31, 37, 7, 8, true, "amount", "amount"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625741058932, 1609635956783744714, 18446744073709551615, 18446744073709551615, 117, 121, 117, 121, 18, 19, true, "size", "size"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106478573663085763, 2644249028750571186, 18446744073709551615, 18446744073709551615, 163, 170, 163, 170, 28, 29, true, "systems", "systems"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6167933651658664291, 11942237281037682166, 18446744073709551615, 18446744073709551615, 193, 202, 193, 202, 33, 34, true, "documents", "documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161785194305, 772802872201272523, 18446744073709551615, 18446744073709551615, 206, 211, 206, 211, 35, 36, true, "scale", "scale"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6184122545182835014, 10915241214874887145, 18446744073709551615, 18446744073709551615, 235, 244, 235, 244, 40, 41, true, "knowledge", "knowledge"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206548538896813, 17191059727726770924, 18446744073709551615, 18446744073709551615, 283, 289, 283, 289, 47, 48, true, "format", "format"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6167933651658664291, 11942237281037615868, 18446744073709551615, 18446744073709551615, 299, 308, 299, 308, 50, 51, true, "documents", "documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15493249494625550468, 17136530455551824273, 18446744073709551615, 18446744073709551615, 363, 375, 363, 375, 64, 65, true, "presentation", "presentation"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625696431489, 1272382058296184235, 18446744073709551615, 18446744073709551615, 383, 387, 383, 387, 67, 68, true, "data", "data"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5303544497514782120, 263131364412872028, 18446744073709551615, 18446744073709551615, 419, 429, 419, 429, 75, 76, true, "extraction", "extraction"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161668023890, 773695676617294129, 18446744073709551615, 18446744073709551615, 496, 501, 496, 501, 86, 87, true, "paper", "paper"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6167933651658664291, 11942237281037632251, 18446744073709551615, 18446744073709551615, 556, 565, 556, 565, 97, 98, true, "documents", "documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161785194305, 772802872201252868, 18446744073709551615, 18446744073709551615, 569, 574, 569, 574, 99, 100, true, "scale", "scale"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14814125365076808131, 9647025272576644413, 18446744073709551615, 18446744073709551615, 581, 589, 581, 589, 102, 103, true, "platform", "platform"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415896221596, 12963251184768892790, 18446744073709551615, 18446744073709551615, 629, 632, 629, 632, 110, 111, true, "CCS", "CCS"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14814125852840540191, 2945478222614419396, 18446744073709551615, 18446744073709551615, 648, 656, 648, 656, 115, 116, true, "pipeline", "pipeline"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104159157820437, 995383834556884589, 18446744073709551615, 18446744073709551615, 670, 675, 670, 675, 118, 119, true, "users", "users"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6167933651658664291, 11942237281037582534, 18446744073709551615, 18446744073709551615, 698, 707, 698, 707, 123, 124, true, "documents", "documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104159241711235, 991946153785165058, 18446744073709551615, 18446744073709551615, 729, 734, 729, 734, 129, 130, true, "truth", "truth"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625631434316, 1612217538956723265, 18446744073709551615, 18446744073709551615, 813, 817, 813, 817, 140, 141, true, "type", "type"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415896289890, 12968333890042400352, 18446744073709551615, 18446744073709551615, 821, 824, 821, 824, 142, 143, true, "PDF", "PDF"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3932662928795581219, 3325076288347729928, 18446744073709551615, 18446744073709551615, 828, 844, 828, 844, 144, 145, true, "bitmap-documents", "bitmap-documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106464525640940249, 12084772193525026048, 18446744073709551615, 18446744073709551615, 922, 929, 922, 929, 159, 160, true, "modules", "modules"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6167933651658664291, 11942237281037800116, 18446744073709551615, 18446744073709551615, 1035, 1044, 1035, 1044, 175, 176, true, "documents", "documents"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2873671966753113989, 3590833722970570505, 18446744073709551615, 18446744073709551615, 1081, 1091, 1081, 1091, 184, 185, true, "capability", "capability"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161571401725, 741255023938407211, 18446744073709551615, 18446744073709551615, 1177, 1182, 1177, 1182, 197, 198, true, "order", "order"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 6179392101937111178, 13132284913272968426, 18446744073709551615, 18446744073709551615, 1186, 1195, 1186, 1195, 199, 200, true, "magnitude", "magnitude"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3753411203337468488, 16756051673090420119, 18446744073709551615, 18446744073709551615, 1244, 1256, 1244, 1256, 210, 211, true, "ground-truth", "ground-truth"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161634702433, 739201814026917115, 18446744073709551615, 18446744073709551615, 1330, 1335, 1330, 1335, 223, 224, true, "range", "range"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206521526353544, 16720692448055193361, 18446744073709551615, 18446744073709551615, 1348, 1354, 1348, 1354, 227, 228, true, "regard", "regard"], ["term", "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2703018679320364082, 15916371892854536925, 18446744073709551615, 18446744073709551615, 1366, 1376, 1366, 1376, 230, 231, true, "conversion", "conversion"], ["verb", "compound-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 11956062550033090038, 9437126490011979695, 18446744073709551615, 18446744073709551615, 86, 113, 86, 113, 14, 17, true, "has increased exponentially", "has increased exponentially"], ["verb", "compound-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5690225847229166303, 18320034715902341983, 18446744073709551615, 18446744073709551615, 1115, 1129, 1115, 1129, 188, 190, true, "is accelerated", "is accelerated"], ["verb", "compound-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 9791407429604398000, 14740221032007164243, 18446744073709551615, 18446744073709551615, 1281, 1292, 1281, 1292, 216, 218, true, "obtain very", "obtain very"], ["verb", "compound-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2604368229451749231, 5954729608874990660, 18446744073709551615, 18446744073709551615, 1416, 1437, 1416, 1437, 238, 241, true, "is currently deployed", "is currently deployed"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486535, 11606670739881444005, 18446744073709551615, 18446744073709551615, 143, 145, 143, 145, 23, 24, true, "is", "is"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 2873440693780286732, 16242747501520400497, 18446744073709551615, 18446744073709551615, 176, 186, 176, 186, 30, 32, true, "can ingest", "can ingest"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625618412480, 1610868918855298631, 18446744073709551615, 18446744073709551615, 216, 220, 216, 220, 37, 38, true, "make", "make"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5947879769709188533, 15628690943209790850, 18446744073709551615, 18446744073709551615, 225, 234, 225, 234, 39, 40, true, "contained", "contained"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625618412480, 1610868918855286250, 18446744073709551615, 18446744073709551615, 410, 414, 410, 414, 73, 74, true, "make", "make"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106476016677076976, 2082360003734177772, 18446744073709551615, 18446744073709551615, 506, 513, 506, 513, 89, 90, true, "present", "present"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206560503286032, 18414709282119286416, 18446744073709551615, 18446744073709551615, 549, 555, 549, 555, 96, 97, true, "ingest", "ingest"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206563350835754, 16668546032725707234, 18446744073709551615, 18446744073709551615, 591, 597, 591, 597, 104, 105, true, "called", "called"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 5584174880054122043, 1259340301497714443, 18446744073709551615, 18446744073709551615, 635, 645, 635, 645, 113, 114, true, "implements", "implements"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206569317834029, 10666754365487817153, 18446744073709551615, 18446744073709551615, 663, 669, 663, 669, 117, 118, true, "allows", "allows"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104161667983915, 773700989878712775, 18446744073709551615, 18446744073709551615, 679, 684, 679, 684, 120, 121, true, "parse", "parse"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14650452911780017077, 11510513167121376409, 18446744073709551615, 18446744073709551615, 689, 697, 689, 697, 122, 123, true, "annotate", "annotate"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106398484416229602, 5707746526356454429, 18446744073709551615, 18446744073709551615, 801, 808, 801, 808, 138, 139, true, "convert", "convert"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3534225588934870450, 17328851096576172964, 18446744073709551615, 18446744073709551615, 895, 904, 895, 904, 153, 155, true, "will show", "will show"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486535, 11606670739883745478, 18446744073709551615, 18446744073709551615, 930, 932, 930, 932, 160, 161, true, "is", "is"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206485955868973, 16260582896355405879, 18446744073709551615, 18446744073709551615, 1009, 1015, 1009, 1015, 171, 172, true, "handle", "handle"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3534225588934870450, 17328851096575956236, 18446744073709551615, 18446744073709551615, 1062, 1071, 1062, 1071, 180, 182, true, "will show", "will show"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206562264646932, 18168705856416964271, 18446744073709551615, 18446744073709551615, 1095, 1101, 1095, 1101, 186, 187, true, "gather", "gather"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206569317834029, 10666754365454877920, 18446744073709551615, 18446744073709551615, 1202, 1208, 1202, 1208, 202, 203, true, "allows", "allows"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106398484416916345, 5707744688882101082, 18446744073709551615, 18446744073709551615, 1358, 1365, 1358, 1365, 229, 230, true, "content", "content"], ["verb", "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106478708506631920, 17126853238947237410, 18446744073709551615, 18446744073709551615, 1473, 1480, 1473, 1480, 246, 247, true, "serving", "serving"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14650945419058940869, 11656646489767977845, 18446744073709551615, 18446744073709551615, 0, 8, 0, 8, 0, 2, true, "Over the", "Over the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821546960, 18446744073709551615, 18446744073709551615, 38, 40, 38, 40, 8, 9, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486538, 11606670739901094601, 18446744073709551615, 18446744073709551615, 114, 116, 114, 116, 17, 18, true, "in", "in"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415895625940, 12963192413398852201, 18446744073709551615, 18446744073709551615, 159, 162, 159, 162, 27, 28, true, "for", "for"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541487054, 11606670851925858322, 18446744073709551615, 18446744073709551615, 203, 205, 203, 205, 34, 35, true, "at", "at"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 14814148868025447689, 10464458716096298180, 18446744073709551615, 18446744073709551615, 290, 298, 290, 298, 48, 50, true, "of these", "of these"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206564601699726, 16611998392190665699, 18446744073709551615, 18446744073709551615, 310, 318, 310, 318, 52, 54, true, "eg the", "e.g. the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206568455155979, 10578923885508625435, 18446744073709551615, 18446744073709551615, 356, 362, 356, 362, 62, 64, true, "as the", "as the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206565712212855, 18288882301375407275, 18446744073709551615, 18446744073709551615, 376, 382, 376, 382, 65, 67, true, "of the", "of the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821473010, 18446744073709551615, 18446744073709551615, 430, 432, 430, 432, 76, 77, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106396862006371970, 13002336324491202712, 18446744073709551615, 18446744073709551615, 488, 495, 488, 495, 84, 86, true, "In this", "In this"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541487054, 11606670851925882070, 18446744073709551615, 18446744073709551615, 566, 568, 566, 568, 98, 99, true, "at", "at"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821399597, 18446744073709551615, 18446744073709551615, 818, 820, 818, 820, 141, 142, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 3504047303033029818, 12858913108667382047, 18446744073709551615, 18446744073709551615, 905, 914, 905, 914, 155, 157, true, "that each", "that each"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206565712212855, 18288882301375701872, 18446744073709551615, 18446744073709551615, 915, 921, 915, 921, 157, 159, true, "of the", "of the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821377067, 18446744073709551615, 18446744073709551615, 1032, 1034, 1032, 1034, 174, 175, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625631229034, 1612226062922593249, 18446744073709551615, 18446744073709551615, 1072, 1076, 1072, 1076, 182, 183, true, "that", "that"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486989, 11606670853486674912, 18446744073709551615, 18446744073709551615, 1130, 1132, 1130, 1132, 190, 191, true, "by", "by"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486989, 11606670853486803944, 18446744073709551615, 18446744073709551615, 1161, 1163, 1161, 1163, 193, 194, true, "by", "by"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541487054, 11606670851925780672, 18446744073709551615, 18446744073709551615, 1164, 1166, 1164, 1166, 194, 195, true, "at", "at"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821349359, 18446744073709551615, 18446744073709551615, 1183, 1185, 1183, 1185, 198, 199, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821388621, 18446744073709551615, 18446744073709551615, 1241, 1243, 1241, 1243, 209, 210, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541486538, 11606670739900210613, 18446744073709551615, 18446744073709551615, 1257, 1259, 1257, 1259, 211, 212, true, "in", "in"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 16381206560518651853, 18414993880775571288, 18446744073709551615, 18446744073709551615, 1323, 1329, 1323, 1329, 221, 223, true, "in the", "in the"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485670, 11606670832821292551, 18446744073709551615, 18446744073709551615, 1336, 1338, 1336, 1338, 224, 225, true, "of", "of"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625618037948, 1610651885976451134, 18446744073709551615, 18446744073709551615, 1343, 1347, 1343, 1347, 226, 227, true, "with", "with"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485678, 11606670855875426468, 18446744073709551615, 18446744073709551615, 1438, 1440, 1438, 1440, 241, 242, true, "on", "on"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625631229040, 1612226037052379844, 18446744073709551615, 18446744073709551615, 1486, 1490, 1486, 1490, 248, 249, true, "than", "than"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 12178341415895625940, 12963192413398671002, 18446744073709551615, 18446744073709551615, 1508, 1511, 1508, 1511, 252, 253, true, "for", "for"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485865, 11606670830397324540, 18446744073709551615, 18446744073709551615, 546, 548, 546, 548, 95, 96, true, "to", "to"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485865, 11606670830397301532, 18446744073709551615, 18446744073709551615, 676, 678, 676, 678, 119, 120, true, "to", "to"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 389609625631408052, 1612210503630929212, 18446744073709551615, 18446744073709551615, 845, 849, 845, 849, 145, 147, true, "to a", "to a"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 329104159243175056, 993032465640498236, 18446744073709551615, 18446744073709551615, 946, 951, 946, 951, 163, 165, true, "to an", "to an"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485865, 11606670830397529924, 18446744073709551615, 18446744073709551615, 1092, 1094, 1092, 1094, 185, 186, true, "to", "to"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 8106351192274276906, 17899388016831785682, 18446744073709551615, 18446744073709551615, 1212, 1219, 1212, 1219, 204, 206, true, "to both", "to both"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485865, 11606670830397545800, 18446744073709551615, 18446744073709551615, 1355, 1357, 1355, 1357, 228, 229, true, "to", "to"], ["conn", "single-conn", 17999848460847860039, "TEXT", "#/texts/8", 1.0, 15441160910541485865, 11606670830397544434, 18446744073709551615, 18446744073709551615, 1377, 1379, 1377, 1379, 231, 232, true, "to", "to"], ["reference", "container-title", 14387482728083328702, "TEXT", "#/texts/9", 1.0, 7430992009485070364, 3404236123378547578, 18446744073709551615, 18446744073709551615, 0, 21, 0, 21, 0, 4, true, "ACM Reference Format:", "ACM Reference Format:"], ["reference", "author", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 4686361850733567621, 14659076240775980364, 18446744073709551615, 18446744073709551615, 0, 15, 0, 15, 0, 4, true, "Peter W J Staar", "Peter W J Staar"], ["reference", "author", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 1571808557594152175, 2521268111811279239, 18446744073709551615, 18446744073709551615, 17, 30, 17, 30, 5, 7, true, "Michele Dolfi", "Michele Dolfi"], ["reference", "author", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 9737597816447750448, 18360796446007226291, 18446744073709551615, 18446744073709551615, 32, 46, 32, 46, 8, 10, true, "Christoph Auer", "Christoph Auer"], ["reference", "author", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 13732913329338511598, 1087221346292312189, 18446744073709551615, 18446744073709551615, 48, 61, 48, 61, 11, 14, true, "Costas Bekas.", "Costas Bekas."], ["reference", "container-title", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 18326306750753291457, 8917954083851035786, 18446744073709551615, 18446744073709551615, 154, 247, 154, 247, 31, 48, true, "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining", "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining"], ["reference", "date", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 389609625548777054, 918164764798402581, 18446744073709551615, 18446744073709551615, 62, 66, 62, 66, 14, 15, true, "2018", "2018"], ["reference", "date", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 17017808558592810577, 1917644983122671206, 18446744073709551615, 18446744073709551615, 249, 267, 249, 267, 49, 53, true, "August 19-23, 2018", "August 19-23, 2018"], ["reference", "doi", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 3534146179424153776, 16664784081959773586, 18446744073709551615, 18446744073709551615, 326, 344, 326, 344, 71, 72, true, "https://doi.org/10", "https://doi.org/10"], ["reference", "doi", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 7680709109455866852, 9531684221895358060, 18446744073709551615, 18446744073709551615, 346, 366, 346, 366, 73, 76, true, "1145/3219819.3219834", "1145/3219819.3219834"], ["reference", "location", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 7719325285618625183, 12559995503894274239, 18446744073709551615, 18446744073709551615, 269, 291, 269, 291, 54, 58, true, "London, United Kingdom", "London, United Kingdom"], ["reference", "location", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 16918962045161917454, 17491630952016593380, 18446744073709551615, 18446744073709551615, 298, 315, 298, 315, 61, 67, true, "New York, NY, USA", "New York, NY, USA"], ["reference", "title", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 2059592768319149889, 10728790470880119375, 18446744073709551615, 18446744073709551615, 68, 151, 68, 151, 16, 29, true, "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale", "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale"], ["numval", "fval", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415896439107, 14800962307501710678, 18446744073709551615, 18446744073709551615, 39, 42, 39, 42, 7, 8, true, "2.5", "2.5"], ["parenthesis", "round brackets", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 2236437873379298599, 8971166239141287330, 18446744073709551615, 18446744073709551615, 1048, 1094, 1048, 1094, 170, 180, true, "(e.g. find me a phase-diagram of material XYZ)", "(e.g. find me a phase-diagram of material XYZ)"], ["parenthesis", "round brackets", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 4516846515356980393, 4935623304895828855, 18446744073709551615, 18446744073709551615, 1196, 1246, 1196, 1246, 199, 210, true, "(with the PDF format being the most prevalent one)", "(with the PDF format being the most prevalent one)"], ["parenthesis", "round brackets", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 5879944210728656410, 9673170177479615330, 18446744073709551615, 18446744073709551615, 1432, 1473, 1432, 1473, 246, 257, true, "(documents, images, authors, tables, etc)", "(documents, images, authors, tables, etc)"], ["expression", "common", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541487324, 9094674364011169527, 18446744073709551615, 18446744073709551615, 1049, 1053, 1049, 1053, 171, 172, true, "eg", "e.g."], ["expression", "word-concatenation", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12555128312158075374, 3585475568588858575, 18446744073709551615, 18446744073709551615, 1064, 1077, 1064, 1077, 175, 176, true, "phase-diagram", "phase-diagram"], ["expression", "wtoken-concatenation", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9623123605532099037, 12825981064550354106, 18446744073709551615, 18446744073709551615, 79, 96, 79, 96, 13, 14, true, "circulation^{1}", "circulation$^{1}$"], ["expression", "wtoken-concatenation", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9653568957037915764, 1159839439008018639, 18446744073709551615, 18446744073709551615, 863, 882, 863, 882, 138, 139, true, "exponentially^{2}", "exponentially$^{2}$"], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 17192639608086865650, 10639035648049775025, 18446744073709551615, 18446744073709551615, 0, 97, 0, 97, 0, 15, true, "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$.", "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9088786707146406857, 17567093053494849836, 18446744073709551615, 18446744073709551615, 98, 252, 98, 252, 15, 41, true, "These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery.", "These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9645560666016248506, 8355944213796053339, 18446744073709551615, 18446744073709551615, 253, 359, 253, 359, 41, 59, true, "It is needless to say that valuable qualitative and quantitative information is contained in many of them.", "It is needless to say that valuable qualitative and quantitative information is contained in many of them."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 17647932338360720997, 5716030233811874384, 18446744073709551615, 18446744073709551615, 360, 509, 360, 509, 59, 84, true, "However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout.", "However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15487015001052727581, 14484812293778889252, 18446744073709551615, 18446744073709551615, 510, 722, 510, 722, 84, 115, true, "Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery.", "Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 3574328216950930229, 4905315167294659186, 18446744073709551615, 18446744073709551615, 723, 883, 723, 883, 115, 140, true, "In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$.", "In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8347632587306657460, 16097912844310233617, 18446744073709551615, 18446744073709551615, 884, 988, 884, 988, 140, 160, true, "This poses a real problem, since more and more information published in the PDF documents is going dark.", "This poses a real problem, since more and more information published in the PDF documents is going dark."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 7315676043002615146, 3020292113144700597, 18446744073709551615, 18446744073709551615, 989, 1133, 989, 1133, 160, 187, true, "In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components.", "In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8292138896065382931, 17716571591104291388, 18446744073709551615, 18446744073709551615, 1134, 1345, 1134, 1345, 187, 228, true, "First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML.", "First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML."], ["sentence", "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 18073096319598857596, 14789900833203243228, 18446744073709551615, 18446744073709551615, 1346, 1532, 1346, 1532, 228, 267, true, "Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", "Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context."], ["term", "enum-term-mark-1", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12322374974058800893, 6816531868111142674, 18446744073709551615, 18446744073709551615, 280, 329, 280, 329, 47, 52, true, "valuable qualitative and quantitative information", "valuable qualitative and quantitative information"], ["term", "enum-term-mark-4", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 11674491770136657522, 11680961660123138230, 18446744073709551615, 18446744073709551615, 1333, 1344, 1333, 1344, 224, 227, true, "JSON or XML", "JSON or XML"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 3693395590591757392, 2559252195012720165, 18446744073709551615, 18446744073709551615, 43, 65, 43, 65, 8, 11, true, "trillion PDF documents", "trillion PDF documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8414271082704541626, 9829432072489958078, 18446744073709551615, 18446744073709551615, 149, 163, 149, 163, 23, 25, true, "annual reports", "annual reports"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 3282133738476528713, 6601164231648618886, 18446744073709551615, 18446744073709551615, 193, 208, 193, 208, 32, 34, true, "research papers", "research papers"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 1102904554370006265, 13125714652652128474, 18446744073709551615, 18446744073709551615, 222, 251, 222, 251, 37, 40, true, "specific scientific discovery", "specific scientific discovery"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 7668210657519556598, 8800539397108400539, 18446744073709551615, 18446744073709551615, 305, 329, 305, 329, 50, 52, true, "quantitative information", "quantitative information"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 13935212089545515210, 4563100627799985741, 18446744073709551615, 18446744073709551615, 431, 452, 431, 452, 73, 75, true, "printing instructions", "printing instructions"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16582444977748815769, 16919788927196448661, 18446744073709551615, 18446744073709551615, 486, 508, 486, 508, 80, 83, true, "pleasing visual layout", "pleasing visual layout"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 4929058514881842733, 10224787839479118537, 18446744073709551615, 18446744073709551615, 519, 538, 519, 538, 86, 88, true, "data representation", "data representation"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14929125759175486455, 13997854025989108072, 18446744073709551615, 18446744073709551615, 547, 567, 547, 567, 90, 92, true, "enormous variability", "enormous variability"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 5746783959074166208, 15517192707477599154, 18446744073709551615, 18446744073709551615, 635, 649, 635, 649, 102, 104, true, "access content", "access content"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 2730405582718102128, 15726970596030809890, 18446744073709551615, 18446744073709551615, 702, 721, 702, 721, 112, 114, true, "knowledge discovery", "knowledge discovery"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16813764953769919795, 4260210876529689133, 18446744073709551615, 18446744073709551615, 742, 764, 742, 764, 119, 122, true, "sheer current quantity", "sheer current quantity"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16688986026552560644, 17177901629424753408, 18446744073709551615, 18446744073709551615, 783, 798, 783, 798, 126, 128, true, "submission rate", "submission rate"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12621877848489179259, 15237617635766653290, 18446744073709551615, 18446744073709551615, 829, 846, 829, 846, 133, 135, true, "scientific domain", "scientific domain"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 5853227681134087829, 1787086050256320443, 18446744073709551615, 18446744073709551615, 897, 909, 897, 909, 143, 145, true, "real problem", "real problem"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12653831733608918357, 6140974263001666382, 18446744073709551615, 18446744073709551615, 960, 973, 960, 973, 154, 156, true, "PDF documents", "PDF documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 10167329824705672383, 12701577379507576649, 18446744073709551615, 18446744073709551615, 1081, 1093, 1081, 1093, 177, 179, true, "material XYZ", "material XYZ"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 1602384110795404989, 18168403198260411892, 18446744073709551615, 18446744073709551615, 1206, 1216, 1206, 1216, 202, 204, true, "PDF format", "PDF format"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12595883072252114156, 7039273758002805758, 18446744073709551615, 18446744073709551615, 1232, 1245, 1232, 1245, 207, 209, true, "prevalent one", "prevalent one"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 4066887494406769292, 14849727204374093143, 18446744073709551615, 18446744073709551615, 1278, 1299, 1278, 1299, 215, 218, true, "structured data files", "structured data files"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14630472899120924944, 15550065915551638064, 18446744073709551615, 18446744073709551615, 1307, 1324, 1307, 1324, 220, 222, true, "structured format", "structured format"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 13018076357583391135, 18265178771346204830, 18446744073709551615, 18446744073709551615, 1365, 1377, 1365, 1377, 233, 235, true, "query engine", "query engine"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 11805624357079379862, 2927818536118337064, 18446744073709551615, 18446744073709551615, 1406, 1419, 1406, 1419, 242, 244, true, "large variety", "large variety"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9623123605532099037, 12825981064550354106, 18446744073709551615, 18446744073709551615, 79, 96, 79, 96, 13, 14, true, "circulation^{1}", "circulation$^{1}$"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950143797819, 18446744073709551615, 18446744073709551615, 104, 113, 104, 113, 16, 17, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106464587474035829, 6502274748172348363, 18446744073709551615, 18446744073709551615, 125, 132, 125, 132, 19, 20, true, "manuals", "manuals"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15361659830789508523, 8413399544610388116, 18446744073709551615, 18446744073709551615, 137, 147, 137, 147, 21, 22, true, "appliances", "appliances"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 5947879506556567994, 16771512443857485166, 18446744073709551615, 18446744073709551615, 167, 176, 167, 176, 26, 27, true, "companies", "companies"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415895525628, 14794601526936094944, 18446744073709551615, 18446744073709551615, 186, 189, 186, 189, 30, 31, true, "way", "way"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106398484416916345, 17530806449434366453, 18446744073709551615, 18446744073709551615, 369, 376, 369, 376, 61, 62, true, "content", "content"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415896289890, 14799990756781414830, 18446744073709551615, 18446744073709551615, 388, 391, 388, 391, 64, 65, true, "PDF", "PDF"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206597113188775, 13905938768963750102, 18446744073709551615, 18446744073709551615, 402, 408, 402, 408, 68, 69, true, "nature", "nature"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106478700233620678, 8336023496233777462, 18446744073709551615, 18446744073709551615, 420, 427, 420, 427, 71, 72, true, "streams", "streams"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106342461491420046, 4172004388378103877, 18446744073709551615, 18446744073709551615, 571, 578, 571, 578, 93, 94, true, "layouts", "layouts"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950143438881, 18446744073709551615, 18446744073709551615, 592, 601, 592, 601, 96, 97, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 11387678566946341343, 4163904415113468966, 18446744073709551615, 18446744073709551615, 674, 688, 674, 688, 109, 110, true, "representation", "representation"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14650447861280948245, 18066875144210692331, 18446744073709551615, 18446744073709551615, 726, 734, 726, 734, 116, 117, true, "addition", "addition"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142901645, 18446744073709551615, 18446744073709551615, 768, 777, 768, 777, 123, 124, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142902450, 18446744073709551615, 18446744073709551615, 812, 821, 812, 821, 130, 131, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9653568957037915764, 1159839439008018639, 18446744073709551615, 18446744073709551615, 863, 882, 863, 882, 138, 139, true, "exponentially^{2}", "exponentially$^{2}$"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14388065630035882329, 2686196032102535307, 18446744073709551615, 18446744073709551615, 931, 942, 931, 942, 150, 151, true, "information", "information"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 329104161571401725, 8768421271667196313, 18446744073709551615, 18446744073709551615, 992, 997, 992, 997, 161, 162, true, "order", "order"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106398484416916345, 17530806449433194901, 18446744073709551615, 18446744073709551615, 1010, 1017, 1010, 1017, 165, 166, true, "content", "content"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142885131, 18446744073709551615, 18446744073709551615, 1027, 1036, 1027, 1036, 168, 169, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541487324, 9094674364011169527, 18446744073709551615, 18446744073709551615, 1049, 1053, 1049, 1053, 171, 172, true, "eg", "e.g."], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12555128312158075374, 3585475568588858575, 18446744073709551615, 18446744073709551615, 1064, 1077, 1064, 1077, 175, 176, true, "phase-diagram", "phase-diagram"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 2703018952916355661, 10279229622173728080, 18446744073709551615, 18446744073709551615, 1122, 1132, 1122, 1132, 185, 186, true, "components", "components"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142942051, 18446744073709551615, 18446744073709551615, 1160, 1169, 1160, 1169, 193, 194, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106478777441543540, 773597955729195721, 18446744073709551615, 18446744073709551615, 1177, 1184, 1177, 1184, 196, 197, true, "variety", "variety"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106397728035763965, 11508792142722132367, 18446744073709551615, 18446744073709551615, 1188, 1195, 1188, 1195, 198, 199, true, "formats", "formats"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142865755, 18446744073709551615, 18446744073709551615, 1265, 1274, 1265, 1274, 213, 214, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625541450799, 476546803986815687, 18446744073709551615, 18446744073709551615, 1333, 1337, 1333, 1337, 224, 225, true, "JSON", "JSON"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415895541463, 14794406103722084656, 18446744073709551615, 18446744073709551615, 1341, 1344, 1341, 1344, 226, 227, true, "XML", "XML"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14652282388618227426, 14047491818249905874, 18446744073709551615, 18446744073709551615, 1423, 1431, 1423, 1431, 245, 246, true, "concepts", "concepts"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142859841, 18446744073709551615, 18446744073709551615, 1433, 1442, 1433, 1442, 247, 248, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206560620045048, 15910167584621803731, 18446744073709551615, 18446744073709551615, 1444, 1450, 1444, 1450, 249, 250, true, "images", "images"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106397759446161562, 17038239979594063466, 18446744073709551615, 18446744073709551615, 1452, 1459, 1452, 1459, 251, 252, true, "authors", "authors"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206513098478539, 8569522873910347573, 18446744073709551615, 18446744073709551615, 1461, 1467, 1461, 1467, 253, 254, true, "tables", "tables"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6167933651658664291, 3744443950142863448, 18446744073709551615, 18446744073709551615, 1495, 1504, 1495, 1504, 260, 261, true, "documents", "documents"], ["term", "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106398484416909789, 17530798545720977035, 18446744073709551615, 18446744073709551615, 1524, 1531, 1524, 1531, 265, 266, true, "context", "context"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 17551793109234931072, 9841315996119329650, 18446744073709551615, 18446744073709551615, 3, 15, 3, 15, 1, 3, true, "is estimated", "is estimated"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 696181546770410912, 10657444457642612809, 18446744073709551615, 18446744073709551615, 27, 38, 27, 38, 5, 7, true, "are roughly", "are roughly"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 17466643417440400812, 16357177041782037840, 18446744073709551615, 18446744073709551615, 330, 342, 330, 342, 52, 54, true, "is contained", "is contained"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15984679469930005672, 16512266362137627548, 18446744073709551615, 18446744073709551615, 409, 419, 409, 419, 69, 71, true, "reduced to", "reduced to"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 9871239675677535701, 8810214565092963488, 18446744073709551615, 18446744073709551615, 453, 483, 453, 483, 75, 79, true, "purposed to faithfully present", "purposed to faithfully present"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14133501046094794901, 4250240326135716646, 18446744073709551615, 18446744073709551615, 620, 634, 620, 634, 100, 102, true, "challenging to", "challenging to"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 18329554120394908623, 17010976290898309846, 18446744073709551615, 18446744073709551615, 847, 862, 847, 862, 135, 138, true, "is also growing", "is also growing"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14637952034068646347, 9688733531448391553, 18446744073709551615, 18446744073709551615, 974, 982, 974, 982, 156, 158, true, "is going", "is going"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14364253828417975278, 8778810672464165894, 18446744073709551615, 18446744073709551615, 1100, 1117, 1100, 1117, 182, 184, true, "needs essentially", "needs essentially"], ["verb", "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16971139354256206394, 8359549146932405741, 18446744073709551615, 18446744073709551615, 1145, 1159, 1145, 1159, 190, 193, true, "need to ingest", "need to ingest"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 329104161634702433, 8726454204599928234, 18446744073709551615, 18446744073709551615, 114, 119, 114, 119, 17, 18, true, "range", "range"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6180169263126451304, 4214562769527423312, 18446744073709551615, 18446744073709551615, 210, 219, 210, 219, 35, 36, true, "detailing", "detailing"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486535, 9094674367324716363, 18446744073709551615, 18446744073709551615, 256, 258, 256, 258, 42, 43, true, "is", "is"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415895645562, 14799989741446549720, 18446744073709551615, 18446744073709551615, 271, 274, 271, 274, 45, 46, true, "say", "say"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106397531449655911, 14632270885483087688, 18446744073709551615, 18446744073709551615, 377, 384, 377, 384, 62, 63, true, "encoded", "encoded"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486535, 9094674367324724925, 18446744073709551615, 18446744073709551615, 392, 394, 392, 394, 65, 66, true, "is", "is"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625618412480, 541954499163841946, 18446744073709551615, 18446744073709551615, 602, 606, 602, 606, 97, 98, true, "make", "make"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 3503810711254267897, 327944184510617093, 18446744073709551615, 18446744073709551615, 654, 663, 654, 663, 105, 106, true, "transform", "transform"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106397529675133622, 9265134128656073394, 18446744073709551615, 18446744073709551615, 694, 701, 694, 701, 111, 112, true, "enables", "enables"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6185033796712833759, 8158902570488040634, 18446744073709551615, 18446744073709551615, 802, 811, 802, 811, 129, 130, true, "published", "published"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 329104161594697075, 8726414758277463017, 18446744073709551615, 18446744073709551615, 889, 894, 889, 894, 141, 142, true, "poses", "poses"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6185033796712833759, 8158902570488066017, 18446744073709551615, 18446744073709551615, 943, 952, 943, 952, 151, 152, true, "published", "published"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625618412480, 541954499163850098, 18446744073709551615, 18446744073709551615, 1001, 1005, 1001, 1005, 163, 164, true, "make", "make"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625697824147, 497671517323247955, 18446744073709551615, 18446744073709551615, 1054, 1058, 1054, 1058, 172, 173, true, "find", "find"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 329104159301007417, 8863033603552468338, 18446744073709551615, 18446744073709551615, 1217, 1222, 1217, 1222, 204, 205, true, "being", "being"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106398484416229602, 17530813820733868718, 18446744073709551615, 18446744073709551615, 1251, 1258, 1251, 1258, 211, 212, true, "convert", "convert"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625621532398, 554816074249930520, 18446744073709551615, 18446744073709551615, 1358, 1362, 1358, 1362, 231, 232, true, "need", "need"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486535, 9094674367323996407, 18446744073709551615, 18446744073709551615, 1383, 1385, 1383, 1385, 236, 237, true, "is", "is"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625696287852, 497722139527509467, 18446744073709551615, 18446744073709551615, 1394, 1398, 1394, 1398, 239, 240, true, "deal", "deal"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 6168374324562720592, 185665609222125727, 18446744073709551615, 18446744073709551615, 1474, 1483, 1474, 1483, 257, 258, true, "extracted", "extracted"], ["verb", "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415895640485, 14799993819747716499, 18446744073709551615, 18446744073709551615, 1509, 1512, 1509, 1512, 262, 263, true, "put", "put"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106464587478437030, 6502547855313104919, 18446744073709551615, 18446744073709551615, 346, 353, 346, 353, 55, 57, true, "many of", "many of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 8106478685702231057, 1428751967817183488, 18446744073709551615, 18446744073709551615, 1325, 1332, 1325, 1332, 222, 224, true, "such as", "such as"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625631229034, 542250596720887578, 18446744073709551615, 18446744073709551615, 16, 20, 16, 20, 3, 4, true, "that", "that"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486538, 9094674367373732264, 18446744073709551615, 18446744073709551615, 76, 78, 76, 78, 12, 13, true, "in", "in"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625697843734, 497670111222755023, 18446744073709551615, 18446744073709551615, 120, 124, 120, 124, 18, 19, true, "from", "from"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 12178341415895625940, 14799992967704466108, 18446744073709551615, 18446744073709551615, 133, 136, 133, 136, 20, 21, true, "for", "for"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219234676, 18446744073709551615, 18446744073709551615, 164, 166, 164, 166, 25, 26, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 389609625631229034, 542250596720775319, 18446744073709551615, 18446744073709551615, 275, 279, 275, 279, 46, 47, true, "that", "that"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486538, 9094674367373513839, 18446744073709551615, 18446744073709551615, 343, 345, 343, 345, 54, 55, true, "in", "in"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486538, 9094674367373523345, 18446744073709551615, 18446744073709551615, 385, 387, 385, 387, 63, 64, true, "in", "in"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541486989, 9094674356673776478, 18446744073709551615, 18446744073709551615, 395, 397, 395, 397, 66, 67, true, "by", "by"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219219846, 18446744073709551615, 18446744073709551615, 428, 430, 428, 430, 72, 73, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219227273, 18446744073709551615, 18446744073709551615, 568, 570, 568, 570, 92, 93, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14154242830791309661, 1004085954587590076, 18446744073709551615, 18446744073709551615, 579, 591, 579, 591, 94, 96, true, "across these", "across these"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206560517276114, 15945165859804744982, 18446744073709551615, 18446744073709551615, 667, 673, 667, 673, 107, 109, true, "into a", "into a"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541480354, 9094674546964354786, 18446744073709551615, 18446744073709551615, 723, 725, 723, 725, 115, 116, true, "In", "In"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219198352, 18446744073709551615, 18446744073709551615, 765, 767, 765, 767, 122, 123, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219210972, 18446744073709551615, 18446744073709551615, 799, 801, 799, 801, 128, 129, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206560518651853, 15945529371230903899, 18446744073709551615, 18446744073709551615, 822, 828, 822, 828, 131, 133, true, "in the", "in the"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 329104161786618045, 8725299555592485331, 18446744073709551615, 18446744073709551615, 911, 916, 911, 916, 146, 147, true, "since", "since"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206560518651853, 15945529371230859398, 18446744073709551615, 18446744073709551615, 953, 959, 953, 959, 152, 154, true, "in the", "in the"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541480354, 9094674546964371620, 18446744073709551615, 18446744073709551615, 989, 991, 989, 991, 160, 161, true, "In", "In"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14814148868025447689, 3694567760357366516, 18446744073709551615, 18446744073709551615, 1018, 1026, 1018, 1026, 166, 168, true, "of these", "of these"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219324564, 18446744073709551615, 18446744073709551615, 1078, 1080, 1078, 1080, 176, 177, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206549292198744, 15968280101146838290, 18446744073709551615, 18446744073709551615, 1170, 1176, 1170, 1176, 194, 196, true, "from a", "from a"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219318707, 18446744073709551615, 18446744073709551615, 1185, 1187, 1185, 1187, 197, 198, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 14638857868319795209, 3807143954092066612, 18446744073709551615, 18446744073709551615, 1197, 1205, 1197, 1205, 200, 202, true, "with the", "with the"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206557726458966, 16025464328456092215, 18446744073709551615, 18446744073709551615, 1300, 1306, 1300, 1306, 218, 220, true, "with a", "with a"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206557726458966, 16025464328456099242, 18446744073709551615, 18446744073709551615, 1399, 1405, 1399, 1405, 240, 242, true, "with a", "with a"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485670, 9094674364219303614, 18446744073709551615, 18446744073709551615, 1420, 1422, 1420, 1422, 244, 245, true, "of", "of"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16057368201763467386, 216739275376297295, 18446744073709551615, 18446744073709551615, 1484, 1494, 1484, 1494, 258, 260, true, "from these", "from these"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 5748787292106066554, 4405126515520980867, 18446744073709551615, 18446744073709551615, 1513, 1523, 1513, 1523, 263, 265, true, "these into", "these into"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429163415, 18446744073709551615, 18446744073709551615, 190, 192, 190, 192, 31, 32, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429174755, 18446744073709551615, 18446744073709551615, 268, 270, 268, 270, 44, 45, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429173582, 18446744073709551615, 18446744073709551615, 417, 419, 417, 419, 70, 71, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429146067, 18446744073709551615, 18446744073709551615, 462, 464, 462, 464, 76, 77, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429194340, 18446744073709551615, 18446744073709551615, 632, 634, 632, 634, 101, 102, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 16381206519425733256, 5984372374891954420, 18446744073709551615, 18446744073709551615, 735, 741, 735, 741, 117, 119, true, "to the", "to the"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429185213, 18446744073709551615, 18446744073709551615, 998, 1000, 998, 1000, 162, 163, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429226599, 18446744073709551615, 18446744073709551615, 1150, 1152, 1150, 1152, 191, 192, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429235584, 18446744073709551615, 18446744073709551615, 1275, 1277, 1275, 1277, 214, 215, true, "to", "to"], ["conn", "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, 15441160910541485865, 9094674369429209693, 18446744073709551615, 18446744073709551615, 1391, 1393, 1391, 1393, 238, 239, true, "to", "to"], ["parenthesis", "round brackets", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104053210116957, 4933919093561563747, 18446744073709551615, 18446744073709551615, 295, 300, 295, 300, 52, 55, true, "(CCS)", "(CCS)"], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 5306542014856411002, 14493189109864111156, 18446744073709551615, 18446744073709551615, 0, 132, 0, 132, 0, 24, true, "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files.", "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files."], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 2369217517028793827, 11890147189063173430, 18446744073709551615, 18446744073709551615, 133, 246, 133, 246, 24, 45, true, "The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms.", "The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms."], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12064124790943537514, 9632597734224986436, 18446744073709551615, 18446744073709551615, 247, 375, 247, 375, 45, 69, true, "This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components.", "This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components."], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 1805453063572196406, 15284543814810892665, 18446744073709551615, 18446744073709551615, 376, 440, 376, 440, 69, 82, true, "Each of these microservices can be consumed by its own REST API.", "Each of these microservices can be consumed by its own REST API."], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 210366145485171616, 10779316463372138244, 18446744073709551615, 18446744073709551615, 441, 606, 441, 606, 82, 109, true, "This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform.", "This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform."], ["sentence", "", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 13863701154380798624, 5607686807400793153, 18446744073709551615, 18446744073709551615, 607, 891, 607, 891, 109, 153, true, "In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", "In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures."], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 3741141293805179509, 9675794815446093236, 18446744073709551615, 18446744073709551615, 40, 55, 40, 55, 9, 11, true, "first component", "first component"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 4066887494406769292, 15944572553884562120, 18446744073709551615, 18446744073709551615, 110, 131, 110, 131, 20, 23, true, "structured data files", "structured data files"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15684933964106580812, 12993940953903139083, 18446744073709551615, 18446744073709551615, 208, 225, 208, 225, 40, 42, true, "trainable machine", "trainable machine"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12638008641667971393, 14590037144173376663, 18446744073709551615, 18446744073709551615, 269, 294, 269, 294, 49, 52, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 3812062755894317903, 5752895239615977865, 18446744073709551615, 18446744073709551615, 359, 374, 359, 374, 66, 68, true, "main components", "main components"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 7904009099850099728, 6069321302342300412, 18446744073709551615, 18446744073709551615, 427, 439, 427, 439, 78, 81, true, "own REST API", "own REST API"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14315066823203278267, 5715163301899035549, 18446744073709551615, 18446744073709551615, 483, 500, 483, 500, 90, 92, true, "complex pipelines", "complex pipelines"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 7501920923775581134, 5285457240038734782, 18446744073709551615, 18446744073709551615, 567, 584, 567, 584, 103, 105, true, "new microservices", "new microservices"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 9920918086675479799, 11129371561875838665, 18446744073709551615, 18446744073709551615, 689, 725, 689, 725, 122, 125, true, "asynchronous communication protocols", "asynchronous communication protocols"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 17000938524684089439, 12283057491291530260, 18446744073709551615, 18446744073709551615, 742, 755, 742, 755, 129, 131, true, "many benefits", "many benefits"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 4253886245479866309, 90465651070093109, 18446744073709551615, 18446744073709551615, 773, 799, 773, 799, 136, 139, true, "proper resource management", "proper resource management"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 17671651082391847352, 4285231550406356710, 18446744073709551615, 18446744073709551615, 812, 831, 812, 831, 141, 143, true, "strong dependencies", "strong dependencies"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 239702429653970881, 11301722290661797635, 18446744073709551615, 18446744073709551615, 870, 890, 870, 890, 149, 152, true, "single task failures", "single task failures"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104161668023890, 13427899720650205831, 18446744073709551615, 18446744073709551615, 8, 13, 8, 13, 2, 3, true, "paper", "paper"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 6182654480499682241, 9496359210917921791, 18446744073709551615, 18446744073709551615, 61, 70, 61, 70, 13, 14, true, "ingestion", "ingestion"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 6167933651658664291, 16598695715373476800, 18446744073709551615, 18446744073709551615, 74, 83, 74, 83, 15, 16, true, "documents", "documents"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 2703018679320364082, 14545924726949564279, 18446744073709551615, 18446744073709551615, 94, 104, 94, 104, 18, 19, true, "conversion", "conversion"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14635106751859230946, 3899039667786064358, 18446744073709551615, 18446744073709551615, 137, 145, 137, 145, 25, 26, true, "solution", "solution"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14814125365076808131, 9349977279496653565, 18446744073709551615, 18446744073709551615, 176, 184, 176, 184, 33, 34, true, "platform", "platform"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 389609625695918821, 3664761358525422290, 18446744073709551615, 18446744073709551615, 199, 203, 199, 203, 38, 39, true, "core", "core"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15359670209433732834, 1709633722429132795, 18446744073709551615, 18446744073709551615, 235, 245, 235, 245, 43, 44, true, "algorithms", "algorithms"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14814125365076808131, 9349977279496707806, 18446744073709551615, 18446744073709551615, 252, 260, 252, 260, 46, 47, true, "platform", "platform"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12178341415896221596, 5842744026410738636, 18446744073709551615, 18446744073709551615, 296, 299, 296, 299, 53, 54, true, "CCS", "CCS"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12178341415895638602, 5842694294408079134, 18446744073709551615, 18446744073709551615, 320, 323, 320, 323, 60, 61, true, "set", "set"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 990358581043194791, 393905999985964694, 18446744073709551615, 18446744073709551615, 327, 340, 327, 340, 62, 63, true, "microservices", "microservices"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 990358581043194791, 393905999985936006, 18446744073709551615, 18446744073709551615, 390, 403, 390, 403, 72, 73, true, "microservices", "microservices"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14650448032998792781, 15963759494992376767, 18446744073709551615, 18446744073709551615, 446, 454, 446, 454, 83, 84, true, "approach", "approach"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 6167933651658664291, 16598695715374233051, 18446744073709551615, 18446744073709551615, 512, 521, 512, 521, 94, 95, true, "documents", "documents"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14814125365076808131, 9349977279496610029, 18446744073709551615, 18446744073709551615, 597, 605, 597, 605, 107, 108, true, "platform", "platform"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104161571401725, 13426123714444340915, 18446744073709551615, 18446744073709551615, 610, 615, 610, 615, 110, 111, true, "order", "order"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14814125365076808131, 9349977279496698149, 18446744073709551615, 18446744073709551615, 629, 637, 629, 637, 114, 115, true, "platform", "platform"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 990358581043194791, 393905999985528944, 18446744073709551615, 18446744073709551615, 652, 665, 652, 665, 118, 119, true, "microservices", "microservices"], ["term", "single-term", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14814125365076808131, 9349977279496695017, 18446744073709551615, 18446744073709551615, 846, 854, 846, 854, 146, 147, true, "platform", "platform"], ["verb", "compound-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8568388710680918302, 1832540720065690143, 18446744073709551615, 18446744073709551615, 18, 32, 18, 32, 5, 7, true, "focus entirely", "focus entirely"], ["verb", "compound-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 5237537207757377628, 6864205941272212007, 18446744073709551615, 18446744073709551615, 149, 167, 149, 167, 27, 30, true, "propose is thought", "propose is thought"], ["verb", "compound-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15903921305565697154, 7448795222128154927, 18446744073709551615, 18446744073709551615, 404, 419, 404, 419, 73, 76, true, "can be consumed", "can be consumed"], ["verb", "compound-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8944903948136983007, 3100279804263702344, 18446744073709551615, 18446744073709551615, 666, 680, 666, 680, 119, 121, true, "are integrated", "are integrated"], ["verb", "compound-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 7780068026497460305, 562602692899396130, 18446744073709551615, 18446744073709551615, 760, 772, 760, 772, 133, 136, true, "allows to do", "allows to do"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12178341415895601584, 5841349058796574805, 18446744073709551615, 18446744073709551615, 204, 207, 204, 207, 39, 40, true, "has", "has"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14639581097006750428, 4101766079705362430, 18446744073709551615, 18446744073709551615, 226, 234, 226, 234, 42, 43, true, "learning", "learning"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 16381206563350835754, 15338244529159273971, 18446744073709551615, 18446744073709551615, 262, 268, 262, 268, 48, 49, true, "called", "called"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14652282307475037790, 8362404979343840295, 18446744073709551615, 18446744073709551615, 302, 310, 302, 310, 56, 57, true, "consists", "consists"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 6167774653473311671, 8932714637044289580, 18446744073709551615, 18446744073709551615, 341, 350, 341, 350, 63, 64, true, "organized", "organized"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 16381206569317834029, 15127822949531520780, 18446744073709551615, 18446744073709551615, 464, 470, 464, 470, 86, 87, true, "allows", "allows"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104159303279946, 13502145352581782916, 18446744073709551615, 18446744073709551615, 477, 482, 477, 482, 89, 90, true, "build", "build"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8106476000254393164, 1725287517912256023, 18446744073709551615, 18446744073709551615, 504, 511, 504, 511, 93, 94, true, "process", "process"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 16381206569317834029, 15127822949531294179, 18446744073709551615, 18446744073709551615, 546, 552, 546, 552, 99, 100, true, "allows", "allows"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8106396517344986388, 5854485364096172979, 18446744073709551615, 18446744073709551615, 559, 566, 559, 566, 102, 103, true, "develop", "develop"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 389609625618412480, 3672855485569275414, 18446744073709551615, 18446744073709551615, 619, 623, 619, 623, 112, 113, true, "make", "make"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104159209890617, 13606843864069204390, 18446744073709551615, 18446744073709551615, 733, 738, 733, 738, 127, 128, true, "gives", "gives"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 5305301449677211216, 8681985492456152514, 18446744073709551615, 18446744073709551615, 801, 811, 801, 811, 140, 141, true, "eliminates", "eliminates"], ["verb", "single-verb", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 329104161505838030, 13472448784809337111, 18446744073709551615, 18446744073709551615, 836, 841, 836, 841, 144, 145, true, "makes", "makes"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 1993790582685692910, 3267300742396852093, 18446744073709551615, 18446744073709551615, 855, 869, 855, 869, 147, 149, true, "robust against", "robust against"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8106396862006371970, 13009000795262405678, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "In this", "In this"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 16381206566339127348, 15334506191466791715, 18446744073709551615, 18446744073709551615, 33, 39, 33, 39, 7, 9, true, "on the", "on the"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485670, 4857876500911665887, 18446744073709551615, 18446744073709551615, 71, 73, 71, 73, 14, 15, true, "of", "of"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 389609625698622943, 3653991554605439637, 18446744073709551615, 18446744073709551615, 105, 109, 105, 109, 19, 20, true, "into", "into"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485670, 4857876500911708855, 18446744073709551615, 18446744073709551615, 168, 170, 168, 170, 30, 31, true, "of", "of"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 389609625700764258, 3654402694655081504, 18446744073709551615, 18446744073709551615, 171, 175, 171, 175, 31, 33, true, "as a", "as a"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 14638855195670894879, 12124056112419286236, 18446744073709551615, 18446744073709551615, 186, 194, 186, 194, 35, 37, true, "which at", "which at"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 12178341415895623120, 5842693827432037020, 18446744073709551615, 18446744073709551615, 311, 314, 311, 314, 57, 58, true, "out", "out"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 389609625620237736, 3672771697496836670, 18446744073709551615, 18446744073709551615, 315, 319, 315, 319, 58, 60, true, "of a", "of a"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485670, 4857876500911649386, 18446744073709551615, 18446744073709551615, 324, 326, 324, 326, 61, 62, true, "of", "of"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541486538, 4857876073127839401, 18446744073709551615, 18446744073709551615, 351, 353, 351, 353, 64, 65, true, "in", "in"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 13852121904094090198, 14590995273314953312, 18446744073709551615, 18446744073709551615, 376, 389, 376, 389, 69, 72, true, "Each of these", "Each of these"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541486989, 4857876114906482442, 18446744073709551615, 18446744073709551615, 420, 422, 420, 422, 76, 77, true, "by", "by"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 752127337293867046, 13713074507145666172, 18446744073709551615, 18446744073709551615, 585, 596, 585, 596, 105, 107, true, "against the", "against the"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541480354, 4857876037199396344, 18446744073709551615, 18446744073709551615, 607, 609, 607, 609, 109, 110, true, "In", "In"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 8106478041484051995, 2311188108209868134, 18446744073709551615, 18446744073709551615, 681, 688, 681, 688, 121, 122, true, "through", "through"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485865, 4857876500092540787, 18446744073709551615, 18446744073709551615, 474, 476, 474, 476, 88, 89, true, "to", "to"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485865, 4857876500092539243, 18446744073709551615, 18446744073709551615, 501, 503, 501, 503, 92, 93, true, "to", "to"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485865, 4857876500092543847, 18446744073709551615, 18446744073709551615, 556, 558, 556, 558, 101, 102, true, "to", "to"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485865, 4857876500092547312, 18446744073709551615, 18446744073709551615, 616, 618, 616, 618, 111, 112, true, "to", "to"], ["conn", "single-conn", 3749305213430885773, "TEXT", "#/texts/12", 1.0, 15441160910541485865, 4857876500092426670, 18446744073709551615, 18446744073709551615, 767, 769, 767, 769, 134, 135, true, "to", "to"], ["numval", "ival", 3409470577915009676, "TEXT", "#/texts/13", 1.0, 17767354399704235162, 16337218082829608086, 18446744073709551615, 18446744073709551615, 142, 143, 142, 143, 27, 28, true, "2", "2"], ["expression", "word-concatenation", 3409470577915009676, "TEXT", "#/texts/13", 1.0, 5044385734724420019, 14795950652192688492, 18446744073709551615, 18446744073709551615, 175, 191, 175, 191, 34, 35, true, "state-of-the-art", "state-of-the-art"], ["numval", "ival", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 17767354399704235163, 1719697440342653142, 18446744073709551615, 18446744073709551615, 33, 34, 33, 34, 5, 6, true, "3", "3"], ["numval", "ival", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 17767354399704235156, 1719697438695412307, 18446744073709551615, 18446744073709551615, 105, 106, 105, 106, 20, 21, true, "4", "4"], ["numval", "ival", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 17767354399704235157, 1719697440128552642, 18446744073709551615, 18446744073709551615, 301, 302, 301, 302, 58, 59, true, "5", "5"], ["parenthesis", "round brackets", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 12960504640524214008, 7549890404163577655, 18446744073709551615, 18446744073709551615, 216, 243, 216, 243, 41, 48, true, "(both in users and content)", "(both in users and content)"], ["expression", "wtoken-concatenation", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 329104161622136223, 9304407318657891408, 18446744073709551615, 18446744073709551615, 334, 339, 334, 339, 65, 66, true, "w.r.t", "w.r.t"], ["sentence", "", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 13562905925698502846, 15259172910127558115, 18446744073709551615, 18446744073709551615, 22, 93, 22, 93, 3, 18, true, "In Section 3, we present the design of the platform and its components.", "In Section 3, we present the design of the platform and its components."], ["sentence", "", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 1592324435600311370, 6679784309178480570, 18446744073709551615, 18446744073709551615, 94, 280, 94, 280, 18, 54, true, "In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively.", "In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively."], ["sentence", "", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 18017856606572388707, 11119000415778134338, 18446744073709551615, 18446744073709551615, 281, 340, 281, 340, 54, 67, true, "Finally, in Section 5, we discuss the open questions w.r.t.", "Finally, in Section 5, we discuss the open questions w.r.t."], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 2969135035769345619, 4036418751139797908, 18446744073709551615, 18446744073709551615, 0, 20, 0, 20, 0, 2, true, "processing solutions", "processing solutions"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 7362111305564357210, 14399545547382599450, 18446744073709551615, 18446744073709551615, 141, 159, 141, 159, 28, 30, true, "deployment methods", "deployment methods"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 14592782398836220527, 13536879568188234094, 18446744073709551615, 18446744073709551615, 178, 193, 178, 193, 35, 37, true, "platform scales", "platform scales"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 4421383392096991748, 6284876151106966992, 18446744073709551615, 18446744073709551615, 248, 265, 248, 265, 49, 51, true, "compute resources", "compute resources"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8051609034415273401, 10487247228020021805, 18446744073709551615, 18446744073709551615, 319, 333, 319, 333, 63, 65, true, "open questions", "open questions"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8935218952277810589, 8068414338192124611, 18446744073709551615, 18446744073709551615, 354, 373, 354, 373, 69, 72, true, "possible next steps", "possible next steps"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106352240078799135, 10120178145746215787, 18446744073709551615, 18446744073709551615, 25, 32, 25, 32, 4, 5, true, "Section", "Section"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 16381206568241679420, 8738387660838128289, 18446744073709551615, 18446744073709551615, 51, 57, 51, 57, 10, 11, true, "design", "design"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 14814125365076808131, 2092259178040575550, 18446744073709551615, 18446744073709551615, 65, 73, 65, 73, 13, 14, true, "platform", "platform"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 2703018952916355661, 11574998382432588793, 18446744073709551615, 18446744073709551615, 82, 92, 82, 92, 16, 17, true, "components", "components"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106352240078799135, 10120178145746219109, 18446744073709551615, 18446744073709551615, 97, 104, 97, 104, 19, 20, true, "Section", "Section"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 11899564443746965611, 8824822780498807299, 18446744073709551615, 18446744073709551615, 123, 135, 123, 135, 25, 26, true, "architecture", "architecture"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 16381206521526353544, 8079494218851857408, 18446744073709551615, 18446744073709551615, 199, 205, 199, 205, 38, 39, true, "regard", "regard"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 16381206519640398140, 414969871814550286, 18446744073709551615, 18446744073709551615, 209, 215, 209, 215, 40, 41, true, "volume", "volume"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 329104159157820437, 13127829657860064361, 18446744073709551615, 18446744073709551615, 225, 230, 225, 230, 44, 45, true, "users", "users"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106398484416916345, 11293518884131724477, 18446744073709551615, 18446744073709551615, 235, 242, 235, 242, 46, 47, true, "content", "content"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106352240078799135, 10120178145746003140, 18446744073709551615, 18446744073709551615, 293, 300, 293, 300, 57, 58, true, "Section", "Section"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 14634109233387695059, 13015968863509180771, 18446744073709551615, 18446744073709551615, 341, 349, 341, 349, 67, 68, true, "research", "research"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 1525875096007260836, 12021364178741137402, 18446744073709551615, 18446744073709551615, 381, 392, 381, 392, 74, 75, true, "development", "development"], ["term", "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 14814125365076808131, 2092259178040758447, 18446744073709551615, 18446744073709551615, 400, 408, 400, 408, 77, 78, true, "platform", "platform"], ["verb", "single-verb", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106476016677076976, 9844196961628278464, 18446744073709551615, 18446744073709551615, 39, 46, 39, 46, 8, 9, true, "present", "present"], ["verb", "single-verb", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106397868479560363, 8170627791942563001, 18446744073709551615, 18446744073709551615, 111, 118, 111, 118, 23, 24, true, "discuss", "discuss"], ["verb", "single-verb", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106397868479560363, 8170627791941832362, 18446744073709551615, 18446744073709551615, 307, 314, 307, 314, 61, 62, true, "discuss", "discuss"], ["verb", "single-verb", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 329104161622136223, 9304407318657891408, 18446744073709551615, 18446744073709551615, 334, 339, 334, 339, 65, 66, true, "w.r.t", "w.r.t"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 15441160910541480354, 13110915667349394507, 18446744073709551615, 18446744073709551615, 22, 24, 22, 24, 3, 4, true, "In", "In"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 16381206565712212855, 8774362010989401403, 18446744073709551615, 18446744073709551615, 58, 64, 58, 64, 11, 13, true, "of the", "of the"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 15441160910541480354, 13110915667349571689, 18446744073709551615, 18446744073709551615, 94, 96, 94, 96, 18, 19, true, "In", "In"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 389609625618037948, 4823273682945992581, 18446744073709551615, 18446744073709551615, 194, 198, 194, 198, 37, 38, true, "with", "with"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 8106396909821462677, 1088815676641410103, 18446744073709551615, 18446744073709551615, 217, 224, 217, 224, 42, 44, true, "both in", "both in"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 15441160910541486538, 13110916059597983243, 18446744073709551615, 18446744073709551615, 290, 292, 290, 292, 56, 57, true, "in", "in"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 16381206560518651853, 668849598704261767, 18446744073709551615, 18446744073709551615, 374, 380, 374, 380, 72, 74, true, "in the", "in the"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 16381206565712212855, 8774362010989370225, 18446744073709551615, 18446744073709551615, 393, 399, 393, 399, 75, 77, true, "of the", "of the"], ["conn", "single-conn", 17187299362680072378, "TEXT", "#/texts/14", 1.0, 15441160910541485865, 13110915963300809577, 18446744073709551615, 18446744073709551615, 206, 208, 206, 208, 39, 40, true, "to", "to"], ["numval", "ival", 697648145931166262, "TEXT", "#/texts/15", 1.0, 17767354399704235162, 7083995155582974975, 18446744073709551615, 18446744073709551615, 0, 1, 0, 1, 0, 1, true, "2", "2"], ["numval", "ival", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 17767354399704235163, 2552838057434759723, 18446744073709551615, 18446744073709551615, 130, 131, 130, 131, 20, 21, true, "3", "3"], ["numval", "ival", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 17767354399704235156, 2552838057671732941, 18446744073709551615, 18446744073709551615, 133, 134, 133, 134, 22, 23, true, "4", "4"], ["parenthesis", "square brackets", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206577288742091, 6894361769431189204, 18446744073709551615, 18446744073709551615, 129, 135, 129, 135, 19, 24, true, "[3, 4]", "[3, 4]"], ["expression", "common", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541486545, 16301782680726802891, 18446744073709551615, 18446744073709551615, 558, 562, 558, 562, 101, 102, true, "ie", "i.e."], ["expression", "word-concatenation", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14650469740546809126, 13134297167790756810, 18446744073709551615, 18446744073709551615, 741, 749, 741, 749, 135, 136, true, "JSON/XML", "JSON/XML"], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 7413762744011699502, 5112650843480238838, 18446744073709551615, 18446744073709551615, 0, 136, 0, 136, 0, 25, true, "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4].", "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 5916877018351655351, 15887401132590714495, 18446744073709551615, 18446744073709551615, 137, 205, 137, 205, 25, 38, true, "Broadly speaking, there are two types of approaches to this problem.", "Broadly speaking, there are two types of approaches to this problem."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 6270962906961324285, 7804853914853957296, 18446744073709551615, 18446744073709551615, 206, 359, 206, 359, 38, 66, true, "In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document.", "In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 1323125914001755357, 2708959011598697473, 18446744073709551615, 18446744073709551615, 360, 443, 360, 443, 66, 83, true, "This can be done through a conversion from PDF towards HTML or MS Word for example.", "This can be done through a conversion from PDF towards HTML or MS Word for example."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 7959677268287021834, 6471587455066159969, 18446744073709551615, 18446744073709551615, 444, 711, 444, 711, 83, 128, true, "The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format.", "The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 9906268904976001851, 12420000417227776440, 18446744073709551615, 18446744073709551615, 712, 780, 712, 780, 128, 142, true, "For example, this could be a JSON/XML file with a particular schema.", "For example, this could be a JSON/XML file with a particular schema."], ["sentence", "", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 9067254065901696428, 10388894100200420496, 18446744073709551615, 18446744073709551615, 781, 955, 781, 955, 142, 173, true, "Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", "Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution."], ["term", "enum-term-mark-4", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 2074372556278321470, 3687797441781668801, 18446744073709551615, 18446744073709551615, 415, 430, 415, 430, 76, 80, true, "HTML or MS Word", "HTML or MS Word"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12653831733608918357, 1251885133784117773, 18446744073709551615, 18446744073709551615, 23, 36, 23, 36, 4, 6, true, "PDF documents", "PDF documents"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 1649772470814702484, 1849781250727403708, 18446744073709551615, 18446744073709551615, 41, 73, 41, 73, 7, 10, true, "automatic content reconstruction", "automatic content reconstruction"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 4649638595618642234, 17675128594551486840, 18446744073709551615, 18446744073709551615, 86, 105, 86, 105, 13, 15, true, "outstanding problem", "outstanding problem"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 9088977435888678827, 7025359603537163328, 18446744073709551615, 18446744073709551615, 213, 227, 213, 227, 40, 42, true, "first approach", "first approach"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 5396697874491186037, 9700463201577231321, 18446744073709551615, 18446744073709551615, 320, 342, 320, 342, 59, 62, true, "original visual layout", "original visual layout"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106471324341093100, 10896171766474086033, 18446744073709551615, 18446744073709551615, 423, 430, 423, 430, 78, 80, true, "MS Word", "MS Word"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 10632085908481842480, 3848207310545898370, 18446744073709551615, 18446744073709551615, 448, 472, 448, 472, 84, 87, true, "second approach attempts", "second approach attempts"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 11738704476441755021, 15052719376970997774, 18446744073709551615, 18446744073709551615, 670, 687, 670, 687, 121, 123, true, "original document", "original document"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14630472899120924944, 11642528133024722414, 18446744073709551615, 18446744073709551615, 693, 710, 693, 710, 125, 127, true, "structured format", "structured format"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 673611805924135293, 4470122145607424586, 18446744073709551615, 18446744073709551615, 741, 754, 741, 754, 135, 137, true, "JSON/XML file", "JSON/XML file"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 3982493928589580498, 8690888332062541868, 18446744073709551615, 18446744073709551615, 762, 779, 762, 779, 139, 141, true, "particular schema", "particular schema"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12638008641667971393, 2648999888003643003, 18446744073709551615, 18446744073709551615, 791, 816, 791, 816, 144, 147, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 5385563887835458888, 7655934123629815969, 18446744073709551615, 18446744073709551615, 836, 846, 836, 846, 152, 154, true, "first step", "first step"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 13157956405326233364, 1973865905648942248, 18446744073709551615, 18446744073709551615, 857, 885, 857, 885, 156, 159, true, "knowledge discovery platform", "knowledge discovery platform"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 2940970869648856259, 4641698687139622359, 18446744073709551615, 18446744073709551615, 923, 938, 923, 938, 167, 169, true, "second approach", "second approach"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625631210899, 11923225543122978149, 18446744073709551615, 18446744073709551615, 4, 8, 4, 8, 1, 2, true, "task", "task"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106396543030413423, 13754047564658723918, 18446744073709551615, 18446744073709551615, 121, 128, 121, 128, 18, 19, true, "decades", "decades"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 329104159243796903, 14986068250266130028, 18446744073709551615, 18446744073709551615, 169, 174, 169, 174, 31, 32, true, "types", "types"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15361660588616680195, 14902329689287095849, 18446744073709551615, 18446744073709551615, 178, 188, 178, 188, 33, 34, true, "approaches", "approaches"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106476000253296785, 13943577597598710603, 18446744073709551615, 18446744073709551615, 197, 204, 197, 204, 36, 37, true, "problem", "problem"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 6167933651658664291, 4335834381970813744, 18446744073709551615, 18446744073709551615, 229, 238, 229, 238, 43, 44, true, "documents", "documents"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625699055241, 11924235864545270440, 18446744073709551615, 18446744073709551615, 262, 266, 262, 266, 48, 49, true, "goal", "goal"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106398484416916345, 14750854907318105695, 18446744073709551615, 18446744073709551615, 284, 291, 284, 291, 52, 53, true, "content", "content"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14650401089286948001, 3940465185316459202, 18446744073709551615, 18446744073709551615, 350, 358, 350, 358, 64, 65, true, "document", "document"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 2703018679320364082, 9140199708408016140, 18446744073709551615, 18446744073709551615, 387, 397, 387, 397, 72, 73, true, "conversion", "conversion"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415896289890, 5663892799429042575, 18446744073709551615, 18446744073709551615, 403, 406, 403, 406, 74, 75, true, "PDF", "PDF"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625536535062, 4886157403217231277, 18446744073709551615, 18446744073709551615, 415, 419, 415, 419, 76, 77, true, "HTML", "HTML"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106397496085150773, 18393269004923492619, 18446744073709551615, 18446744073709551615, 435, 442, 435, 442, 81, 82, true, "example", "example"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14650401089286948001, 3940465185316402512, 18446744073709551615, 18446744073709551615, 488, 496, 488, 496, 90, 91, true, "document", "document"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206548538896813, 5095866240506163040, 18446744073709551615, 18446744073709551615, 504, 510, 504, 510, 93, 94, true, "format", "format"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 11387678566946341343, 17509744267177528169, 18446744073709551615, 18446744073709551615, 565, 579, 565, 579, 103, 104, true, "representation", "representation"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14650401089286948001, 3940465185316385541, 18446744073709551615, 18446744073709551615, 587, 595, 587, 595, 106, 107, true, "document", "document"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206590620761857, 2909000255032340916, 18446744073709551615, 18446744073709551615, 624, 630, 624, 630, 112, 113, true, "layout", "layout"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106398484416916345, 14750854907318660619, 18446744073709551615, 18446744073709551615, 653, 660, 653, 660, 118, 119, true, "content", "content"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106397496085150773, 18393269004923502689, 18446744073709551615, 18446744073709551615, 716, 723, 716, 723, 129, 130, true, "example", "example"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 6167933651658664291, 4335834381973654488, 18446744073709551615, 18446744073709551615, 890, 899, 890, 899, 160, 161, true, "documents", "documents"], ["term", "single-term", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14635106751859230946, 4735627980056120373, 18446744073709551615, 18446744073709551615, 946, 954, 946, 954, 171, 172, true, "solution", "solution"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14637910066599595367, 12407342884124908229, 18446744073709551615, 18446744073709551615, 74, 82, 74, 82, 10, 12, true, "has been", "has been"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 11306777524314851869, 10312513804613748064, 18446744073709551615, 18446744073709551615, 239, 252, 239, 252, 44, 46, true, "are converted", "are converted"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14892762836290175599, 11884720085873805949, 18446744073709551615, 18446744073709551615, 365, 376, 365, 376, 67, 70, true, "can be done", "can be done"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 5725510425841190313, 6793581731148837080, 18446744073709551615, 18446744073709551615, 516, 556, 516, 556, 95, 100, true, "can be easily processed programmatically", "can be easily processed programmatically"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12324009607163840510, 17415295047313605608, 18446744073709551615, 18446744073709551615, 602, 619, 602, 619, 108, 111, true, "is not preserving", "is not preserving"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15603860853961168192, 1871538778850020675, 18446744073709551615, 18446744073709551615, 817, 827, 817, 827, 147, 149, true, "is thought", "is thought"], ["verb", "compound-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 5518720680045185536, 10887625114734223201, 18446744073709551615, 18446744073709551615, 904, 914, 904, 914, 163, 165, true, "have opted", "have opted"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 2703018679320640424, 9140221220687091912, 18446744073709551615, 18446744073709551615, 12, 22, 12, 22, 3, 4, true, "converting", "converting"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14635107222397821279, 10264365568526316164, 18446744073709551615, 18446744073709551615, 145, 153, 145, 153, 26, 27, true, "speaking", "speaking"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415895564896, 5663898048848086764, 18446744073709551615, 18446744073709551615, 161, 164, 161, 164, 29, 30, true, "are", "are"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 6168331468892821959, 4002607006832828199, 18446744073709551615, 18446744073709551615, 270, 279, 270, 279, 50, 51, true, "represent", "represent"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106398484416229602, 14750847589478427809, 18446744073709551615, 18446744073709551615, 476, 483, 476, 483, 88, 89, true, "convert", "convert"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541486545, 16301782680726802891, 18446744073709551615, 18446744073709551615, 558, 562, 558, 562, 101, 102, true, "ie", "i.e."], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14652282307552191074, 15854614648765054100, 18446744073709551615, 18446744073709551615, 636, 644, 636, 644, 115, 116, true, "contains", "contains"], ["verb", "single-verb", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14652284122054107911, 17209935458057025089, 18446744073709551615, 18446744073709551615, 730, 738, 730, 738, 132, 134, true, "could be", "could be"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14652285297651805672, 1311477044897965993, 18446744073709551615, 18446744073709551615, 295, 303, 295, 303, 54, 56, true, "close as", "close as"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541485670, 16301784280431244556, 18446744073709551615, 18446744073709551615, 9, 11, 9, 11, 2, 3, true, "of", "of"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415895625940, 5663899155610812530, 18446744073709551615, 18446744073709551615, 106, 109, 106, 109, 15, 16, true, "for", "for"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625618865305, 11914918762736598377, 18446744073709551615, 18446744073709551615, 110, 114, 110, 114, 16, 17, true, "over", "over"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541485670, 16301784280431329095, 18446744073709551615, 18446744073709551615, 175, 177, 175, 177, 32, 33, true, "of", "of"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16380809977974811061, 2944931246915770910, 18446744073709551615, 18446744073709551615, 206, 212, 206, 212, 38, 40, true, "In the", "In the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14638857868319795209, 15171163761431783385, 18446744073709551615, 18446744073709551615, 253, 261, 253, 261, 46, 48, true, "with the", "with the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206565712212855, 13403044955273571907, 18446744073709551615, 18446744073709551615, 343, 349, 343, 349, 62, 64, true, "of the", "of the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 3505887731517758060, 7420350098754473395, 18446744073709551615, 18446744073709551615, 377, 386, 377, 386, 70, 72, true, "through a", "through a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625697843734, 11917857471064369379, 18446744073709551615, 18446744073709551615, 398, 402, 398, 402, 73, 74, true, "from", "from"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106351183251325893, 13733355420818968171, 18446744073709551615, 18446744073709551615, 407, 414, 407, 414, 75, 76, true, "towards", "towards"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415895625940, 5663899155610803742, 18446744073709551615, 18446744073709551615, 431, 434, 431, 434, 80, 81, true, "for", "for"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206560517276114, 13525661071352042922, 18446744073709551615, 18446744073709551615, 497, 503, 497, 503, 91, 93, true, "into a", "into a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206565712212855, 13403044955274076230, 18446744073709551615, 18446744073709551615, 580, 586, 580, 586, 104, 106, true, "of the", "of the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 14637917359887717745, 184091670553687293, 18446744073709551615, 18446744073709551615, 661, 669, 661, 669, 119, 121, true, "from the", "from the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625698530964, 11924245681540490403, 18446744073709551615, 18446744073709551615, 688, 692, 688, 692, 123, 125, true, "in a", "in a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415896108722, 5663904553857312325, 18446744073709551615, 18446744073709551615, 712, 715, 712, 715, 128, 129, true, "For", "For"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206557726458966, 13428361439789699478, 18446744073709551615, 18446744073709551615, 755, 761, 755, 761, 137, 139, true, "with a", "with a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 329104162323265917, 13893873386515814556, 18446744073709551615, 18446744073709551615, 781, 786, 781, 786, 142, 143, true, "Since", "Since"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541485670, 16301784280431091326, 18446744073709551615, 18446744073709551615, 828, 830, 828, 830, 149, 150, true, "of", "of"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 389609625700764258, 11918714484856028265, 18446744073709551615, 18446744073709551615, 831, 835, 831, 835, 150, 152, true, "as a", "as a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 3512299892331381400, 9603465650093366657, 18446744073709551615, 18446744073709551615, 847, 856, 847, 856, 154, 156, true, "towards a", "towards a"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 12178341415895625940, 5663899155610829905, 18446744073709551615, 18446744073709551615, 886, 889, 886, 889, 159, 160, true, "for", "for"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106397727991264470, 13733498763290197426, 18446744073709551615, 18446744073709551615, 915, 922, 915, 922, 165, 167, true, "for the", "for the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541486538, 16301782677078975107, 18446744073709551615, 18446744073709551615, 939, 941, 939, 941, 169, 170, true, "in", "in"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 8106351192298715310, 13572120365031968055, 18446744073709551615, 18446744073709551615, 189, 196, 189, 196, 34, 36, true, "to this", "to this"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541485865, 16301784282097407071, 18446744073709551615, 18446744073709551615, 267, 269, 267, 269, 49, 50, true, "to", "to"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 16381206519425733256, 5705382060992960144, 18446744073709551615, 18446744073709551615, 313, 319, 313, 319, 57, 59, true, "to the", "to the"], ["conn", "single-conn", 7935233310532930917, "TEXT", "#/texts/16", 1.0, 15441160910541485865, 16301784282097404239, 18446744073709551615, 18446744073709551615, 473, 475, 473, 475, 87, 88, true, "to", "to"], ["numval", "ival", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17767354399704235163, 4202447182575023190, 18446744073709551615, 18446744073709551615, 146, 147, 146, 147, 23, 24, true, "3", "3"], ["numval", "ival", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17767354399704235158, 4202447181925614568, 18446744073709551615, 18446744073709551615, 231, 232, 231, 232, 38, 39, true, "6", "6"], ["expression", "word-concatenation", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17169426750435213530, 6067173531611560767, 18446744073709551615, 18446744073709551615, 112, 123, 112, 123, 18, 19, true, "open-source", "open-source"], ["expression", "word-concatenation", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17169426750435213530, 6067173531611378187, 18446744073709551615, 18446744073709551615, 270, 281, 270, 281, 46, 47, true, "open-source", "open-source"], ["expression", "wtoken-concatenation", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 2465266896652791125, 1865592824578355441, 18446744073709551615, 18446744073709551615, 152, 164, 152, 164, 25, 26, true, "Tabula^{4}", "Tabula$^{4}$"], ["expression", "wtoken-concatenation", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 14650455842370127505, 14507811374734524059, 18446744073709551615, 18446744073709551615, 212, 222, 212, 222, 35, 36, true, "Abby^{5}", "Abby$^{5}$"], ["expression", "wtoken-concatenation", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 9454541915838194653, 7851877499332286379, 18446744073709551615, 18446744073709551615, 236, 249, 236, 249, 40, 41, true, "DataCap^{7}", "DataCap$^{7}$"], ["sentence", "", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 9662107284464852524, 16955385700801276233, 18446744073709551615, 18446744073709551615, 0, 90, 0, 90, 0, 14, true, "Many solutions have already been developed that tackle the problem of document conversion.", "Many solutions have already been developed that tackle the problem of document conversion."], ["sentence", "", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17696925929616730455, 10132403727341574990, 18446744073709551615, 18446744073709551615, 91, 165, 91, 165, 14, 27, true, "There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$.", "There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$."], ["sentence", "", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 9728244801126698359, 3329400039246394270, 18446744073709551615, 18446744073709551615, 166, 250, 166, 250, 27, 42, true, "There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$.", "There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$."], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 344850509042108766, 1631224707378500329, 18446744073709551615, 18446744073709551615, 0, 14, 0, 14, 0, 2, true, "Many solutions", "Many solutions"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 17523665367407867975, 16853212651814283089, 18446744073709551615, 18446744073709551615, 70, 89, 70, 89, 11, 13, true, "document conversion", "document conversion"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 9901611344946969001, 12100342176073920155, 18446744073709551615, 18446744073709551615, 112, 132, 112, 132, 18, 20, true, "open-source programs", "open-source programs"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 1906608866734760053, 9360530039227743648, 18446744073709551615, 18446744073709551615, 181, 202, 181, 202, 30, 32, true, "proprietary solutions", "proprietary solutions"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 10744767603644001295, 6556612560521384278, 18446744073709551615, 18446744073709551615, 270, 291, 270, 291, 46, 48, true, "open-source solutions", "open-source solutions"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 3984902188979412540, 2758396569987389277, 18446744073709551615, 18446744073709551615, 303, 332, 303, 332, 51, 54, true, "proprietary solutions support", "proprietary solutions support"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 8106476000253296785, 7348997524064119191, 18446744073709551615, 18446744073709551615, 59, 66, 59, 66, 9, 10, true, "problem", "problem"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 389609625540497480, 14650540096238819920, 18446744073709551615, 18446744073709551615, 141, 145, 141, 145, 22, 23, true, "Xpdf", "Xpdf"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 2465266896652791125, 1865592824578355441, 18446744073709551615, 18446744073709551615, 152, 164, 152, 164, 25, 26, true, "Tabula^{4}", "Tabula$^{4}$"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 14650455842370127505, 14507811374734524059, 18446744073709551615, 18446744073709551615, 212, 222, 212, 222, 35, 36, true, "Abby^{5}", "Abby$^{5}$"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 16381206554997419890, 18238264595526178929, 18446744073709551615, 18446744073709551615, 224, 230, 224, 230, 37, 38, true, "Nuance", "Nuance"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 9454541915838194653, 7851877499332286379, 18446744073709551615, 18446744073709551615, 236, 249, 236, 249, 40, 41, true, "DataCap^{7}", "DataCap$^{7}$"], ["term", "single-term", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 14652282307509823780, 1841895394656946508, 18446744073709551615, 18446744073709551615, 254, 262, 254, 262, 43, 44, true, "contrast", "contrast"], ["verb", "compound-verb", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 48606693242407270, 11184224818946054040, 18446744073709551615, 18446744073709551615, 15, 42, 15, 42, 2, 6, true, "have already been developed", "have already been developed"], ["verb", "compound-verb", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 13108247384673291996, 6754522763130422012, 18446744073709551615, 18446744073709551615, 97, 111, 97, 111, 15, 18, true, "are well known", "are well known"], ["verb", "compound-verb", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 14650447942981204525, 15164378737851800332, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 28, 30, true, "are also", "are also"], ["verb", "single-verb", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 16381206513070605866, 8871660528632519573, 18446744073709551615, 18446744073709551615, 48, 54, 48, 54, 7, 8, true, "tackle", "tackle"], ["conn", "single-conn", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 8106478685702231057, 14269895558410210677, 18446744073709551615, 18446744073709551615, 133, 140, 133, 140, 20, 22, true, "such as", "such as"], ["conn", "single-conn", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 8106478685702231057, 14269895558410270685, 18446744073709551615, 18446744073709551615, 204, 211, 204, 211, 33, 35, true, "such as", "such as"], ["conn", "single-conn", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 15441160910541485670, 9244456599053778727, 18446744073709551615, 18446744073709551615, 67, 69, 67, 69, 10, 11, true, "of", "of"], ["conn", "single-conn", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 15441160910541480354, 9244457284230350460, 18446744073709551615, 18446744073709551615, 251, 253, 251, 253, 42, 43, true, "In", "In"], ["conn", "single-conn", 2762070725424637531, "TEXT", "#/texts/17", 1.0, 16381206519425733256, 6316528514956069057, 18446744073709551615, 18446744073709551615, 263, 269, 263, 269, 44, 46, true, "to the", "to the"], ["parenthesis", "reference", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 12178341415895551530, 7007012457190989418, 18446744073709551615, 18446744073709551615, 299, 302, 299, 302, 49, 50, true, "[1]", "[1]"], ["expression", "wtoken-concatenation", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 12178341415895551530, 7007012457190989418, 18446744073709551615, 18446744073709551615, 299, 302, 299, 302, 49, 50, true, "[1]", "[1]"], ["sentence", "", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 431376792375667906, 14327044623450240949, 18446744073709551615, 18446744073709551615, 35, 161, 35, 161, 5, 25, true, "Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries.", "Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries."], ["sentence", "", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 5097963969909323475, 6200497828341591337, 18446744073709551615, 18446744073709551615, 162, 298, 162, 298, 25, 49, true, "For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref.", "For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref."], ["sentence", "", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 3625891827316137320, 6327186273637832744, 18446744073709551615, 18446744073709551615, 299, 325, 299, 325, 49, 54, true, "[1] and previous editions.", "[1] and previous editions."], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 1906608866734760053, 3011555850208054480, 18446744073709551615, 18446744073709551615, 73, 94, 73, 94, 11, 13, true, "proprietary solutions", "proprietary solutions"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 13555405798633159070, 12440619026730950763, 18446744073709551615, 18446744073709551615, 111, 139, 111, 139, 17, 20, true, "countless academic solutions", "countless academic solutions"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 224884857456525093, 8800751904675089400, 18446744073709551615, 18446744073709551615, 203, 223, 203, 223, 32, 35, true, "complex page layouts", "complex page layouts"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 7206649825919626318, 10082144117564749216, 18446744073709551615, 18446744073709551615, 307, 324, 307, 324, 51, 53, true, "previous editions", "previous editions"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 5303544497514782120, 7345008805812505585, 18446744073709551615, 18446744073709551615, 0, 10, 0, 10, 0, 1, true, "extraction", "extraction"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 6167933651658664291, 12757348439776621126, 18446744073709551615, 18446744073709551615, 24, 33, 24, 33, 3, 4, true, "documents", "documents"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 13985989196784208042, 6397101251702544653, 18446744073709551615, 18446744073709551615, 58, 68, 58, 68, 9, 10, true, "opensource", "opensource"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 5943277062935477155, 1344882994493538740, 18446744073709551615, 18446744073709551615, 151, 160, 151, 160, 23, 24, true, "libraries", "libraries"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 8106397496085150773, 15757418808623037344, 18446744073709551615, 18446744073709551615, 166, 173, 166, 173, 26, 27, true, "example", "example"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 5948328731789408366, 18296547164130656702, 18446744073709551615, 18446744073709551615, 179, 188, 179, 188, 29, 30, true, "challenge", "challenge"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 4575915900499789029, 3616265821829049845, 18446744073709551615, 18446744073709551615, 259, 271, 259, 271, 40, 41, true, "competitions", "competitions"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 329104161878859757, 6665347482264844132, 18446744073709551615, 18446744073709551615, 281, 286, 281, 286, 43, 44, true, "ICDAR", "ICDAR"], ["term", "single-term", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 12178341415896316683, 7006656170865370612, 18446744073709551615, 18446744073709551615, 294, 297, 294, 297, 47, 48, true, "Ref", "Ref"], ["verb", "compound-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 14650447942981204525, 14209441569513214592, 18446744073709551615, 18446744073709551615, 102, 110, 102, 110, 15, 17, true, "are also", "are also"], ["verb", "compound-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 13582317933598830804, 14887377263055835155, 18446744073709551615, 18446744073709551615, 224, 245, 224, 245, 35, 38, true, "is actively addressed", "is actively addressed"], ["verb", "single-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 8106478648743879659, 12679135976426961828, 18446744073709551615, 18446744073709551615, 16, 23, 16, 23, 2, 3, true, "scanned", "scanned"], ["verb", "single-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 329104158563393892, 6347084796372522947, 18446744073709551615, 18446744073709551615, 52, 57, 52, 57, 8, 9, true, "known", "known"], ["verb", "single-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15942873022707780098, 17012788479993724570, 18446744073709551615, 18446744073709551615, 192, 202, 192, 202, 31, 32, true, "segmenting", "segmenting"], ["verb", "single-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 6165973069391114794, 11863768156487224924, 18446744073709551615, 18446744073709551615, 249, 258, 249, 258, 39, 40, true, "recurring", "recurring"], ["verb", "single-verb", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 329104161594697084, 6627718053570720382, 18446744073709551615, 18446744073709551615, 272, 277, 272, 277, 41, 42, true, "posed", "posed"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 389609625697843734, 12402283298966649397, 18446744073709551615, 18446744073709551615, 11, 15, 11, 15, 1, 2, true, "from", "from"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 2330907114827395751, 7417154358652385512, 18446744073709551615, 18446744073709551615, 35, 46, 35, 46, 5, 7, true, "Besides the", "Besides the"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541487053, 8488079551389036732, 18446744073709551615, 18446744073709551615, 148, 150, 148, 150, 22, 23, true, "as", "as"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 12178341415896108722, 7006564727319883066, 18446744073709551615, 18446744073709551615, 162, 165, 162, 165, 25, 26, true, "For", "For"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541485670, 8488079584067276180, 18446744073709551615, 18446744073709551615, 189, 191, 189, 191, 30, 31, true, "of", "of"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541486989, 8488079561168943847, 18446744073709551615, 18446744073709551615, 246, 248, 246, 248, 38, 39, true, "by", "by"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541486989, 8488079561168941359, 18446744073709551615, 18446744073709551615, 278, 280, 278, 280, 42, 43, true, "by", "by"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541487053, 8488079551389030241, 18446744073709551615, 18446744073709551615, 288, 290, 288, 290, 45, 46, true, "as", "as"], ["conn", "single-conn", 7536915191196259776, "TEXT", "#/texts/18", 1.0, 15441160910541486538, 8488079680818651488, 18446744073709551615, 18446744073709551615, 291, 293, 291, 293, 46, 47, true, "in", "in"], ["numval", "ival", 11495493007651807568, "TEXT", "#/texts/19", 1.0, 17767354399704235163, 1677212978845340209, 18446744073709551615, 18446744073709551615, 0, 1, 0, 1, 0, 1, true, "3", "3"], ["sentence", "", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 333520156392116834, 15811852122116104463, 18446744073709551615, 18446744073709551615, 0, 174, 0, 174, 0, 33, true, "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way.", "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way."], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 17523665367407867975, 12904404874656037431, 18446744073709551615, 18446744073709551615, 141, 160, 141, 160, 26, 28, true, "document conversion", "document conversion"], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 8106342689900857659, 11867579569069297583, 18446744073709551615, 18446744073709551615, 166, 173, 166, 173, 30, 32, true, "new way", "new way"], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 14814124842239312745, 3078034766037725322, 18446744073709551615, 18446744073709551615, 10, 18, 10, 18, 2, 3, true, "plethora", "plethora"], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 6168765157982633013, 14939317711471735014, 18446744073709551615, 18446744073709551615, 31, 40, 31, 40, 5, 6, true, "solutions", "solutions"], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 14635106751859230946, 2774775706852130043, 18446744073709551615, 18446744073709551615, 77, 85, 77, 85, 15, 16, true, "solution", "solution"], ["term", "single-term", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 8106476000253296785, 9669543175138669254, 18446744073709551615, 18446744073709551615, 130, 137, 130, 137, 24, 25, true, "problem", "problem"], ["verb", "compound-verb", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 17737636265695672887, 18332533904749557797, 18446744073709551615, 18446744073709551615, 45, 64, 45, 64, 8, 12, true, "would like to point", "would like to point"], ["verb", "single-verb", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 329104162248557356, 752941156226945092, 18446744073709551615, 18446744073709551615, 0, 5, 0, 5, 0, 1, true, "Given", "Given"], ["verb", "single-verb", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 14652255875895390162, 653651603361381634, 18446744073709551615, 18446744073709551615, 22, 30, 22, 30, 4, 5, true, "existing", "existing"], ["verb", "single-verb", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 8106396886952123470, 12838801329929880418, 18446744073709551615, 18446744073709551615, 86, 93, 86, 93, 16, 17, true, "differs", "differs"], ["verb", "single-verb", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 15361660588616680195, 3054906681832241655, 18446744073709551615, 18446744073709551615, 115, 125, 115, 125, 22, 23, true, "approaches", "approaches"], ["conn", "single-conn", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 15441160910541485670, 15411912721590762410, 18446744073709551615, 18446744073709551615, 19, 21, 19, 21, 3, 4, true, "of", "of"], ["conn", "single-conn", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 16057368201763467386, 7544318374715286400, 18446744073709551615, 18446744073709551615, 94, 104, 94, 104, 17, 19, true, "from these", "from these"], ["conn", "single-conn", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 15441160910541485670, 15411912721590754628, 18446744073709551615, 18446744073709551615, 138, 140, 138, 140, 25, 26, true, "of", "of"], ["conn", "single-conn", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 389609625698530964, 7158441064407457660, 18446744073709551615, 18446744073709551615, 161, 165, 161, 165, 28, 30, true, "in a", "in a"], ["conn", "single-conn", 7650015170039242996, "TEXT", "#/texts/20", 1.0, 15441160910541485865, 15411912723377752993, 18446744073709551615, 18446744073709551615, 56, 58, 56, 58, 10, 11, true, "to", "to"], ["expression", "word-concatenation", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15982564436466431745, 16753386948407940471, 18446744073709551615, 18446744073709551615, 41, 51, 41, 51, 10, 11, true, "rule-based", "rule-based"], ["expression", "word-concatenation", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 3753411203337468488, 15547653955780150850, 18446744073709551615, 18446744073709551615, 193, 205, 193, 205, 32, 33, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 1645908499873224608, 218341843414296, 18446744073709551615, 18446744073709551615, 388, 402, 388, 402, 61, 62, true, "time-consuming", "time-consuming"], ["expression", "word-concatenation", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15982564436466431745, 16753386948408616238, 18446744073709551615, 18446744073709551615, 436, 446, 436, 446, 67, 68, true, "rule-based", "rule-based"], ["sentence", "", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 6925086339785270582, 4967815864262610827, 18446744073709551615, 18446744073709551615, 0, 236, 0, 236, 0, 38, true, "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation.", "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation."], ["sentence", "", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 6407641312915953840, 824387259931288587, 18446744073709551615, 18446744073709551615, 237, 469, 237, 469, 38, 71, true, "This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms.", "This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms."], ["sentence", "", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 8689787920237942067, 876894582857790117, 18446744073709551615, 18446744073709551615, 470, 594, 470, 594, 71, 93, true, "This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased.", "This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased."], ["term", "enum-term-mark-1", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 17700892478496312051, 10821180144678536592, 18446744073709551615, 18446744073709551615, 388, 420, 388, 420, 61, 65, true, "time-consuming and costly tuning", "time-consuming and costly tuning"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 14634111720801022362, 3195488238150353861, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 1, 3, true, "key idea", "key idea"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 16546787775173266207, 17746572894245856976, 18446744073709551615, 18446744073709551615, 41, 73, 41, 73, 10, 13, true, "rule-based conversion algorithms", "rule-based conversion algorithms"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 268562468797662458, 10942359132766872226, 18446744073709551615, 18446744073709551615, 94, 109, 94, 109, 17, 19, true, "generic machine", "generic machine"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 1385949438436740444, 1066431205762218852, 18446744073709551615, 18446744073709551615, 219, 235, 219, 235, 35, 37, true, "human annotation", "human annotation"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 8882320954266966503, 11671713689267052644, 18446744073709551615, 18446744073709551615, 242, 260, 242, 260, 39, 41, true, "flexible mechanism", "flexible mechanism"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 9530639523283095495, 1716193621416828213, 18446744073709551615, 18446744073709551615, 296, 313, 296, 313, 48, 50, true, "certain templates", "certain templates"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 363090472507169169, 8647809295474703230, 18446744073709551615, 18446744073709551615, 341, 357, 341, 357, 55, 57, true, "accurate results", "accurate results"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 4501351594407908143, 8920783459215236990, 18446744073709551615, 18446744073709551615, 407, 420, 407, 420, 63, 65, true, "costly tuning", "costly tuning"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 3911758940362647139, 2852281587036452792, 18446744073709551615, 18446744073709551615, 424, 468, 424, 468, 66, 70, true, "traditional rule-based conversion algorithms", "traditional rule-based conversion algorithms"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 4914830112961611503, 16550272884551547652, 18446744073709551615, 18446744073709551615, 490, 504, 490, 504, 75, 77, true, "stark contrast", "stark contrast"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 1888718621804377149, 4793363709933306705, 18446744073709551615, 18446744073709551615, 546, 568, 546, 568, 84, 87, true, "art conversion systems", "art conversion systems"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15359670209433732834, 14480479537813821910, 18446744073709551615, 18446744073709551615, 119, 129, 119, 129, 20, 21, true, "algorithms", "algorithms"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 16381206567230470443, 14326281487546411484, 18446744073709551615, 18446744073709551615, 144, 150, 144, 150, 23, 24, true, "models", "models"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 3753411203337468488, 15547653955780150850, 18446744073709551615, 18446744073709551615, 193, 205, 193, 205, 32, 33, true, "ground-truth", "ground-truth"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 6167933651658664291, 15122240631633557724, 18446744073709551615, 18446744073709551615, 317, 326, 317, 326, 51, 52, true, "documents", "documents"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 14650448032998792781, 15879979342361769675, 18446744073709551615, 18446744073709551615, 475, 483, 475, 483, 72, 73, true, "approach", "approach"], ["term", "single-term", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 329104161640023790, 11881717169008578620, 18446744073709551615, 18446744073709551615, 533, 538, 533, 538, 81, 82, true, "state", "state"], ["verb", "compound-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 1652981674969121277, 5345522223761467591, 18446744073709551615, 18446744073709551615, 24, 36, 24, 36, 6, 9, true, "do not write", "do not write"], ["verb", "compound-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 14953425203724984554, 9921592199341111751, 18446744073709551615, 18446744073709551615, 156, 169, 156, 169, 25, 28, true, "can be easily", "can be easily"], ["verb", "compound-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 5286109702157457123, 14247791188942099735, 18446744073709551615, 18446744073709551615, 274, 295, 274, 295, 44, 48, true, "adapt very quickly to", "adapt very quickly to"], ["verb", "compound-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15545418999534241127, 5045193591230796924, 18446744073709551615, 18446744073709551615, 328, 340, 328, 340, 53, 55, true, "achieve very", "achieve very"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541486535, 7262104292544184272, 18446744073709551615, 18446744073709551615, 13, 15, 13, 15, 3, 4, true, "is", "is"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 8106477998160600386, 17975660313851847310, 18446744073709551615, 18446744073709551615, 86, 93, 86, 93, 16, 17, true, "utilize", "utilize"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 14639581097006750428, 13078762433064859990, 18446744073709551615, 18446744073709551615, 110, 118, 110, 118, 19, 20, true, "learning", "learning"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 8106476000256008955, 13110106039936157176, 18446744073709551615, 18446744073709551615, 136, 143, 136, 143, 22, 23, true, "produce", "produce"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 8106351024635822250, 6482860715812624614, 18446744073709551615, 18446744073709551615, 182, 189, 182, 189, 30, 31, true, "trained", "trained"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 14650442552334623127, 18275297430362112280, 18446744073709551615, 18446744073709551615, 206, 214, 206, 214, 33, 34, true, "acquired", "acquired"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 16381206569317834029, 14325451716460439025, 18446744073709551615, 18446744073709551615, 261, 267, 261, 267, 41, 42, true, "allows", "allows"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 5305301449677211216, 12271059041548086334, 18446744073709551615, 18446744073709551615, 373, 383, 373, 383, 59, 60, true, "eliminates", "eliminates"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541486535, 7262104292544005935, 18446744073709551615, 18446744073709551615, 484, 486, 484, 486, 73, 74, true, "is", "is"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 6182925164797550141, 4080506602497980982, 18446744073709551615, 18446744073709551615, 523, 532, 523, 532, 80, 81, true, "mentioned", "mentioned"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 12178341415895564896, 11980759772482637587, 18446744073709551615, 18446744073709551615, 576, 579, 576, 579, 89, 90, true, "are", "are"], ["verb", "single-verb", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 6168252184209599630, 11624964441948682836, 18446744073709551615, 18446744073709551615, 584, 593, 584, 593, 91, 92, true, "rulebased", "rulebased"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 389609625631229034, 13897665868783500837, 18446744073709551615, 18446744073709551615, 16, 20, 16, 20, 4, 5, true, "that", "that"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541485678, 7262104301308027569, 18446744073709551615, 18446744073709551615, 190, 192, 190, 192, 31, 32, true, "on", "on"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 12178341415896456267, 11980837516659458599, 18446744073709551615, 18446744073709551615, 215, 218, 215, 218, 34, 35, true, "via", "via"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541485670, 7262104379839096841, 18446744073709551615, 18446744073709551615, 314, 316, 314, 316, 50, 51, true, "of", "of"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541485670, 7262104379839071574, 18446744073709551615, 18446744073709551615, 421, 423, 421, 423, 65, 66, true, "of", "of"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541486538, 7262104292909614113, 18446744073709551615, 18446744073709551615, 487, 489, 487, 489, 74, 75, true, "in", "in"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 16381206565712212855, 14516857138090705799, 18446744073709551615, 18446744073709551615, 539, 545, 539, 545, 82, 84, true, "of the", "of the"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541485865, 7262104290391125417, 18446744073709551615, 18446744073709551615, 271, 273, 271, 273, 43, 44, true, "to", "to"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 15441160910541485865, 7262104290391122531, 18446744073709551615, 18446744073709551615, 293, 295, 293, 295, 47, 48, true, "to", "to"], ["conn", "single-conn", 14959508657858158650, "TEXT", "#/texts/21", 1.0, 16381206519425733256, 12408277484933909023, 18446744073709551615, 18446744073709551615, 505, 511, 505, 511, 77, 79, true, "to the", "to the"], ["parenthesis", "round brackets", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6550638212396612728, 10771033758102967319, 18446744073709551615, 18446744073709551615, 359, 385, 359, 385, 65, 72, true, "(or a corpus of documents)", "(or a corpus of documents)"], ["parenthesis", "round brackets", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6314058359297877881, 9707414947244523127, 18446744073709551615, 18446744073709551615, 513, 578, 513, 578, 97, 110, true, "(e.g. scientific articles, patents, regulations, contracts, etc.)", "(e.g. scientific articles, patents, regulations, contracts, etc.)"], ["parenthesis", "round brackets", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8057413709704099528, 1050132540479424454, 18446744073709551615, 18446744073709551615, 723, 745, 723, 745, 135, 141, true, "(no matter its origin)", "(no matter its origin)"], ["expression", "common", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541487324, 2236281911783920424, 18446744073709551615, 18446744073709551615, 514, 518, 514, 518, 98, 99, true, "eg", "e.g."], ["expression", "common", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 12178341415895450733, 6844731054316482558, 18446744073709551615, 18446744073709551615, 573, 577, 573, 577, 108, 109, true, "etc", "etc."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 9012657468363944610, 8765370141450009242, 18446744073709551615, 18446744073709551615, 0, 216, 0, 216, 0, 35, true, "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design.", "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 17113763778403085212, 5836750622876672848, 18446744073709551615, 18446744073709551615, 217, 291, 217, 291, 35, 52, true, "First of all, one can not think anymore at the level of a single document.", "First of all, one can not think anymore at the level of a single document."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 234003328160338467, 7991365496276900509, 18446744073709551615, 18446744073709551615, 292, 386, 292, 386, 52, 73, true, "Rather, one should think at the level of a collection of documents (or a corpus of documents).", "Rather, one should think at the level of a collection of documents (or a corpus of documents)."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 10764335410877048383, 9149814586004775117, 18446744073709551615, 18446744073709551615, 387, 592, 387, 592, 73, 113, true, "A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is.", "A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 12007288182356882448, 17097814921561813617, 18446744073709551615, 18446744073709551615, 593, 788, 593, 788, 113, 150, true, "This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format.", "This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 17200018804130084830, 4710316362795570196, 18446744073709551615, 18446744073709551615, 789, 895, 789, 895, 150, 169, true, "Our solution can ingest an entire collection of documents and build machine learned models on top of that.", "Our solution can ingest an entire collection of documents and build machine learned models on top of that."], ["sentence", "", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 2832657334028567700, 16598107722788837982, 18446744073709551615, 18446744073709551615, 896, 983, 896, 983, 169, 190, true, "Of course, once the the model is trained, one can convert documents one at a time, too.", "Of course, once the the model is trained, one can convert documents one at a time, too."], ["term", "enum-term-mark-3", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6940993203873682013, 11180826619488609321, 18446744073709551615, 18446744073709551615, 656, 674, 656, 674, 123, 126, true, "solutions and ours", "solutions and ours"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14803416660534245041, 14676542655277815623, 18446744073709551615, 18446744073709551615, 117, 128, 117, 128, 18, 20, true, "current era", "current era"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 12494727366192470008, 14865632197249103574, 18446744073709551615, 18446744073709551615, 132, 155, 132, 155, 21, 23, true, "artificial intelligence", "artificial intelligence"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 10965400769022712461, 10837956255692384921, 18446744073709551615, 18446744073709551615, 169, 189, 169, 189, 27, 29, true, "serious consequences", "serious consequences"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 17817695283880367270, 1923283193609983271, 18446744073709551615, 18446744073709551615, 275, 290, 275, 290, 49, 51, true, "single document", "single document"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 17817695283880367270, 1923283193609994591, 18446744073709551615, 18446744073709551615, 417, 432, 417, 432, 79, 81, true, "single document", "single document"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 5446369751016637748, 2451284751526455026, 18446744073709551615, 18446744073709551615, 487, 499, 487, 499, 93, 95, true, "certain type", "certain type"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 4921841042246041201, 10983409214681496834, 18446744073709551615, 18446744073709551615, 514, 538, 514, 538, 98, 101, true, "eg scientific articles", "e.g. scientific articles"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8853087811719781330, 14038095559600948860, 18446744073709551615, 18446744073709551615, 605, 626, 605, 626, 116, 119, true, "first big distinction", "first big distinction"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8644330667942088529, 12068348818841150869, 18446744073709551615, 18446744073709551615, 774, 787, 774, 787, 147, 149, true, "output format", "output format"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 9670577903281859793, 4784628170822326470, 18446744073709551615, 18446744073709551615, 816, 833, 816, 833, 155, 157, true, "entire collection", "entire collection"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14650448032998792781, 3638043485221670922, 18446744073709551615, 18446744073709551615, 10, 18, 10, 18, 2, 3, true, "approach", "approach"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625633008101, 5827484634985091758, 18446744073709551615, 18446744073709551615, 31, 35, 31, 35, 5, 6, true, "rule", "rule"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6168765157982633013, 17080776441400876326, 18446744073709551615, 18446744073709551615, 42, 51, 42, 51, 7, 8, true, "solutions", "solutions"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106464587473865376, 10863088468137730445, 18446744073709551615, 18446744073709551615, 57, 64, 57, 64, 9, 10, true, "machine", "machine"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6168765157982633013, 17080776441400829230, 18446744073709551615, 18446744073709551615, 74, 83, 74, 83, 11, 12, true, "solutions", "solutions"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206521526353544, 17837005405950977614, 18446744073709551615, 18446744073709551615, 195, 201, 195, 201, 30, 31, true, "regard", "regard"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206568241679420, 6463362562880610794, 18446744073709551615, 18446744073709551615, 209, 215, 209, 215, 33, 34, true, "design", "design"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161872148023, 4146693174861139295, 18446744073709551615, 18446744073709551615, 217, 222, 217, 222, 35, 36, true, "First", "First"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161602483077, 4501508833607620084, 18446744073709551615, 18446744073709551615, 264, 269, 264, 269, 46, 47, true, "level", "level"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161602483077, 4501508833607664683, 18446744073709551615, 18446744073709551615, 324, 329, 324, 329, 59, 60, true, "level", "level"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 2702984786539193186, 5625500215606040366, 18446744073709551615, 18446744073709551615, 335, 345, 335, 345, 62, 63, true, "collection", "collection"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6167933651658664291, 17130544635473657019, 18446744073709551615, 18446744073709551615, 349, 358, 349, 358, 64, 65, true, "documents", "documents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206562408205435, 6662313161856348398, 18446744073709551615, 18446744073709551615, 365, 371, 365, 371, 68, 69, true, "corpus", "corpus"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6167933651658664291, 17130544635473655477, 18446744073709551615, 18446744073709551615, 375, 384, 375, 384, 70, 71, true, "documents", "documents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106464587473865376, 10863088468137624072, 18446744073709551615, 18446744073709551615, 389, 396, 389, 396, 74, 75, true, "machine", "machine"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161610777240, 4148531692542071489, 18446744073709551615, 18446744073709551615, 405, 410, 405, 410, 76, 77, true, "model", "model"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106464587473865376, 10863088468137655677, 18446744073709551615, 18446744073709551615, 459, 466, 459, 466, 88, 89, true, "machine", "machine"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161610777240, 4148531692542068691, 18446744073709551615, 18446744073709551615, 475, 480, 475, 480, 90, 91, true, "model", "model"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6167933651658664291, 17130544635473631637, 18446744073709551615, 18446744073709551615, 503, 512, 503, 512, 96, 97, true, "documents", "documents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106479143938802112, 7753532983827535730, 18446744073709551615, 18446744073709551615, 540, 547, 540, 547, 102, 103, true, "patents", "patents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 4973525406703593304, 3125450270057085908, 18446744073709551615, 18446744073709551615, 549, 560, 549, 560, 104, 105, true, "regulations", "regulations"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 5947882010261766213, 2807085994503528596, 18446744073709551615, 18446744073709551615, 562, 571, 562, 571, 106, 107, true, "contracts", "contracts"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6168765157982633013, 17080776441399891140, 18446744073709551615, 18446744073709551615, 656, 665, 656, 665, 123, 124, true, "solutions", "solutions"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625618862505, 5824689271058039311, 18446744073709551615, 18446744073709551615, 670, 674, 670, 674, 125, 126, true, "ours", "ours"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6168765157982633013, 17080776441399897623, 18446744073709551615, 18446744073709551615, 685, 694, 685, 694, 128, 129, true, "solutions", "solutions"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14650401089286948001, 13693331144318097274, 18446744073709551615, 18446744073709551615, 704, 712, 704, 712, 131, 132, true, "document", "document"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625631241985, 5827513608331038644, 18446744073709551615, 18446744073709551615, 718, 722, 718, 722, 134, 135, true, "time", "time"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206594266096010, 12213703933534801602, 18446744073709551615, 18446744073709551615, 727, 733, 727, 733, 137, 138, true, "matter", "matter"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206566166820610, 6036520036764853357, 18446744073709551615, 18446744073709551615, 738, 744, 738, 744, 139, 140, true, "origin", "origin"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14635106751859230946, 8364773269850029437, 18446744073709551615, 18446744073709551615, 793, 801, 793, 801, 151, 152, true, "solution", "solution"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6167933651658664291, 17130544635473593661, 18446744073709551615, 18446744073709551615, 837, 846, 837, 846, 158, 159, true, "documents", "documents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106464587473865376, 10863088468137810773, 18446744073709551615, 18446744073709551615, 857, 864, 857, 864, 161, 162, true, "machine", "machine"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206567230470443, 6024418531468018651, 18446744073709551615, 18446744073709551615, 873, 879, 873, 879, 163, 164, true, "models", "models"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 12178341415895527965, 6844715057235761660, 18446744073709551615, 18446744073709551615, 883, 886, 883, 886, 165, 166, true, "top", "top"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206562412792821, 6657216968808098043, 18446744073709551615, 18446744073709551615, 899, 905, 899, 905, 170, 171, true, "course", "course"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161610777240, 4148531692542039942, 18446744073709551615, 18446744073709551615, 920, 925, 920, 925, 175, 176, true, "model", "model"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6167933651658664291, 17130544635473603162, 18446744073709551615, 18446744073709551615, 954, 963, 954, 963, 182, 183, true, "documents", "documents"], ["term", "single-term", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625631241985, 5827513608331054986, 18446744073709551615, 18446744073709551615, 973, 977, 973, 977, 186, 187, true, "time", "time"], ["verb", "compound-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16649210781200388969, 11737823472753388342, 18446744073709551615, 18446744073709551615, 84, 101, 84, 101, 12, 15, true, "might appear very", "might appear very"], ["verb", "compound-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 5898608652259485892, 8273749972636893410, 18446744073709551615, 18446744073709551615, 243, 256, 243, 256, 42, 44, true, "think anymore", "think anymore"], ["verb", "compound-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 9168974707522175422, 15268643747920788932, 18446744073709551615, 18446744073709551615, 433, 444, 433, 444, 81, 84, true, "is not very", "is not very"], ["verb", "compound-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15603860833301917639, 10275051749542929918, 18446744073709551615, 18446744073709551615, 926, 936, 926, 936, 176, 178, true, "is trained", "is trained"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14634110115213467595, 3902842052710630018, 18446744073709551615, 18446744073709551615, 22, 30, 22, 30, 4, 5, true, "swapping", "swapping"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104159219515955, 4537815630564757009, 18446744073709551615, 18446744073709551615, 36, 41, 36, 41, 6, 7, true, "based", "based"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14639581097006750428, 17137957296901215778, 18446744073709551615, 18446744073709551615, 65, 73, 65, 73, 10, 11, true, "learning", "learning"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 12178341415895601584, 6844715253719182703, 18446744073709551615, 18446744073709551615, 160, 163, 160, 163, 25, 26, true, "has", "has"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 232434223375785884, 14053882561685075584, 18446744073709551615, 18446744073709551615, 304, 316, 304, 316, 55, 57, true, "should think", "should think"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106342444693204894, 15950879447442477786, 18446744073709551615, 18446744073709551615, 397, 404, 397, 404, 75, 76, true, "learned", "learned"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106342444693204894, 15950879447442486216, 18446744073709551615, 18446744073709551615, 467, 474, 467, 474, 89, 90, true, "learned", "learned"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541486535, 2236282123090341336, 18446744073709551615, 18446744073709551615, 589, 591, 589, 591, 111, 112, true, "is", "is"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541486535, 2236282123090341658, 18446744073709551615, 18446744073709551615, 598, 600, 598, 600, 114, 115, true, "is", "is"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14652255875895390162, 17681309128804460766, 18446744073709551615, 18446744073709551615, 647, 655, 647, 655, 122, 123, true, "existing", "existing"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14650277091100196516, 16447833424682109222, 18446744073709551615, 18446744073709551615, 676, 684, 676, 684, 127, 128, true, "Existing", "Existing"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625631208371, 5827506598805952653, 18446744073709551615, 18446744073709551615, 695, 699, 695, 699, 129, 130, true, "take", "take"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106398484416229602, 7463572213204385535, 18446744073709551615, 18446744073709551615, 750, 757, 750, 757, 142, 143, true, "convert", "convert"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106396517639247034, 13209657695880625575, 18446744073709551615, 18446744073709551615, 766, 773, 766, 773, 146, 147, true, "desired", "desired"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 2873440693780286732, 16051197931505562856, 18446744073709551615, 18446744073709551615, 802, 812, 802, 812, 152, 154, true, "can ingest", "can ingest"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104159303279946, 4494576876171974588, 18446744073709551615, 18446744073709551615, 851, 856, 851, 856, 160, 161, true, "build", "build"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106342444693204894, 15950879447442468527, 18446744073709551615, 18446744073709551615, 865, 872, 865, 872, 162, 163, true, "learned", "learned"], ["verb", "single-verb", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 14892592691705778163, 9208424151114403903, 18446744073709551615, 18446744073709551615, 942, 953, 942, 953, 180, 182, true, "can convert", "can convert"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 2452542797585455507, 2035939665976407914, 18446744073709551615, 18446744073709551615, 102, 112, 102, 112, 15, 17, true, "natural in", "natural in"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 6179252389649895475, 8191807611968745212, 18446744073709551615, 18446744073709551615, 0, 9, 0, 9, 0, 2, true, "While the", "While the"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107242376600, 18446744073709551615, 18446744073709551615, 19, 21, 19, 21, 3, 4, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625618037948, 5823902957159802163, 18446744073709551615, 18446744073709551615, 52, 56, 52, 56, 8, 9, true, "with", "with"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107242384356, 18446744073709551615, 18446744073709551615, 129, 131, 129, 131, 20, 21, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625618037948, 5823902957159794765, 18446744073709551615, 18446744073709551615, 190, 194, 190, 194, 29, 30, true, "with", "with"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206565712007226, 5986290651637987665, 18446744073709551615, 18446744073709551615, 223, 229, 223, 229, 36, 38, true, "of all", "of all"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206568372064271, 6462428766212968916, 18446744073709551615, 18446744073709551615, 257, 263, 257, 263, 44, 46, true, "at the", "at the"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625620237736, 5824650452540823625, 18446744073709551615, 18446744073709551615, 270, 274, 270, 274, 47, 49, true, "of a", "of a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 16381206568372064271, 6462428766212960915, 18446744073709551615, 18446744073709551615, 317, 323, 317, 323, 57, 59, true, "at the", "at the"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625620237736, 5824650452540841788, 18446744073709551615, 18446744073709551615, 330, 334, 330, 334, 60, 62, true, "of a", "of a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107242430314, 18446744073709551615, 18446744073709551615, 346, 348, 346, 348, 63, 64, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107242428103, 18446744073709551615, 18446744073709551615, 372, 374, 372, 374, 69, 70, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161711024499, 4147931113686279460, 18446744073709551615, 18446744073709551615, 411, 416, 411, 416, 77, 79, true, "for a", "for a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 329104161711024499, 4147931113686284098, 18446744073709551615, 18446744073709551615, 481, 486, 481, 486, 91, 93, true, "for a", "for a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107242403559, 18446744073709551615, 18446744073709551615, 500, 502, 500, 502, 95, 96, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 2011002864325523456, 7710761867679380926, 18446744073709551615, 18446744073709551615, 627, 638, 627, 638, 119, 121, true, "between the", "between the"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625700792947, 5823321478145663200, 18446744073709551615, 18446744073709551615, 713, 717, 713, 717, 132, 134, true, "at a", "at a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485670, 2236282107240472436, 18446744073709551615, 18446744073709551615, 834, 836, 834, 836, 157, 158, true, "of", "of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485678, 2236282107108170895, 18446744073709551615, 18446744073709551615, 880, 882, 880, 882, 164, 165, true, "on", "on"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 8106342927224204147, 12374411364497639723, 18446744073709551615, 18446744073709551615, 887, 894, 887, 894, 166, 168, true, "of that", "of that"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541487694, 2236281866955609411, 18446744073709551615, 18446744073709551615, 896, 898, 896, 898, 169, 170, true, "Of", "Of"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625700792947, 5823321478145646816, 18446744073709551615, 18446744073709551615, 968, 972, 968, 972, 184, 186, true, "at a", "at a"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 15441160910541485865, 2236282073726956815, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 31, 32, true, "to", "to"], ["conn", "single-conn", 10379300903412882972, "TEXT", "#/texts/22", 1.0, 389609625631408052, 5827512620732698108, 18446744073709551615, 18446744073709551615, 761, 765, 761, 765, 144, 146, true, "to a", "to a"], ["expression", "word-concatenation", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 3753411203337468488, 10952332446895423423, 18446744073709551615, 18446744073709551615, 110, 122, 110, 122, 19, 20, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 3753411203337468488, 10952332446895315527, 18446744073709551615, 18446744073709551615, 383, 395, 383, 395, 70, 71, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15169931585135175826, 16208226134974990802, 18446744073709551615, 18446744073709551615, 814, 825, 814, 825, 151, 152, true, "cloud-based", "cloud-based"], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 5932144278161606896, 5324910960360793763, 18446744073709551615, 18446744073709551615, 0, 165, 0, 165, 0, 30, true, "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it.", "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it."], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16765361859705871972, 1374856743232605456, 18446744073709551615, 18446744073709551615, 166, 347, 166, 347, 30, 64, true, "Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way.", "Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way."], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 5468526205922080258, 1940547468406540901, 18446744073709551615, 18446744073709551615, 348, 417, 348, 417, 64, 76, true, "These annotations are then used as ground-truth data to train models.", "These annotations are then used as ground-truth data to train models."], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 7094528533515465642, 13371907670247459710, 18446744073709551615, 18446744073709551615, 418, 666, 418, 666, 76, 123, true, "It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents.", "It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents."], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 7539280031005994041, 11157353677670739305, 18446744073709551615, 18446744073709551615, 667, 776, 667, 776, 123, 143, true, "For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application.", "For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application."], ["sentence", "", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 12765583623534385435, 13069085147009366023, 18446744073709551615, 18446744073709551615, 777, 916, 777, 916, 143, 167, true, "It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way.", "It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way."], ["term", "enum-term-mark-1", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16627740256570112677, 4583225854042791398, 18446744073709551615, 18446744073709551615, 889, 915, 889, 915, 162, 166, true, "efficient and scalable way", "efficient and scalable way"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16649733772742282194, 16236977419730296615, 18446744073709551615, 18446744073709551615, 2, 22, 2, 22, 1, 3, true, "second discriminator", "second discriminator"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 41055560552253761, 8373548184892428504, 18446744073709551615, 18446744073709551615, 333, 346, 333, 346, 61, 63, true, "efficient way", "efficient way"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 7255057471482664248, 12334180322615590615, 18446744073709551615, 18446744073709551615, 383, 400, 383, 400, 70, 72, true, "ground-truth data", "ground-truth data"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6611502802514240854, 5277397327434795381, 18446744073709551615, 18446744073709551615, 440, 449, 440, 449, 81, 83, true, "ML models", "ML models"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 948606581867615032, 15713215180443530866, 18446744073709551615, 18446744073709551615, 457, 468, 457, 468, 85, 87, true, "extra level", "extra level"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 3290422095559676021, 14164410702381973396, 18446744073709551615, 18446744073709551615, 649, 665, 649, 665, 120, 122, true, "unseen documents", "unseen documents"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 2751272921550991289, 7405187680881990384, 18446744073709551615, 18446744073709551615, 753, 775, 753, 775, 140, 142, true, "monolithic application", "monolithic application"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 12206009578906402256, 4547981104066793380, 18446744073709551615, 18446744073709551615, 814, 834, 814, 834, 151, 153, true, "cloud-based platform", "cloud-based platform"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 4399570346043001090, 4520288225740080432, 18446744073709551615, 18446744073709551615, 903, 915, 903, 915, 164, 166, true, "scalable way", "scalable way"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6168765157982633013, 8701842085527306720, 18446744073709551615, 18446744073709551615, 44, 53, 44, 53, 6, 7, true, "solutions", "solutions"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104159242674854, 11255009296337504749, 18446744073709551615, 18446744073709551615, 94, 99, 94, 99, 16, 17, true, "tools", "tools"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161610777240, 6745146534538645369, 18446744073709551615, 18446744073709551615, 133, 138, 133, 138, 23, 24, true, "model", "model"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106397680705385749, 10181601809432141973, 18446744073709551615, 18446744073709551615, 197, 204, 197, 204, 38, 39, true, "ability", "ability"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14759757818438587716, 16662422774652150485, 18446744073709551615, 18446744073709551615, 215, 226, 215, 226, 41, 42, true, "collections", "collections"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6167933651658664291, 5250243073519619651, 18446744073709551615, 18446744073709551615, 230, 239, 230, 239, 43, 44, true, "documents", "documents"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106397680705385749, 10181601809432137128, 18446744073709551615, 18446744073709551615, 258, 265, 258, 265, 49, 50, true, "ability", "ability"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16381206523622331561, 13908633511487514086, 18446744073709551615, 18446744073709551615, 270, 276, 270, 276, 51, 52, true, "people", "people"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6167933651658664291, 5250243073519632368, 18446744073709551615, 18446744073709551615, 289, 298, 289, 298, 54, 55, true, "documents", "documents"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 1037258523789473353, 14806284881759797213, 18446744073709551615, 18446744073709551615, 315, 326, 315, 326, 58, 59, true, "annotations", "annotations"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 1037258523789473353, 14806284881759605125, 18446744073709551615, 18446744073709551615, 354, 365, 354, 365, 65, 66, true, "annotations", "annotations"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16381206567230470443, 12706729533498022281, 18446744073709551615, 18446744073709551615, 410, 416, 410, 416, 74, 75, true, "models", "models"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 2703018890300243966, 14454961743665027442, 18446744073709551615, 18446744073709551615, 472, 482, 472, 482, 88, 89, true, "complexity", "complexity"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106397680705385749, 10181601809432183454, 18446744073709551615, 18446744073709551615, 507, 514, 507, 514, 95, 96, true, "ability", "ability"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 2702984786539193186, 8321306199832412002, 18446744073709551615, 18446744073709551615, 526, 536, 526, 536, 99, 100, true, "collection", "collection"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6167933651658664291, 5250243073519635188, 18446744073709551615, 18446744073709551615, 540, 549, 540, 549, 101, 102, true, "documents", "documents"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6167933651658664291, 5250243073519124599, 18446744073709551615, 18446744073709551615, 566, 575, 566, 575, 105, 106, true, "documents", "documents"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 1037258523789473353, 14806284881759620361, 18446744073709551615, 18446744073709551615, 587, 598, 587, 598, 109, 110, true, "annotations", "annotations"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161610777240, 6745146534539012137, 18446744073709551615, 18446744073709551615, 608, 613, 608, 613, 113, 114, true, "model", "model"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161610777240, 6745146534539001905, 18446744073709551615, 18446744073709551615, 640, 645, 640, 645, 118, 119, true, "model", "model"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106397759446161562, 7436678281325725738, 18446744073709551615, 18446744073709551615, 675, 682, 675, 682, 125, 126, true, "authors", "authors"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161668023890, 5928708168625868047, 18446744073709551615, 18446744073709551615, 691, 696, 691, 696, 128, 129, true, "paper", "paper"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14635106751859230946, 3996697737790679411, 18446744073709551615, 18446744073709551615, 732, 740, 732, 740, 136, 137, true, "solution", "solution"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106398484423890147, 9444965948200230503, 18446744073709551615, 18446744073709551615, 801, 808, 801, 808, 148, 149, true, "concept", "concept"], ["term", "single-term", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104159214088329, 11255154458886190531, 18446744073709551615, 18446744073709551615, 877, 882, 877, 882, 159, 160, true, "tasks", "tasks"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 10428082093831915533, 17222226489144724218, 18446744073709551615, 18446744073709551615, 74, 89, 74, 89, 12, 15, true, "need to provide", "need to provide"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 7137504529039753077, 6726253915406803145, 18446744073709551615, 18446744073709551615, 139, 153, 139, 153, 24, 27, true, "can be trained", "can be trained"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 11484405948387455014, 6676833556353250435, 18446744073709551615, 18446744073709551615, 366, 379, 366, 379, 66, 69, true, "are then used", "are then used"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 13631356157997264976, 9759866056759424533, 18446744073709551615, 18446744073709551615, 488, 502, 488, 502, 91, 94, true, "has to provide", "has to provide"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 13060376269584473124, 17779441535114424202, 18446744073709551615, 18446744073709551615, 701, 714, 701, 714, 131, 133, true, "was therefore", "was therefore"], ["verb", "compound-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6187720399501537329, 12024430126344431887, 18446744073709551615, 18446744073709551615, 780, 789, 780, 789, 144, 146, true, "fits much", "fits much"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14652255875895390162, 58103428862529223, 18446744073709551615, 18446744073709551615, 35, 43, 35, 43, 5, 6, true, "existing", "existing"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541486535, 5881896978058135498, 18446744073709551615, 18446744073709551615, 63, 65, 63, 65, 9, 10, true, "is", "is"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16381206562264646932, 12839380256362217334, 18446744073709551615, 18446744073709551615, 103, 109, 103, 109, 18, 19, true, "gather", "gather"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541486853, 5881896976958836184, 18446744073709551615, 18446744073709551615, 182, 184, 182, 184, 34, 35, true, "do", "do"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 389609625621532398, 16554256739818959541, 18446744073709551615, 18446744073709551615, 188, 192, 188, 192, 36, 37, true, "need", "need"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 16381206594265787492, 13689520830003550219, 18446744073709551615, 18446744073709551615, 208, 214, 208, 214, 40, 41, true, "manage", "manage"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 389609625621532398, 16554256739818932222, 18446744073709551615, 18446744073709551615, 249, 253, 249, 253, 47, 48, true, "need", "need"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14650452911780017077, 13628007533839560087, 18446744073709551615, 18446744073709551615, 280, 288, 280, 288, 53, 54, true, "annotate", "annotate"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161640489114, 5930309379214940791, 18446744073709551615, 18446744073709551615, 303, 308, 303, 308, 56, 57, true, "store", "store"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104159241569908, 11255026260054777739, 18446744073709551615, 18446744073709551615, 404, 409, 404, 409, 73, 74, true, "train", "train"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541486535, 5881896978058160307, 18446744073709551615, 18446744073709551615, 421, 423, 421, 423, 77, 78, true, "is", "is"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 12178341415895571674, 4440978130441335086, 18446744073709551615, 18446744073709551615, 450, 453, 450, 453, 83, 84, true, "add", "add"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161640489114, 5930309379214737079, 18446744073709551615, 18446744073709551615, 518, 523, 518, 523, 97, 98, true, "store", "store"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14650452911780017077, 13628007533839577361, 18446744073709551615, 18446744073709551615, 551, 559, 551, 559, 103, 104, true, "annotate", "annotate"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161640489114, 5930309379214724500, 18446744073709551615, 18446744073709551615, 577, 582, 577, 582, 107, 108, true, "store", "store"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104159241569908, 11255026260054879024, 18446744073709551615, 18446744073709551615, 600, 605, 600, 605, 111, 112, true, "train", "train"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104159174415764, 11322547402815110125, 18446744073709551615, 18446744073709551615, 629, 634, 629, 634, 116, 117, true, "apply", "apply"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 5949058521807306708, 12297004478926220295, 18446744073709551615, 18446744073709551615, 741, 750, 741, 750, 137, 139, true, "cannot be", "cannot be"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14892762526608873515, 2571932422246093124, 18446744073709551615, 18446744073709551615, 840, 851, 840, 851, 154, 156, true, "can execute", "can execute"], ["verb", "single-verb", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 6182925164797550141, 11632110577094548091, 18446744073709551615, 18446744073709551615, 867, 876, 867, 876, 158, 159, true, "mentioned", "mentioned"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 3610960565517946112, 13846595853905053965, 18446744073709551615, 18446744073709551615, 715, 727, 715, 727, 133, 135, true, "evident that", "evident that"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 2011002864325523456, 11117571940292766640, 18446744073709551615, 18446744073709551615, 23, 34, 23, 34, 3, 5, true, "between the", "between the"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 389609625631229034, 16541888869203540071, 18446744073709551615, 18446744073709551615, 66, 70, 66, 70, 10, 11, true, "that", "that"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 14635108304726888554, 4994234956068005418, 18446744073709551615, 18446744073709551615, 124, 132, 124, 132, 21, 23, true, "since no", "since no"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106477988668695541, 8919684696585615962, 18446744073709551615, 18446744073709551615, 154, 161, 154, 161, 27, 28, true, "without", "without"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485670, 5881896997604331986, 18446744073709551615, 18446744073709551615, 227, 229, 227, 229, 42, 43, true, "of", "of"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 12178341415895625940, 4441099339010746501, 18446744073709551615, 18446744073709551615, 266, 269, 266, 269, 50, 51, true, "for", "for"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161828310801, 6278535120429486187, 18446744073709551615, 18446744073709551615, 327, 332, 327, 332, 59, 61, true, "in an", "in an"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541487053, 5881896969451503173, 18446744073709551615, 18446744073709551615, 380, 382, 380, 382, 69, 70, true, "as", "as"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 389609625631229034, 16541888869203466950, 18446744073709551615, 18446744073709551615, 435, 439, 435, 439, 80, 81, true, "that", "that"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485670, 5881896997604348786, 18446744073709551615, 18446744073709551615, 469, 471, 469, 471, 87, 88, true, "of", "of"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485670, 5881896997581907497, 18446744073709551615, 18446744073709551615, 537, 539, 537, 539, 100, 101, true, "of", "of"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485678, 5881897000469726729, 18446744073709551615, 18446744073709551615, 646, 648, 646, 648, 119, 120, true, "on", "on"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106351438779293396, 11391757905802355543, 18446744073709551615, 18446744073709551615, 667, 674, 667, 674, 123, 125, true, "For the", "For the"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 8106342927224204628, 1463267918920609598, 18446744073709551615, 18446744073709551615, 683, 690, 683, 690, 126, 128, true, "of this", "of this"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 389609625620237736, 16539075808312872292, 18446744073709551615, 18446744073709551615, 809, 813, 809, 813, 149, 151, true, "of a", "of a"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 329104161828310801, 6278535120429453110, 18446744073709551615, 18446744073709551615, 883, 888, 883, 888, 160, 162, true, "in an", "in an"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974655770, 18446744073709551615, 18446744073709551615, 79, 81, 79, 81, 13, 14, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974655477, 18446744073709551615, 18446744073709551615, 100, 102, 100, 102, 17, 18, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974647689, 18446744073709551615, 18446744073709551615, 205, 207, 205, 207, 39, 40, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974660107, 18446744073709551615, 18446744073709551615, 277, 279, 277, 279, 52, 53, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974652202, 18446744073709551615, 18446744073709551615, 401, 403, 401, 403, 72, 73, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974633331, 18446744073709551615, 18446744073709551615, 492, 494, 492, 494, 92, 93, true, "to", "to"], ["conn", "single-conn", 4994395008195818594, "TEXT", "#/texts/23", 1.0, 15441160910541485865, 5881896918974643919, 18446744073709551615, 18446744073709551615, 515, 517, 515, 517, 96, 97, true, "to", "to"], ["numval", "fval", 4203835122307823579, "TEXT", "#/texts/24", 1.0, 12178341415896435198, 13889986935520845304, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "3.1", "3.1"], ["parenthesis", "round brackets", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 8772307426918626408, 4362940361408460484, 18446744073709551615, 18446744073709551615, 152, 230, 152, 230, 27, 42, true, "(scanned or programmatically created PDF, bitmap images, Word documents, etc.)", "(scanned or programmatically created PDF, bitmap images, Word documents, etc.)"], ["parenthesis", "round brackets", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 10173882594842541874, 14046368793434208065, 18446744073709551615, 18446744073709551615, 261, 279, 261, 279, 47, 53, true, "(e.g. JSON or XML)", "(e.g. JSON or XML)"], ["expression", "common", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 15441160910541487324, 1240198994439187449, 18446744073709551615, 18446744073709551615, 262, 266, 262, 266, 48, 49, true, "eg", "e.g."], ["expression", "common", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 12178341415895450733, 13632414567216290152, 18446744073709551615, 18446744073709551615, 225, 229, 225, 229, 40, 41, true, "etc", "etc."], ["sentence", "", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 12910497814715733387, 16125145498285443347, 18446744073709551615, 18446744073709551615, 0, 280, 0, 280, 0, 54, true, "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML).", "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML)."], ["term", "enum-term-mark-4", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 11674491770136657522, 9761837904635132795, 18446744073709551615, 18446744073709551615, 267, 278, 267, 278, 49, 52, true, "JSON or XML", "JSON or XML"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 15641968049220564486, 3104330311046848316, 18446744073709551615, 18446744073709551615, 26, 45, 26, 45, 4, 6, true, "processing pipeline", "processing pipeline"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 7850715239909526655, 10564541552852027327, 18446744073709551615, 18446744073709551615, 194, 207, 194, 207, 34, 36, true, "bitmap images", "bitmap images"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 3850832741059734738, 14883287060771742157, 18446744073709551615, 18446744073709551615, 209, 223, 209, 223, 37, 39, true, "Word documents", "Word documents"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 18077282349116974352, 7474364917469372700, 18446744073709551615, 18446744073709551615, 238, 260, 238, 260, 44, 47, true, "structured data format", "structured data format"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 8106398377759843082, 9276542563935260018, 18446744073709551615, 18446744073709551615, 262, 271, 262, 271, 48, 50, true, "eg JSON", "e.g. JSON"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 14814125365076808131, 4535720155345020195, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 1, 2, true, "platform", "platform"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 329104161667983915, 17161321767071624063, 18446744073709551615, 18446744073709551615, 65, 70, 65, 70, 11, 12, true, "parse", "parse"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 14650452911780017077, 6687850948757700034, 18446744073709551615, 18446744073709551615, 72, 80, 72, 80, 13, 14, true, "annotate", "annotate"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 329104159241569908, 14824530430944127370, 18446744073709551615, 18446744073709551615, 82, 87, 82, 87, 15, 16, true, "train", "train"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 389609625696431489, 1561240778132274355, 18446744073709551615, 18446744073709551615, 115, 119, 115, 119, 20, 21, true, "data", "data"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 389609625631434316, 1567151555792390751, 18446744073709551615, 18446744073709551615, 137, 141, 137, 141, 24, 25, true, "type", "type"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 16381206548538896813, 9505947236812447429, 18446744073709551615, 18446744073709551615, 145, 151, 145, 151, 26, 27, true, "format", "format"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 12178341415896289890, 13633304747499991014, 18446744073709551615, 18446744073709551615, 189, 192, 189, 192, 32, 33, true, "PDF", "PDF"], ["term", "single-term", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 12178341415895541463, 13632419820854921580, 18446744073709551615, 18446744073709551615, 275, 278, 275, 278, 51, 52, true, "XML", "XML"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 5584174880054122043, 18300515772242224763, 18446744073709551615, 18446744073709551615, 13, 23, 13, 23, 2, 3, true, "implements", "implements"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 16381206560503286032, 10754571117046369273, 18446744073709551615, 18446744073709551615, 49, 55, 49, 55, 7, 8, true, "ingest", "ingest"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 16381206594265787492, 15598034062765456434, 18446744073709551615, 18446744073709551615, 57, 63, 57, 63, 9, 10, true, "manage", "manage"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 8106398484416229602, 1704556939207365093, 18446744073709551615, 18446744073709551615, 103, 110, 103, 110, 18, 19, true, "convert", "convert"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 5947879769709188533, 17603125067012762843, 18446744073709551615, 18446744073709551615, 120, 129, 120, 129, 21, 22, true, "contained", "contained"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 8106478648743879659, 297744879721386987, 18446744073709551615, 18446744073709551615, 153, 160, 153, 160, 28, 29, true, "scanned", "scanned"], ["verb", "single-verb", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 8106398513399298373, 11748900618323850732, 18446744073709551615, 18446744073709551615, 181, 188, 181, 188, 31, 32, true, "created", "created"], ["conn", "single-conn", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 16381206560519231294, 10754552323754925291, 18446744073709551615, 18446744073709551615, 130, 136, 130, 136, 22, 24, true, "in any", "in any"], ["conn", "single-conn", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 15441160910541485670, 1240196378682103108, 18446744073709551615, 18446744073709551615, 142, 144, 142, 144, 25, 26, true, "of", "of"], ["conn", "single-conn", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 16381206560517276114, 10755905799470375423, 18446744073709551615, 18446744073709551615, 231, 237, 231, 237, 42, 44, true, "into a", "into a"], ["conn", "single-conn", 13520362244078084911, "TEXT", "#/texts/25", 1.0, 15441160910541485865, 1240198973604396181, 18446744073709551615, 18446744073709551615, 46, 48, 46, 48, 6, 7, true, "to", "to"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235161, 16653745466189901500, 18446744073709551615, 18446744073709551615, 76, 77, 76, 77, 12, 13, true, "1", "1"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235161, 16653745466189901235, 18446744073709551615, 18446744073709551615, 80, 81, 80, 81, 15, 16, true, "1", "1"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235162, 16653745466240377931, 18446744073709551615, 18446744073709551615, 147, 148, 147, 148, 29, 30, true, "2", "2"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235163, 16653745466559202271, 18446744073709551615, 18446744073709551615, 208, 209, 208, 209, 40, 41, true, "3", "3"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235156, 16653745470875858812, 18446744073709551615, 18446744073709551615, 262, 263, 262, 263, 51, 52, true, "4", "4"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235157, 16653745466591923593, 18446744073709551615, 18446744073709551615, 299, 300, 299, 300, 60, 61, true, "5", "5"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235161, 16653745466190505939, 18446744073709551615, 18446744073709551615, 409, 410, 409, 410, 80, 81, true, "1", "1"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235156, 16653745470875849461, 18446744073709551615, 18446744073709551615, 412, 413, 412, 413, 82, 83, true, "4", "4"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235157, 16653745466591931154, 18446744073709551615, 18446744073709551615, 418, 419, 418, 419, 84, 85, true, "5", "5"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235162, 16653745466240134538, 18446744073709551615, 18446744073709551615, 558, 559, 558, 559, 107, 108, true, "2", "2"], ["numval", "ival", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17767354399704235163, 16653745466559183526, 18446744073709551615, 18446744073709551615, 564, 565, 564, 565, 109, 110, true, "3", "3"], ["parenthesis", "reference", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415896395122, 9951911260307984303, 18446744073709551615, 18446744073709551615, 79, 82, 79, 82, 14, 17, true, "(1)", "(1)"], ["parenthesis", "reference", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415896395187, 9951911292760260815, 18446744073709551615, 18446744073709551615, 146, 149, 146, 149, 28, 31, true, "(2)", "(2)"], ["parenthesis", "reference", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415896394992, 9951911291908364343, 18446744073709551615, 18446744073709551615, 207, 210, 207, 210, 39, 42, true, "(3)", "(3)"], ["parenthesis", "reference", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415896395057, 9951911280683983222, 18446744073709551615, 18446744073709551615, 261, 264, 261, 264, 50, 53, true, "(4)", "(4)"], ["parenthesis", "reference", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415896395383, 9951911289823187961, 18446744073709551615, 18446744073709551615, 298, 301, 298, 301, 59, 62, true, "(5)", "(5)"], ["expression", "word-concatenation", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 3753411203337468488, 9177120008899156041, 18446744073709551615, 18446744073709551615, 174, 186, 174, 186, 35, 36, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 7955973489010605030, 9831476142603144664, 18446744073709551615, 18446744073709551615, 463, 480, 463, 480, 94, 95, true, "template-specific", "template-specific"], ["expression", "word-concatenation", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 3753411203337468488, 9177120008899393700, 18446744073709551615, 18446744073709551615, 594, 606, 594, 606, 116, 117, true, "ground-truth", "ground-truth"], ["expression", "wtoken-concatenation", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14638289750758744304, 11302210173902484934, 18446744073709551615, 18446744073709551615, 288, 296, 288, 296, 57, 58, true, "model(s)", "model(s)"], ["expression", "wtoken-concatenation", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 1476026390672618576, 10107149952848687202, 18446744073709551615, 18446744073709551615, 317, 328, 317, 328, 64, 65, true, "document(s)", "document(s)"], ["sentence", "", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 17525205624056980079, 17040712729727507001, 18446744073709551615, 18446744073709551615, 0, 359, 0, 359, 0, 71, true, "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format.", "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format."], ["sentence", "", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 9237548311282946795, 9741605394929262654, 18446744073709551615, 18446744073709551615, 360, 456, 360, 456, 71, 92, true, "If a trained model is available, only components 1, 4 and 5 are needed to convert the documents.", "If a trained model is available, only components 1, 4 and 5 are needed to convert the documents."], ["sentence", "", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6944966380083130595, 5696095880668338595, 18446744073709551615, 18446744073709551615, 457, 631, 457, 631, 92, 122, true, "If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models.", "If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models."], ["sentence", "", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14539564690542389125, 5540857596080310327, 18446744073709551615, 18446744073709551615, 632, 799, 632, 799, 122, 153, true, "It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional.", "It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional."], ["term", "enum-term-mark-2", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 5647280319556365100, 12073018748361790650, 18446744073709551615, 18446744073709551615, 704, 727, 704, 727, 136, 139, true, "annotation and training", "annotation and training"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15641968049220564486, 15184810223045738752, 18446744073709551615, 18446744073709551615, 5, 24, 5, 24, 1, 3, true, "processing pipeline", "processing pipeline"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15437135447951449642, 4518564177144642967, 18446744073709551615, 18446744073709551615, 112, 127, 112, 127, 22, 24, true, "internal format", "internal format"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 843540523508195469, 1009968651563304041, 18446744073709551615, 18446744073709551615, 168, 186, 168, 186, 34, 36, true, "label ground-truth", "label ground-truth"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15116355445244524512, 7416607603607839333, 18446744073709551615, 18446744073709551615, 190, 206, 190, 206, 37, 39, true, "parsed documents", "parsed documents"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 7507517937852582487, 6078294739813943992, 18446744073709551615, 18446744073709551615, 211, 229, 211, 229, 42, 45, true, "training ML models", "training ML models"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 2737429882297243447, 13059840081028432667, 18446744073709551615, 18446744073709551615, 278, 296, 278, 296, 55, 58, true, "custom ML model(s)", "custom ML model(s)"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 18077282349116974352, 2593464952453242974, 18446744073709551615, 18446744073709551615, 336, 358, 336, 358, 67, 70, true, "structured data format", "structured data format"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 7516486339055917967, 10294393908560702715, 18446744073709551615, 18446744073709551615, 365, 378, 365, 378, 73, 75, true, "trained model", "trained model"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15134872732861191546, 14564992985051477580, 18446744073709551615, 18446744073709551615, 393, 408, 393, 408, 78, 80, true, "only components", "only components"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 10030042203366342768, 783598633112047048, 18446744073709551615, 18446744073709551615, 463, 488, 463, 488, 94, 96, true, "template-specific machine", "template-specific machine"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 9482497685613336800, 7352283876107266683, 18446744073709551615, 18446744073709551615, 536, 557, 536, 557, 105, 107, true, "additional components", "additional components"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6402004976666964284, 6933100789420329365, 18446744073709551615, 18446744073709551615, 611, 630, 611, 630, 118, 121, true, "train custom models", "train custom models"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 1915006193249717419, 16014958744068977698, 18446744073709551615, 18446744073709551615, 685, 699, 685, 699, 132, 134, true, "default models", "default models"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 16984543413455913769, 17986818399034101927, 18446744073709551615, 18446744073709551615, 756, 775, 756, 775, 144, 147, true, "best quality output", "best quality output"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 2703018952916355661, 13861502101887545388, 18446744073709551615, 18446744073709551615, 43, 53, 43, 53, 7, 8, true, "components", "components"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 16381206514091025767, 4885222411348045849, 18446744073709551615, 18446744073709551615, 69, 75, 69, 75, 11, 12, true, "Figure", "Figure"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 8106479143794098783, 2962541264367803251, 18446744073709551615, 18446744073709551615, 83, 90, 83, 90, 17, 18, true, "parsing", "parsing"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6167933651658664291, 11376723037997544694, 18446744073709551615, 18446744073709551615, 94, 103, 94, 103, 19, 20, true, "documents", "documents"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541480579, 13421765588580713285, 18446744073709551615, 18446744073709551615, 142, 144, 142, 144, 26, 27, true, "ML", "ML"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 7552769977713241504, 5898699418083397020, 18446744073709551615, 18446744073709551615, 150, 160, 150, 160, 31, 32, true, "Annotation", "Annotation"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 1037258523789473353, 7580283139140321310, 18446744073709551615, 18446744073709551615, 248, 259, 248, 259, 48, 49, true, "annotations", "annotations"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 1476026390672618576, 10107149952848687202, 18446744073709551615, 18446744073709551615, 317, 328, 317, 328, 64, 65, true, "document(s)", "document(s)"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6167933651658664291, 11376723037997543326, 18446744073709551615, 18446744073709551615, 446, 455, 446, 455, 90, 91, true, "documents", "documents"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 329104161610777240, 2839511462594090084, 18446744073709551615, 18446744073709551615, 497, 502, 497, 502, 97, 98, true, "model", "model"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 329104159157820437, 8191643342315584183, 18446744073709551615, 18446744073709551615, 578, 583, 578, 583, 113, 114, true, "users", "users"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14814125365076808131, 9337596647297514490, 18446744073709551615, 18446744073709551615, 665, 673, 665, 673, 129, 130, true, "platform", "platform"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15359807916847495711, 1545007371912531857, 18446744073709551615, 18446744073709551615, 704, 714, 704, 714, 136, 137, true, "annotation", "annotation"], ["term", "single-term", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14634153919632515335, 1746269937003899312, 18446744073709551615, 18446744073709551615, 719, 727, 719, 727, 138, 139, true, "training", "training"], ["verb", "compound-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6181919778911650791, 1457658887135264263, 18446744073709551615, 18446744073709551615, 25, 34, 25, 34, 3, 5, true, "is formed", "is formed"], ["verb", "compound-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6594130350774790903, 16739553497580608789, 18446744073709551615, 18446744073709551615, 420, 441, 420, 441, 85, 89, true, "are needed to convert", "are needed to convert"], ["verb", "compound-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 709385872197115477, 4590124741288282676, 18446744073709551615, 18446744073709551615, 728, 751, 728, 751, 139, 143, true, "are advised to retrieve", "are advised to retrieve"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14652261792406569736, 13894514311356629389, 18446744073709551615, 18446744073709551615, 57, 65, 57, 65, 9, 10, true, "depicted", "depicted"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 6167805666845656469, 5420622691594517541, 18446744073709551615, 18446744073709551615, 128, 137, 128, 137, 24, 25, true, "optimised", "optimised"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14650442552334623127, 8421427627003326802, 18446744073709551615, 18446744073709551615, 239, 247, 239, 247, 47, 48, true, "acquired", "acquired"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14650448030444381648, 18239180096597721029, 18446744073709551615, 18446744073709551615, 265, 273, 265, 273, 53, 54, true, "applying", "applying"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 5615554093848987331, 12626781818686368526, 18446744073709551615, 18446744073709551615, 302, 312, 302, 312, 62, 63, true, "assembling", "assembling"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486535, 13421773584933919664, 18446744073709551615, 18446744073709551615, 379, 381, 379, 381, 75, 76, true, "is", "is"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 8106342444693204894, 10008695562509568866, 18446744073709551615, 18446744073709551615, 489, 496, 489, 496, 96, 97, true, "learned", "learned"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486535, 13421773584933943769, 18446744073709551615, 18446744073709551615, 503, 505, 503, 505, 98, 99, true, "is", "is"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 8106476000214061408, 15857118004815849675, 18446744073709551615, 18446744073709551615, 524, 531, 524, 531, 103, 104, true, "provide", "provide"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 329104159171192019, 8192566990508309340, 18446744073709551615, 18446744073709551615, 572, 577, 572, 577, 112, 113, true, "allow", "allow"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 16381206562264646932, 1960462029052909015, 18446744073709551615, 18446744073709551615, 587, 593, 587, 593, 115, 116, true, "gather", "gather"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486535, 13421773584933772912, 18446744073709551615, 18446744073709551615, 635, 637, 635, 637, 123, 124, true, "is", "is"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 389609625621163440, 7945956557482740818, 18446744073709551615, 18446744073709551615, 651, 655, 651, 655, 126, 127, true, "note", "note"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 329104161555284808, 2777260180261771726, 18446744073709551615, 18446744073709551615, 674, 679, 674, 679, 130, 131, true, "comes", "comes"], ["verb", "single-verb", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415895564896, 9951910993348590293, 18446744073709551615, 18446744073709551615, 786, 789, 786, 789, 150, 151, true, "are", "are"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486989, 13421773491343548165, 18446744073709551615, 18446744073709551615, 35, 37, 35, 37, 5, 6, true, "by", "by"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541487053, 13421773623307651151, 18446744073709551615, 18446744073709551615, 54, 56, 54, 56, 8, 9, true, "as", "as"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486538, 13421773580625251915, 18446744073709551615, 18446744073709551615, 66, 68, 66, 68, 10, 11, true, "in", "in"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485670, 13421765268538655208, 18446744073709551615, 18446744073709551615, 91, 93, 91, 93, 18, 19, true, "of", "of"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 8106398347393280713, 1525760500098149661, 18446744073709551615, 18446744073709551615, 104, 111, 104, 111, 20, 22, true, "into an", "into an"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 12178341415895625940, 9951862331881338732, 18446744073709551615, 18446744073709551615, 138, 141, 138, 141, 25, 26, true, "for", "for"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 16381206565712212855, 8780603472327557404, 18446744073709551615, 18446744073709551615, 161, 167, 161, 167, 32, 34, true, "of the", "of the"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541486538, 13421773580625308724, 18446744073709551615, 18446744073709551615, 187, 189, 187, 189, 36, 37, true, "in", "in"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14637917359887717745, 4284944502519852047, 18446744073709551615, 18446744073709551615, 230, 238, 230, 238, 45, 47, true, "from the", "from the"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 16381206560517276114, 690701761066186570, 18446744073709551615, 18446744073709551615, 329, 335, 329, 335, 65, 67, true, "into a", "into a"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 389609625538087702, 7941525277449308498, 18446744073709551615, 18446744073709551615, 360, 364, 360, 364, 71, 73, true, "If a", "If a"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 329104161875330307, 1829521586099700438, 18446744073709551615, 18446744073709551615, 457, 462, 457, 462, 92, 94, true, "If no", "If no"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 14634130761162415388, 3953663377257032858, 18446744073709551615, 18446744073709551615, 656, 664, 656, 664, 127, 129, true, "that the", "that the"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 389609625618037948, 7945948305478382603, 18446744073709551615, 18446744073709551615, 680, 684, 680, 684, 131, 132, true, "with", "with"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485930, 13421773574584698862, 18446744073709551615, 18446744073709551615, 701, 703, 701, 703, 135, 136, true, "so", "so"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485865, 13421773580077175219, 18446744073709551615, 18446744073709551615, 431, 433, 431, 433, 87, 88, true, "to", "to"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485865, 13421773580077214041, 18446744073709551615, 18446744073709551615, 584, 586, 584, 586, 114, 115, true, "to", "to"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485865, 13421773580077226473, 18446744073709551615, 18446744073709551615, 648, 650, 648, 650, 125, 126, true, "to", "to"], ["conn", "single-conn", 1749622367305947670, "TEXT", "#/texts/26", 1.0, 15441160910541485865, 13421773580077221700, 18446744073709551615, 18446744073709551615, 740, 742, 740, 742, 141, 142, true, "to", "to"], ["sentence", "", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 10456209429844276823, 2422727482681724013, 18446744073709551615, 18446744073709551615, 0, 93, 0, 93, 0, 19, true, "Let us now elaborate on what each of the five components deliver in the rest of this section.", "Let us now elaborate on what each of the five components deliver in the rest of this section."], ["term", "single-term", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 2703018952916355661, 9708196146755666277, 18446744073709551615, 18446744073709551615, 46, 56, 46, 56, 10, 11, true, "components", "components"], ["term", "single-term", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 389609625632792118, 4197781341925173653, 18446744073709551615, 18446744073709551615, 72, 76, 72, 76, 14, 15, true, "rest", "rest"], ["term", "single-term", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 8106478708629288965, 3081332499878802976, 18446744073709551615, 18446744073709551615, 85, 92, 85, 92, 17, 18, true, "section", "section"], ["verb", "single-verb", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 12178341415896275389, 18145395793309844548, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "Let", "Let"], ["verb", "single-verb", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 6165947860431677587, 15989194838257465182, 18446744073709551615, 18446744073709551615, 11, 20, 11, 20, 3, 4, true, "elaborate", "elaborate"], ["verb", "single-verb", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 8106396542836595001, 13259224101982934496, 18446744073709551615, 18446744073709551615, 57, 64, 57, 64, 11, 12, true, "deliver", "deliver"], ["conn", "single-conn", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 15441160910541485678, 6207367339164111129, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "on", "on"], ["conn", "single-conn", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 2283199098925706958, 7907397599804430129, 18446744073709551615, 18446744073709551615, 29, 40, 29, 40, 6, 9, true, "each of the", "each of the"], ["conn", "single-conn", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 16381206560518651853, 14564747107754323096, 18446744073709551615, 18446744073709551615, 65, 71, 65, 71, 12, 14, true, "in the", "in the"], ["conn", "single-conn", 11083736481641202939, "TEXT", "#/texts/27", 1.0, 8106342927224204628, 9005811432893922380, 18446744073709551615, 18446744073709551615, 77, 84, 77, 84, 15, 17, true, "of this", "of this"], ["numval", "fval", 15403141463083979171, "TEXT", "#/texts/28", 1.0, 12178341415896435199, 15281646599530735231, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "3.2", "3.2"], ["numval", "ival", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 17767354399704235162, 1424727048414549116, 18446744073709551615, 18446744073709551615, 590, 591, 590, 591, 108, 109, true, "2", "2"], ["expression", "word-concatenation", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 7265216487325416347, 18237727768635437499, 18446744073709551615, 18446744073709551615, 85, 96, 85, 96, 14, 15, true, "non-trivial", "non-trivial"], ["expression", "word-concatenation", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 2334432749592536458, 15832688675868318655, 18446744073709551615, 18446744073709551615, 134, 147, 134, 147, 23, 24, true, "text-snippets", "text-snippets"], ["expression", "word-concatenation", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 2334432749592536458, 15832688675868348537, 18446744073709551615, 18446744073709551615, 237, 250, 237, 250, 43, 44, true, "text-snippets", "text-snippets"], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 17930569148916488857, 5505536758637995970, 18446744073709551615, 18446744073709551615, 0, 177, 0, 177, 0, 31, true, "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page.", "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page."], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 5655297151045106724, 13262557470809669287, 18446744073709551615, 18446744073709551615, 178, 290, 178, 290, 31, 53, true, "For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper.", "For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper."], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14442010922272755354, 2304293999158723749, 18446744073709551615, 18446744073709551615, 291, 350, 291, 350, 53, 65, true, "There are two reasons why we are interested in these cells.", "There are two reasons why we are interested in these cells."], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 7272978202358758349, 9961870530541141609, 18446744073709551615, 18446744073709551615, 351, 501, 351, 501, 65, 91, true, "First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label.", "First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label."], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 13416184063657893995, 5559344282962348298, 18446744073709551615, 18446744073709551615, 502, 579, 502, 579, 91, 106, true, "Second, the concept of a cell can be easily transferred to scanned documents.", "Second, the concept of a cell can be easily transferred to scanned documents."], ["sentence", "", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 1652403861242351933, 11439629794277621034, 18446744073709551615, 18446744073709551615, 580, 669, 580, 669, 106, 125, true, "In Figure 2, we show the cells obtained from an example PDF page after the parsing stage.", "In Figure 2, we show the cells obtained from an example PDF page after the parsing stage."], ["term", "enum-term-mark-1", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14297716267651363259, 16404864194219706557, 18446744073709551615, 18446744073709551615, 65, 101, 65, 101, 12, 16, true, "straightforward but non-trivial task", "straightforward but non-trivial task"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 539708993415547268, 4705216367729054225, 18446744073709551615, 18446744073709551615, 7, 24, 7, 24, 2, 4, true, "parsing component", "parsing component"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 12768482189442203961, 3521963780408719451, 18446744073709551615, 18446744073709551615, 85, 101, 85, 101, 14, 16, true, "non-trivial task", "non-trivial task"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14650937348812924036, 7972671100203928321, 18446744073709551615, 18446744073709551615, 168, 176, 168, 176, 28, 30, true, "PDF page", "PDF page"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 13453730679222803232, 6614428335010656559, 18446744073709551615, 18446744073709551615, 383, 409, 383, 409, 72, 75, true, "crucial geometric features", "crucial geometric features"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 2317020437411802284, 13633764960020459386, 18446744073709551615, 18446744073709551615, 479, 500, 479, 500, 87, 90, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6638406718593592815, 12534367881860700133, 18446744073709551615, 18446744073709551615, 628, 644, 628, 644, 117, 120, true, "example PDF page", "example PDF page"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 17743353868824691761, 7006821731505867769, 18446744073709551615, 18446744073709551615, 655, 668, 655, 668, 122, 124, true, "parsing stage", "parsing stage"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14814125852840540191, 10333222397369262494, 18446744073709551615, 18446744073709551615, 32, 40, 32, 40, 6, 7, true, "pipeline", "pipeline"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104159325617355, 2319011674175919451, 18446744073709551615, 18446744073709551615, 121, 126, 121, 126, 20, 21, true, "boxes", "boxes"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 2334432749592536458, 15832688675868318655, 18446744073709551615, 18446744073709551615, 134, 147, 134, 147, 23, 24, true, "text-snippets", "text-snippets"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14087388443212978183, 8710122583887711946, 18446744073709551615, 18446744073709551615, 182, 192, 182, 192, 32, 33, true, "simplicity", "simplicity"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104159325617355, 2319011674175930307, 18446744073709551615, 18446744073709551615, 224, 229, 224, 229, 40, 41, true, "boxes", "boxes"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 2334432749592536458, 15832688675868348537, 18446744073709551615, 18446744073709551615, 237, 250, 237, 250, 43, 44, true, "text-snippets", "text-snippets"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104161531686411, 13181154022391063384, 18446744073709551615, 18446744073709551615, 254, 259, 254, 259, 45, 46, true, "cells", "cells"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6165970943308474352, 2869268154850983474, 18446744073709551615, 18446744073709551615, 267, 276, 267, 276, 48, 49, true, "remainder", "remainder"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104161668023890, 13177965549816359198, 18446744073709551615, 18446744073709551615, 284, 289, 284, 289, 51, 52, true, "paper", "paper"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106478449187889361, 2785113865213804946, 18446744073709551615, 18446744073709551615, 305, 312, 305, 312, 56, 57, true, "reasons", "reasons"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104161531686411, 13181154022391052173, 18446744073709551615, 18446744073709551615, 344, 349, 344, 349, 63, 64, true, "cells", "cells"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106464587473865376, 17953291432149571565, 18446744073709551615, 18446744073709551615, 438, 445, 438, 445, 81, 82, true, "machine", "machine"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206567230470443, 12165568246676867373, 18446744073709551615, 18446744073709551615, 455, 461, 455, 461, 83, 84, true, "models", "models"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106398484423890147, 17423301407248305912, 18446744073709551615, 18446744073709551615, 514, 521, 514, 521, 94, 95, true, "concept", "concept"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 389609625696024605, 9216907839507590200, 18446744073709551615, 18446744073709551615, 527, 531, 527, 531, 97, 98, true, "cell", "cell"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6167933651658664291, 13123759922164485441, 18446744073709551615, 18446744073709551615, 569, 578, 569, 578, 104, 105, true, "documents", "documents"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206514091025767, 13570160145101069077, 18446744073709551615, 18446744073709551615, 583, 589, 583, 589, 107, 108, true, "Figure", "Figure"], ["term", "single-term", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104161531686411, 13181154022391035920, 18446744073709551615, 18446744073709551615, 605, 610, 605, 610, 113, 114, true, "cells", "cells"], ["verb", "compound-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 4427434396064538754, 12127654283877660135, 18446744073709551615, 18446744073709551615, 197, 210, 197, 210, 35, 38, true, "will refer to", "will refer to"], ["verb", "compound-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6724950217998665324, 17396052864677910526, 18446744073709551615, 18446744073709551615, 416, 430, 416, 430, 76, 79, true, "are later used", "are later used"], ["verb", "compound-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 10525416859123113492, 14679514170366798433, 18446744073709551615, 18446744073709551615, 532, 568, 532, 568, 98, 104, true, "can be easily transferred to scanned", "can be easily transferred to scanned"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 329104161785912251, 13150765894503023181, 18446744073709551615, 18446744073709551615, 45, 50, 45, 50, 9, 10, true, "solve", "solve"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6187675911162887333, 9639461483670525942, 18446744073709551615, 18446744073709551615, 55, 64, 55, 64, 11, 12, true, "following", "following"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 389609625538336045, 8446862929409352327, 18446744073709551615, 18446744073709551615, 103, 107, 103, 107, 17, 18, true, "Find", "Find"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14652253380850532610, 107046829378636663, 18446744073709551615, 18446744073709551615, 112, 120, 112, 120, 19, 20, true, "bounding", "bounding"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206574684919940, 5616786163860063725, 18446744073709551615, 18446744073709551615, 153, 159, 153, 159, 25, 26, true, "appear", "appear"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14652253380850532610, 107046829378695267, 18446744073709551615, 18446744073709551615, 215, 223, 215, 223, 39, 40, true, "bounding", "bounding"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 12178341415895564896, 14915893524086030278, 18446744073709551615, 18446744073709551615, 297, 300, 297, 300, 54, 55, true, "are", "are"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 12178341415895564896, 14915893524086019548, 18446744073709551615, 18446744073709551615, 320, 323, 320, 323, 59, 60, true, "are", "are"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106476000214061408, 4097314869569959791, 18446744073709551615, 18446744073709551615, 363, 370, 363, 370, 68, 69, true, "provide", "provide"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14639581097006750428, 8481916445851174431, 18446744073709551615, 18446744073709551615, 446, 454, 446, 454, 82, 83, true, "learning", "learning"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 6180169261969955564, 18129286266821845731, 18446744073709551615, 18446744073709551615, 465, 474, 465, 474, 85, 86, true, "determine", "determine"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 389609625741152123, 9215016449560601709, 18446744073709551615, 18446744073709551615, 596, 600, 596, 600, 111, 112, true, "show", "show"], ["verb", "single-verb", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14814126654807168093, 5783395789111073227, 18446744073709551615, 18446744073709551615, 611, 619, 611, 619, 114, 115, true, "obtained", "obtained"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 3083497604064482481, 11203131312199197259, 18446744073709551615, 18446744073709551615, 324, 337, 324, 337, 60, 62, true, "interested in", "interested in"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16380809977974811061, 446265425895612346, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 2, true, "In the", "In the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206565712212855, 12113567223054660961, 18446744073709551615, 18446744073709551615, 25, 31, 25, 31, 4, 6, true, "of the", "of the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206565712007226, 12113589112466741738, 18446744073709551615, 18446744073709551615, 127, 133, 127, 133, 21, 23, true, "of all", "of all"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106342614185119603, 14378991548749122310, 18446744073709551615, 18446744073709551615, 160, 167, 160, 167, 26, 28, true, "on each", "on each"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 12178341415896108722, 14915809969954693677, 18446744073709551615, 18446744073709551615, 178, 181, 178, 181, 31, 32, true, "For", "For"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206565712212855, 12113567223054649064, 18446744073709551615, 18446744073709551615, 230, 236, 230, 236, 41, 43, true, "of the", "of the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 15441160910541487053, 14737048381530263484, 18446744073709551615, 18446744073709551615, 251, 253, 251, 253, 44, 45, true, "as", "as"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206560518651853, 12195079447811016074, 18446744073709551615, 18446744073709551615, 260, 266, 260, 266, 46, 48, true, "in the", "in the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206565712212855, 12113567223054608798, 18446744073709551615, 18446744073709551615, 277, 283, 277, 283, 49, 51, true, "of the", "of the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 14638857868319795209, 699324563274466024, 18446744073709551615, 18446744073709551615, 374, 382, 374, 382, 70, 72, true, "with the", "with the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206560518651853, 12195079447810969129, 18446744073709551615, 18446744073709551615, 431, 437, 431, 437, 79, 81, true, "in the", "in the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 389609625620237736, 9181237261281377861, 18446744073709551615, 18446744073709551615, 522, 526, 522, 526, 95, 97, true, "of a", "of a"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 15441160910541480354, 14736971653023235653, 18446744073709551615, 18446744073709551615, 580, 582, 580, 582, 106, 107, true, "In", "In"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 8106397740404304256, 9502999072153915614, 18446744073709551615, 18446744073709551615, 620, 627, 620, 627, 115, 117, true, "from an", "from an"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 5948794771153674476, 3941442301932288916, 18446744073709551615, 18446744073709551615, 645, 654, 645, 654, 120, 122, true, "after the", "after the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 16381206519425733256, 11107177216059411514, 18446744073709551615, 18446744073709551615, 208, 214, 208, 214, 37, 39, true, "to the", "to the"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 15441160910541485865, 14736949766705825112, 18446744073709551615, 18446744073709551615, 462, 464, 462, 464, 84, 85, true, "to", "to"], ["conn", "single-conn", 12234429517419341922, "TEXT", "#/texts/29", 1.0, 15441160910541485865, 14736949766705835136, 18446744073709551615, 18446744073709551615, 558, 560, 558, 560, 102, 103, true, "to", "to"], ["numval", "ival", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17767354399704235152, 12765861062670798554, 18446744073709551615, 18446744073709551615, 264, 265, 264, 265, 51, 52, true, "8", "8"], ["parenthesis", "round brackets", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17960842653009747058, 2579946755923203289, 18446744073709551615, 18446744073709551615, 572, 600, 572, 600, 107, 115, true, "(e.g. the width of the cell)", "(e.g. the width of the cell)"], ["expression", "common", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541487324, 9140229821694613215, 18446744073709551615, 18446744073709551615, 573, 577, 573, 577, 108, 109, true, "eg", "e.g."], ["expression", "word-concatenation", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17168373465524353870, 1775550960674726092, 18446744073709551615, 18446744073709551615, 251, 263, 251, 263, 50, 51, true, "ISO-standard", "ISO-standard"], ["expression", "word-concatenation", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 5748925445660418888, 5099648152124005842, 18446744073709551615, 18446744073709551615, 505, 515, 505, 515, 96, 97, true, "text-lines", "text-lines"], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 7415923518961690554, 5436017382399562501, 18446744073709551615, 18446744073709551615, 0, 184, 0, 184, 0, 37, true, "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells.", "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells."], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 10722424685754320142, 14939850258700553466, 18446744073709551615, 18446744073709551615, 185, 349, 185, 349, 37, 68, true, "This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs.", "This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs."], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 5840331794895575041, 7522048939256453486, 18446744073709551615, 18446744073709551615, 350, 516, 350, 516, 68, 98, true, "Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines.", "Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines."], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 9909804561214722407, 7606701336647946079, 18446744073709551615, 18446744073709551615, 517, 672, 517, 672, 98, 126, true, "This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models.", "This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models."], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 1905260306212934301, 17027778861887240638, 18446744073709551615, 18446744073709551615, 673, 763, 673, 763, 126, 143, true, "As a consequence, we reduce the variability of the geometric features as much as possible.", "As a consequence, we reduce the variability of the geometric features as much as possible."], ["sentence", "", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 11883795114712972606, 1100089006714247884, 18446744073709551615, 18446744073709551615, 764, 900, 764, 900, 143, 166, true, "The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions.", "The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions."], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 14888806260649526283, 9888391296230812238, 18446744073709551615, 18446744073709551615, 66, 82, 66, 82, 12, 14, true, "conceptual point", "conceptual point"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15237688208174812943, 9169750940975380140, 18446744073709551615, 18446744073709551615, 152, 170, 152, 170, 31, 33, true, "precise definition", "precise definition"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15237688208174812943, 9169750940975379272, 18446744073709551615, 18446744073709551615, 200, 218, 200, 218, 41, 43, true, "precise definition", "precise definition"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 18333734320849547479, 14786376426466981623, 18446744073709551615, 18446744073709551615, 280, 297, 280, 297, 54, 57, true, "PDF document code", "PDF document code"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 1742535906543983437, 5107611858547030995, 18446744073709551615, 18446744073709551615, 350, 360, 350, 360, 68, 70, true, "Older PDFs", "Older PDFs"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 4980127422887365844, 7914224627465909624, 18446744073709551615, 18446744073709551615, 459, 470, 459, 470, 87, 89, true, "recent PDFs", "recent PDFs"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17387068897999710340, 583199993496143951, 18446744073709551615, 18446744073709551615, 500, 515, 500, 515, 95, 97, true, "full text-lines", "full text-lines"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 19581948089354274, 11863368431354531962, 18446744073709551615, 18446744073709551615, 541, 559, 541, 559, 102, 104, true, "geometric features", "geometric features"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 10797576170574569798, 16421164876638798879, 18446744073709551615, 18446744073709551615, 642, 655, 642, 655, 121, 123, true, "later machine", "later machine"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 19581948089354274, 11863368431354539464, 18446744073709551615, 18446744073709551615, 724, 742, 724, 742, 136, 138, true, "geometric features", "geometric features"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 19581948089354274, 11863368431354518484, 18446744073709551615, 18446744073709551615, 804, 822, 804, 822, 149, 151, true, "geometric features", "geometric features"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625631210899, 286282282783107526, 18446744073709551615, 18446744073709551615, 10, 14, 10, 14, 2, 3, true, "task", "task"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161531686411, 1686298631271692268, 18446744073709551615, 18446744073709551615, 30, 35, 30, 35, 6, 7, true, "cells", "cells"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625619349298, 153488826964012383, 18446744073709551615, 18446744073709551615, 86, 90, 86, 90, 15, 16, true, "view", "view"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 14814125472896938138, 13265343424957278224, 18446744073709551615, 18446744073709551615, 105, 113, 105, 113, 21, 22, true, "practice", "practice"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161531686411, 1686298631271808079, 18446744073709551615, 18446744073709551615, 178, 183, 178, 183, 35, 36, true, "cells", "cells"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625633345913, 286327708023170250, 18446744073709551615, 18446744073709551615, 190, 194, 190, 194, 38, 39, true, "lack", "lack"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106342536556065951, 5101417539304616523, 18446744073709551615, 18446744073709551615, 227, 234, 227, 234, 45, 46, true, "origins", "origins"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 11600564911974996302, 9772943102206687174, 18446744073709551615, 18446744073709551615, 314, 325, 314, 325, 61, 62, true, "variability", "variability"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106477781724488761, 8016550195084047451, 18446744073709551615, 18446744073709551615, 333, 340, 333, 340, 64, 65, true, "quality", "quality"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625526197745, 154178518742491636, 18446744073709551615, 18446744073709551615, 344, 348, 344, 348, 66, 67, true, "PDFs", "PDFs"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206560620045048, 10217682693125643903, 18446744073709551615, 18446744073709551615, 393, 399, 393, 399, 75, 76, true, "images", "images"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 12178341415896269066, 15144800269722583989, 18446744073709551615, 18446744073709551615, 406, 409, 406, 409, 77, 78, true, "OCR", "OCR"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161531686411, 1686298631271792344, 18446744073709551615, 18446744073709551615, 427, 432, 427, 432, 80, 81, true, "cells", "cells"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625633592024, 286369527288178260, 18446744073709551615, 18446744073709551615, 442, 446, 442, 446, 83, 84, true, "word", "word"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161531686411, 1686298631271788393, 18446744073709551615, 18446744073709551615, 490, 495, 490, 495, 93, 94, true, "cells", "cells"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 11600564911974996302, 9772943102206677434, 18446744073709551615, 18446744073709551615, 522, 533, 522, 533, 99, 100, true, "variability", "variability"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625696024605, 274735343390871718, 18446744073709551615, 18446744073709551615, 567, 571, 567, 571, 106, 107, true, "cell", "cell"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104158766048883, 14200427917049610175, 18446744073709551615, 18446744073709551615, 582, 587, 582, 587, 110, 111, true, "width", "width"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625696024605, 274735343390873519, 18446744073709551615, 18446744073709551615, 595, 599, 595, 599, 113, 114, true, "cell", "cell"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 5731695876385560379, 8735798418445737053, 18446744073709551615, 18446744073709551615, 627, 638, 627, 638, 119, 120, true, "performance", "performance"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206567230470443, 10197774938990653233, 18446744073709551615, 18446744073709551615, 665, 671, 665, 671, 124, 125, true, "models", "models"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 2343822922798056892, 10154671440985252928, 18446744073709551615, 18446744073709551615, 678, 689, 678, 689, 128, 129, true, "consequence", "consequence"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 11600564911974996302, 9772943102206681686, 18446744073709551615, 18446744073709551615, 705, 716, 705, 716, 133, 134, true, "variability", "variability"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625696024605, 274735343390824345, 18446744073709551615, 18446744073709551615, 828, 832, 828, 832, 153, 154, true, "cell", "cell"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106464587473865376, 8005470758012666235, 18446744073709551615, 18446744073709551615, 853, 860, 853, 860, 159, 160, true, "machine", "machine"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15359670209433732834, 8546369363095933532, 18446744073709551615, 18446744073709551615, 870, 880, 870, 880, 161, 162, true, "algorithms", "algorithms"], ["term", "single-term", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15175963360124346573, 7425656004405084691, 18446744073709551615, 18446744073709551615, 888, 899, 888, 899, 164, 165, true, "predictions", "predictions"], ["verb", "compound-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206478391039341, 13319343947608985919, 18446744073709551615, 18446744073709551615, 95, 101, 95, 101, 18, 20, true, "is not", "is not"], ["verb", "compound-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17508027506047556020, 2482579808046990290, 18446744073709551615, 18446744073709551615, 127, 141, 127, 141, 25, 28, true, "does not exist", "does not exist"], ["verb", "compound-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 17858840657736432000, 827456240175881986, 18446744073709551615, 18446744073709551615, 367, 379, 367, 379, 71, 73, true, "were created", "were created"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106397466565237467, 977130001411655379, 18446744073709551615, 18446744073709551615, 18, 25, 18, 25, 4, 5, true, "finding", "finding"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 1410392834682309464, 3495073499889577510, 18446744073709551615, 18446744073709551615, 36, 48, 36, 48, 7, 9, true, "might appear", "might appear"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 12178341415895601584, 15143512592534806650, 18446744073709551615, 18446744073709551615, 219, 222, 219, 222, 43, 44, true, "has", "has"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 6180169263126451304, 3119480438606465672, 18446744073709551615, 18446744073709551615, 266, 275, 266, 275, 52, 53, true, "detailing", "detailing"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106478648743879659, 18054348966917680372, 18446744073709551615, 18446744073709551615, 385, 392, 385, 392, 74, 75, true, "scanned", "scanned"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104159157798023, 1678901618548373231, 18446744073709551615, 18446744073709551615, 400, 405, 400, 405, 76, 77, true, "using", "using"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206521510867710, 13465346086607517925, 18446744073709551615, 18446744073709551615, 420, 426, 420, 426, 79, 80, true, "return", "return"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104159171192019, 1680748189061438627, 18446744073709551615, 18446744073709551615, 471, 476, 471, 476, 89, 90, true, "allow", "allow"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206532661480265, 9531737447789598314, 18446744073709551615, 18446744073709551615, 483, 489, 483, 489, 92, 93, true, "create", "create"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206560633513421, 9503604932247872739, 18446744073709551615, 18446744073709551615, 616, 622, 616, 622, 117, 118, true, "impact", "impact"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 14639581097006750428, 14675095568206484231, 18446744073709551615, 18446744073709551615, 656, 664, 656, 664, 123, 124, true, "learning", "learning"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206521531524134, 13473141336162655057, 18446744073709551615, 18446744073709551615, 694, 700, 694, 700, 131, 132, true, "reduce", "reduce"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 12178341415895564896, 15143520438392043936, 18446744073709551615, 18446744073709551615, 833, 836, 833, 836, 154, 155, true, "are", "are"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 14639581097006750428, 14675095568205014589, 18446744073709551615, 18446744073709551615, 861, 869, 861, 869, 160, 161, true, "learning", "learning"], ["verb", "single-verb", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206563385633981, 13002579886209109896, 18446744073709551615, 18446744073709551615, 881, 887, 881, 887, 162, 164, true, "can do", "can do"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16949988767738090709, 100229038272389, 18446744073709551615, 18446744073709551615, 49, 63, 49, 63, 9, 11, true, "intuitive from", "intuitive from"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 8106464529736241562, 1453137548743953724, 18446744073709551615, 18446744073709551615, 746, 753, 746, 753, 139, 141, true, "much as", "much as"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 6179252389649895475, 3082759261078903372, 18446744073709551615, 18446744073709551615, 0, 9, 0, 9, 0, 2, true, "While the", "While the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541485670, 9140198027864825824, 18446744073709551615, 18446744073709551615, 15, 17, 15, 17, 3, 4, true, "of", "of"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541485670, 9140198027864794615, 18446744073709551615, 18446744073709551615, 83, 85, 83, 85, 14, 15, true, "of", "of"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541486538, 9140197839759264413, 18446744073709551615, 18446744073709551615, 102, 104, 102, 104, 20, 21, true, "in", "in"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161786618045, 1529671095071243222, 18446744073709551615, 18446744073709551615, 115, 120, 115, 120, 23, 24, true, "since", "since"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206565712212855, 10162434354564300945, 18446744073709551615, 18446744073709551615, 171, 177, 171, 177, 33, 35, true, "of the", "of the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625620237736, 274280443846053091, 18446744073709551615, 18446744073709551615, 195, 199, 195, 199, 39, 41, true, "of a", "of a"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206560518651853, 9445866209727956530, 18446744073709551615, 18446744073709551615, 244, 250, 244, 250, 48, 50, true, "in the", "in the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206560518651853, 9445866209727960386, 18446744073709551615, 18446744073709551615, 307, 313, 307, 313, 59, 61, true, "in the", "in the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206565712212855, 10162434354564328048, 18446744073709551615, 18446744073709551615, 326, 332, 326, 332, 62, 64, true, "of the", "of the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541485670, 9140198027864778039, 18446744073709551615, 18446744073709551615, 341, 343, 341, 343, 65, 66, true, "of", "of"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625697843734, 276176066640977335, 18446744073709551615, 18446744073709551615, 380, 384, 380, 384, 73, 74, true, "from", "from"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 14637917333167503367, 15104975758825844690, 18446744073709551615, 18446744073709551615, 433, 441, 433, 441, 81, 83, true, "for each", "for each"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 329104161580427521, 1571916512453351057, 18446744073709551615, 18446744073709551615, 448, 453, 448, 453, 85, 86, true, "while", "while"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 12178341415895625940, 15143513577680482188, 18446744073709551615, 18446744073709551615, 496, 499, 496, 499, 94, 95, true, "for", "for"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206560518651853, 9445866209727942525, 18446744073709551615, 18446744073709551615, 534, 540, 534, 540, 100, 102, true, "in the", "in the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206565712212855, 10162434354564801127, 18446744073709551615, 18446744073709551615, 560, 566, 560, 566, 104, 106, true, "of the", "of the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206564601699726, 9376311130415997675, 18446744073709551615, 18446744073709551615, 573, 581, 573, 581, 108, 110, true, "eg the", "e.g. the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206565712212855, 10162434354564803006, 18446744073709551615, 18446744073709551615, 588, 594, 588, 594, 111, 113, true, "of the", "of the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541485670, 9140198027863323620, 18446744073709551615, 18446744073709551615, 639, 641, 639, 641, 120, 121, true, "of", "of"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625539850184, 154652053038545723, 18446744073709551615, 18446744073709551615, 673, 677, 673, 677, 126, 128, true, "As a", "As a"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 16381206565712212855, 10162434354564302848, 18446744073709551615, 18446744073709551615, 717, 723, 717, 723, 134, 136, true, "of the", "of the"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 389609625620237736, 274280443846028590, 18446744073709551615, 18446744073709551615, 823, 827, 823, 827, 151, 153, true, "of a", "of a"], ["conn", "single-conn", 16957857111665886816, "TEXT", "#/texts/30", 1.0, 15441160910541485865, 9140198005523690763, 18446744073709551615, 18446744073709551615, 480, 482, 480, 482, 91, 92, true, "to", "to"], ["expression", "latex", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 389609625699793082, 10849477641302803979, 18446744073709551615, 18446744073709551615, 192, 198, 192, 198, 33, 34, true, "^{9}", "$^{9}$"], ["sentence", "", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 14537379500392520301, 2641874782821116556, 18446744073709551615, 18446744073709551615, 0, 124, 0, 124, 0, 22, true, "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document.", "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document."], ["sentence", "", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 13624401348528241810, 18138670151212489595, 18446744073709551615, 18446744073709551615, 125, 199, 125, 199, 22, 35, true, "This operation relies on the iterators provided by the QPDF library$^{9}$.", "This operation relies on the iterators provided by the QPDF library$^{9}$."], ["term", "enum-term-mark-3", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 4523841464557345199, 10114780374881260645, 18446744073709551615, 18446744073709551615, 73, 95, 73, 95, 13, 16, true, "symbols and transforms", "symbols and transforms"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 1490421477877365637, 10634695956169887731, 18446744073709551615, 18446744073709551615, 4, 21, 4, 21, 1, 3, true, "programmatic PDFs", "programmatic PDFs"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 5748925367544727060, 5318612030524804463, 18446744073709551615, 18446744073709551615, 27, 37, 27, 37, 5, 7, true, "text cells", "text cells"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 10366280871409328423, 6450002708232284703, 18446744073709551615, 18446744073709551615, 58, 69, 58, 69, 10, 12, true, "raw streams", "raw streams"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 12366808243217836777, 18339274156313290686, 18446744073709551615, 18446744073709551615, 111, 123, 111, 123, 19, 21, true, "PDF document", "PDF document"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 17527422690611097285, 6107149276644244418, 18446744073709551615, 18446744073709551615, 180, 192, 180, 192, 31, 33, true, "QPDF library", "QPDF library"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 8106478574083600801, 3492130479069597648, 18446744073709551615, 18446744073709551615, 73, 80, 73, 80, 13, 14, true, "symbols", "symbols"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 8619280146728881072, 14423298271510896494, 18446744073709551615, 18446744073709551615, 85, 95, 85, 95, 15, 16, true, "transforms", "transforms"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 6167836358624304835, 2001895887803008895, 18446744073709551615, 18446744073709551615, 130, 139, 130, 139, 23, 24, true, "operation", "operation"], ["term", "single-term", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 6182474587515713435, 3040902962194794247, 18446744073709551615, 18446744073709551615, 154, 163, 154, 163, 27, 28, true, "iterators", "iterators"], ["verb", "compound-verb", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 653298976799407280, 12994946537251491253, 18446744073709551615, 18446744073709551615, 38, 52, 38, 52, 7, 9, true, "are contructed", "are contructed"], ["verb", "single-verb", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 8106396543067771897, 8988266389465152017, 18446744073709551615, 18446744073709551615, 96, 103, 96, 103, 16, 17, true, "defined", "defined"], ["verb", "single-verb", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 16381206521530126984, 2128490878609892302, 18446744073709551615, 18446744073709551615, 140, 146, 140, 146, 24, 25, true, "relies", "relies"], ["verb", "single-verb", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 14814125838089603136, 14570552129528981767, 18446744073709551615, 18446744073709551615, 164, 172, 164, 172, 28, 29, true, "provided", "provided"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 12178341415896108722, 16716331678697369730, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "For", "For"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 389609625697843734, 10745918863008613894, 18446744073709551615, 18446744073709551615, 53, 57, 53, 57, 9, 10, true, "from", "from"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 15441160910541485670, 11782782313518536809, 18446744073709551615, 18446744073709551615, 70, 72, 70, 72, 12, 13, true, "of", "of"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 16381206560518651853, 13454833510749227092, 18446744073709551615, 18446744073709551615, 104, 110, 104, 110, 17, 19, true, "in the", "in the"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 16381206566339127348, 13538838978812801477, 18446744073709551615, 18446744073709551615, 147, 153, 147, 153, 25, 27, true, "on the", "on the"], ["conn", "single-conn", 10390915169360946497, "TEXT", "#/texts/31", 1.0, 16381206574363061705, 7182896789576417479, 18446744073709551615, 18446744073709551615, 173, 179, 173, 179, 29, 31, true, "by the", "by the"], ["expression", "word-concatenation", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 2334432749592536458, 10462994002415974044, 18446744073709551615, 18446744073709551615, 165, 178, 165, 178, 32, 33, true, "text-snippets", "text-snippets"], ["sentence", "", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15260949785571891965, 11653374250913568665, 18446744073709551615, 18446744073709551615, 0, 262, 0, 262, 0, 46, true, "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content.", "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content."], ["sentence", "", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 13844944822277961427, 5220212344413701116, 18446744073709551615, 18446744073709551615, 263, 432, 263, 432, 46, 77, true, "Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document.", "Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document."], ["sentence", "", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 17672617811638114041, 1159640313158568858, 18446744073709551615, 18446744073709551615, 433, 542, 433, 542, 77, 95, true, "From this point, all further processing does not need to distinguish between scanned or programmatic sources.", "From this point, all further processing does not need to distinguish between scanned or programmatic sources."], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 9943751231165233071, 2473120546975171959, 18446744073709551615, 18446744073709551615, 31, 44, 31, 44, 8, 10, true, "step approach", "step approach"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15277331178077245503, 8655695116780644511, 18446744073709551615, 18446744073709551615, 84, 100, 84, 100, 18, 20, true, "bitmap resources", "bitmap resources"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 1743069044297951691, 8965951185434506549, 18446744073709551615, 18446744073709551615, 123, 133, 123, 133, 25, 27, true, "OCR engine", "OCR engine"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 16095915465433192910, 1057695382242219584, 18446744073709551615, 18446744073709551615, 301, 311, 301, 311, 53, 55, true, "line paths", "line paths"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 1613146853262082611, 6945903350931267856, 18446744073709551615, 18446744073709551615, 329, 349, 329, 349, 59, 62, true, "internal JSON format", "internal JSON format"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15277331178077245503, 8655695116780655631, 18446744073709551615, 18446744073709551615, 386, 402, 386, 402, 69, 71, true, "bitmap resources", "bitmap resources"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 12366808243217836777, 7405717181425166168, 18446744073709551615, 18446744073709551615, 419, 431, 419, 431, 74, 76, true, "PDF document", "PDF document"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 1813666098981047843, 5197711836231568924, 18446744073709551615, 18446744073709551615, 454, 472, 454, 472, 82, 84, true, "further processing", "further processing"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 4748498094194401130, 132855827826017730, 18446744073709551615, 18446744073709551615, 521, 541, 521, 541, 92, 94, true, "programmatic sources", "programmatic sources"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 389609625526197745, 6232643682528103190, 18446744073709551615, 18446744073709551615, 12, 16, 12, 16, 2, 3, true, "PDFs", "PDFs"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104161531686411, 13473927785000648089, 18446744073709551615, 18446744073709551615, 57, 62, 57, 62, 13, 14, true, "cells", "cells"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 12178341415896289890, 8091364884506794024, 18446744073709551615, 18446744073709551615, 108, 111, 108, 111, 22, 23, true, "PDF", "PDF"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 2334432749592536458, 10462994002415974044, 18446744073709551615, 18446744073709551615, 165, 178, 165, 178, 32, 33, true, "text-snippets", "text-snippets"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 16381206560620045048, 9011858595433045724, 18446744073709551615, 18446744073709551615, 188, 194, 188, 194, 35, 36, true, "images", "images"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104161531686411, 13473927785000686753, 18446744073709551615, 18446744073709551615, 214, 219, 214, 219, 39, 40, true, "cells", "cells"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106398484416916345, 6493205953920844274, 18446744073709551615, 18446744073709551615, 254, 261, 254, 261, 44, 45, true, "content", "content"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104161531686411, 13473927785000641518, 18446744073709551615, 18446744073709551615, 291, 296, 291, 296, 51, 52, true, "cells", "cells"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15984565858548749625, 6555376916269954109, 18446744073709551615, 18446744073709551615, 368, 378, 368, 378, 66, 67, true, "references", "references"], ["term", "single-term", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104161594416377, 13421450643931759595, 18446744073709551615, 18446744073709551615, 443, 448, 443, 448, 79, 80, true, "point", "point"], ["verb", "compound-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15388942590337907789, 13245487767265512912, 18446744073709551615, 18446744073709551615, 312, 322, 312, 322, 55, 57, true, "are stored", "are stored"], ["verb", "compound-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 2326544351310328000, 11496667878093951528, 18446744073709551615, 18446744073709551615, 473, 501, 473, 501, 84, 89, true, "does not need to distinguish", "does not need to distinguish"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106478648743879659, 11927965358765640838, 18446744073709551615, 18446744073709551615, 4, 11, 4, 11, 1, 2, true, "scanned", "scanned"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 12178341415895516060, 8089169012284556182, 18446744073709551615, 18446744073709551615, 21, 24, 21, 24, 5, 6, true, "use", "use"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 389609625697824147, 6281299199008773227, 18446744073709551615, 18446744073709551615, 48, 52, 48, 52, 11, 12, true, "find", "find"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106478500389476193, 5862158711018795994, 18446744073709551615, 18446744073709551615, 72, 79, 72, 79, 16, 17, true, "running", "running"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106464574161696199, 1599318992587414525, 18446744073709551615, 18446744073709551615, 143, 150, 143, 150, 29, 30, true, "merging", "merging"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 6168374324562720592, 11340980888213609541, 18446744073709551615, 18446744073709551615, 155, 164, 155, 164, 31, 32, true, "extracted", "extracted"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 6165970943308974402, 6183362660277321571, 18446744073709551615, 18446744073709551615, 204, 213, 204, 213, 38, 39, true, "remaining", "remaining"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106398513399298373, 12729825352779269268, 18446744073709551615, 18446744073709551615, 246, 253, 246, 253, 43, 44, true, "created", "created"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106398513399298373, 12729825352779201453, 18446744073709551615, 18446744073709551615, 283, 290, 283, 290, 50, 51, true, "created", "created"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104158690196448, 12090438731660771062, 18446744073709551615, 18446744073709551615, 362, 367, 362, 367, 65, 66, true, "keeps", "keeps"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 14652256356231447381, 6547058476669491030, 18446744073709551615, 18446744073709551615, 403, 411, 403, 411, 71, 72, true, "embedded", "embedded"], ["verb", "single-verb", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106478648743879659, 11927965358765867932, 18446744073709551615, 18446744073709551615, 510, 517, 510, 517, 90, 91, true, "scanned", "scanned"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 12178341415896108722, 8091477407578352430, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "For", "For"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15441160910541486989, 1560905956973584825, 18446744073709551615, 18446744073709551615, 63, 65, 63, 65, 14, 15, true, "by", "by"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 16381206560518651853, 9040886573940705408, 18446744073709551615, 18446744073709551615, 101, 107, 101, 107, 20, 22, true, "in the", "in the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 5748881733723902671, 2744428132179375268, 18446744073709551615, 18446744073709551615, 112, 122, 112, 122, 23, 25, true, "through an", "through an"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 14637917359887717745, 2285679170166519016, 18446744073709551615, 18446744073709551615, 179, 187, 179, 187, 33, 35, true, "from the", "from the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 14638857868319795209, 9986308683752228647, 18446744073709551615, 18446744073709551615, 195, 203, 195, 203, 36, 38, true, "with the", "with the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 14637917359887717745, 2285679170166508726, 18446744073709551615, 18446744073709551615, 220, 228, 220, 228, 40, 42, true, "from the", "from the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 329104161828310801, 9514998739209225941, 18446744073709551615, 18446744073709551615, 323, 328, 323, 328, 57, 59, true, "in an", "in an"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 16381206560518651853, 9040886573940702850, 18446744073709551615, 18446744073709551615, 412, 418, 412, 418, 72, 74, true, "in the", "in the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 6560703375081670815, 13188537813218715189, 18446744073709551615, 18446744073709551615, 433, 442, 433, 442, 77, 79, true, "From this", "From this"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 8106397860038858133, 1895817873747457970, 18446744073709551615, 18446744073709551615, 502, 509, 502, 509, 89, 90, true, "between", "between"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15441160910541485865, 1560905894462741172, 18446744073709551615, 18446744073709551615, 45, 47, 45, 47, 10, 11, true, "to", "to"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 16381206519425733256, 8056542592271067263, 18446744073709551615, 18446744073709551615, 379, 385, 379, 385, 67, 69, true, "to the", "to the"], ["conn", "single-conn", 15254383206256494278, "TEXT", "#/texts/32", 1.0, 15441160910541485865, 1560905894463292709, 18446744073709551615, 18446744073709551615, 487, 489, 487, 489, 87, 88, true, "to", "to"], ["numval", "fval", 17759618186065566858, "TEXT", "#/texts/33", 1.0, 12178341415896435196, 2390434231117813361, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "3.3", "3.3"], ["expression", "word-concatenation", 17759618186065566858, "TEXT", "#/texts/33", 1.0, 2818878630166942113, 14739962831805467920, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 1, 2, true, "Ground-truth", "Ground-truth"], ["expression", "word-concatenation", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 3753411203337468488, 13210849437960952407, 18446744073709551615, 18446744073709551615, 30, 42, 30, 42, 6, 7, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 3753411203337468488, 13210849437960910794, 18446744073709551615, 18446744073709551615, 115, 127, 115, 127, 19, 20, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 3753411203337468488, 13210849437960914871, 18446744073709551615, 18446744073709551615, 300, 312, 300, 312, 48, 49, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14635108738803425688, 7365949764326008316, 18446744073709551615, 18446744073709551615, 548, 556, 548, 556, 89, 90, true, "two-fold", "two-fold"], ["sentence", "", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 17583259158513687366, 4844263450613991747, 18446744073709551615, 18446744073709551615, 0, 99, 0, 99, 0, 18, true, "In this component, we collect ground-truth for the custom machine learning models to be trained on.", "In this component, we collect ground-truth for the custom machine learning models to be trained on."], ["sentence", "", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 5326083869552464270, 6000109878302441608, 18446744073709551615, 18446744073709551615, 100, 229, 100, 229, 18, 36, true, "Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision.", "Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision."], ["sentence", "", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 10760691172507406288, 16462010365433270400, 18446744073709551615, 18446744073709551615, 230, 393, 230, 393, 36, 63, true, "Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents.", "Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents."], ["sentence", "", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8873628216442269211, 15169897531659573964, 18446744073709551615, 18446744073709551615, 394, 512, 394, 512, 63, 83, true, "As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning.", "As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning."], ["sentence", "", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 7092604321314445148, 6011191642401048334, 18446744073709551615, 18446744073709551615, 513, 557, 513, 557, 83, 91, true, "The purpose of these annotators is two-fold.", "The purpose of these annotators is two-fold."], ["term", "enum-term-mark-2", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 11037453576911667853, 13165947129504054427, 18446744073709551615, 18446744073709551615, 208, 228, 208, 228, 32, 35, true, "recall and precision", "recall and precision"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 808436161032208500, 7814799285540693049, 18446744073709551615, 18446744073709551615, 51, 65, 51, 65, 9, 11, true, "custom machine", "custom machine"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 12419378180983228278, 392973101999502154, 18446744073709551615, 18446744073709551615, 100, 132, 100, 132, 18, 21, true, "Representative ground-truth data", "Representative ground-truth data"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 3376407656379762908, 1362351500590231707, 18446744073709551615, 18446744073709551615, 139, 159, 139, 159, 23, 25, true, "paramount importance", "paramount importance"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16814682987492505919, 13217553285104349249, 18446744073709551615, 18446744073709551615, 198, 214, 198, 214, 31, 33, true, "excellent recall", "excellent recall"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 11730809760258185856, 5794808849754265728, 18446744073709551615, 18446744073709551615, 285, 317, 285, 317, 47, 50, true, "representative ground-truth data", "representative ground-truth data"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14929125759175486455, 10500241043325238885, 18446744073709551615, 18446744073709551615, 341, 361, 341, 361, 55, 57, true, "enormous variability", "enormous variability"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 4671466949882320018, 14567088525314735297, 18446744073709551615, 18446744073709551615, 497, 511, 497, 511, 80, 82, true, "very beginning", "very beginning"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 5947879501615734370, 3587833538502150180, 18446744073709551615, 18446744073709551615, 8, 17, 8, 17, 2, 3, true, "component", "component"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16381206567230470443, 11589242443361386039, 18446744073709551615, 18446744073709551615, 75, 81, 75, 81, 12, 13, true, "models", "models"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106464587473865376, 9393655970867981228, 18446744073709551615, 18446744073709551615, 170, 177, 170, 177, 27, 28, true, "machine", "machine"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16381206567230470443, 11589242443360492541, 18446744073709551615, 18446744073709551615, 186, 192, 186, 192, 29, 30, true, "models", "models"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 6184954595655792282, 8083671121965931318, 18446744073709551615, 18446744073709551615, 219, 228, 219, 228, 34, 35, true, "precision", "precision"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 389609625633531007, 11139648504492918540, 18446744073709551615, 18446744073709551615, 277, 281, 277, 281, 45, 46, true, "lots", "lots"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16381206590620761857, 2390360296848922245, 18446744073709551615, 18446744073709551615, 373, 379, 373, 379, 59, 60, true, "layout", "layout"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 6167933651658664291, 6008261537128558327, 18446744073709551615, 18446744073709551615, 383, 392, 383, 392, 61, 62, true, "documents", "documents"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 2343822922798056892, 1513785689321948444, 18446744073709551615, 18446744073709551615, 399, 410, 399, 410, 65, 66, true, "consequence", "consequence"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106398484423890147, 15438783781988491310, 18446744073709551615, 18446744073709551615, 416, 423, 416, 423, 68, 69, true, "concept", "concept"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15359807916847569012, 8985499154594338495, 18446744073709551615, 18446744073709551615, 427, 437, 427, 437, 70, 71, true, "annotators", "annotators"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 6167933651658664291, 6008261537128554278, 18446744073709551615, 18446744073709551615, 442, 451, 442, 451, 72, 73, true, "documents", "documents"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14814125365076808131, 17209393146457670947, 18446744073709551615, 18446744073709551615, 479, 487, 479, 487, 77, 78, true, "platform", "platform"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106479265948440982, 10714368050200949681, 18446744073709551615, 18446744073709551615, 517, 524, 517, 524, 84, 85, true, "purpose", "purpose"], ["term", "single-term", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15359807916847569012, 8985499154594181730, 18446744073709551615, 18446744073709551615, 534, 544, 534, 544, 87, 88, true, "annotators", "annotators"], ["verb", "compound-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 7108090617469355457, 1912699042415883107, 18446744073709551615, 18446744073709551615, 85, 95, 85, 95, 14, 16, true, "be trained", "be trained"], ["verb", "compound-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 12385838741613342316, 15032477692672067044, 18446744073709551615, 18446744073709551615, 248, 261, 248, 261, 39, 42, true, "is often very", "is often very"], ["verb", "compound-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 892343280790899680, 7500746613947828378, 18446744073709551615, 18446744073709551615, 452, 469, 452, 469, 73, 75, true, "were incorporated", "were incorporated"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106398484822949544, 15441707406508006491, 18446744073709551615, 18446744073709551615, 22, 29, 22, 29, 5, 6, true, "collect", "collect"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14639581097006750428, 11409172441407676517, 18446744073709551615, 18446744073709551615, 66, 74, 66, 74, 11, 12, true, "learning", "learning"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541486535, 1572664503296569454, 18446744073709551615, 18446744073709551615, 133, 135, 133, 135, 21, 22, true, "is", "is"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16381206566454849358, 12105593666577949867, 18446744073709551615, 18446744073709551615, 163, 169, 163, 169, 26, 27, true, "obtain", "obtain"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106342444693204894, 14875576246519152573, 18446744073709551615, 18446744073709551615, 178, 185, 178, 185, 28, 29, true, "learned", "learned"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 16381206566454849358, 12105593666577949565, 18446744073709551615, 18446744073709551615, 270, 276, 270, 276, 44, 45, true, "obtain", "obtain"], ["verb", "single-verb", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541486535, 1572664503296459901, 18446744073709551615, 18446744073709551615, 545, 547, 545, 547, 88, 89, true, "is", "is"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 17583629327895598301, 13459834771297717800, 18446744073709551615, 18446744073709551615, 30, 46, 30, 46, 6, 8, true, "ground-truth for", "ground-truth for"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 8106396862006371970, 7459930432539401525, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "In this", "In this"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485678, 1572664558755839209, 18446744073709551615, 18446744073709551615, 96, 98, 96, 98, 16, 17, true, "on", "on"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485670, 1572664558081613030, 18446744073709551615, 18446744073709551615, 136, 138, 136, 138, 22, 23, true, "of", "of"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 389609625618037948, 12415310863134701485, 18446744073709551615, 18446744073709551615, 193, 197, 193, 197, 30, 31, true, "with", "with"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485670, 1572664558081615219, 18446744073709551615, 18446744073709551615, 282, 284, 282, 284, 46, 47, true, "of", "of"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15359550767359331054, 17736639368994315199, 18446744073709551615, 18446744073709551615, 362, 372, 362, 372, 57, 59, true, "across the", "across the"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485670, 1572664558081629607, 18446744073709551615, 18446744073709551615, 380, 382, 380, 382, 60, 61, true, "of", "of"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 389609625539850184, 12416294209159103980, 18446744073709551615, 18446744073709551615, 394, 398, 394, 398, 63, 65, true, "As a", "As a"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485670, 1572664558081628846, 18446744073709551615, 18446744073709551615, 424, 426, 424, 426, 69, 70, true, "of", "of"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 12178341415895625940, 15392261357260739020, 18446744073709551615, 18446744073709551615, 438, 441, 438, 441, 71, 72, true, "for", "for"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14637953883063114384, 17588754078703253842, 18446744073709551615, 18446744073709551615, 470, 478, 470, 478, 75, 77, true, "into the", "into the"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14637917359887717745, 12604191960591849593, 18446744073709551615, 18446744073709551615, 488, 496, 488, 496, 78, 80, true, "from the", "from the"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 14814148868025447689, 1566470717057299206, 18446744073709551615, 18446744073709551615, 525, 533, 525, 533, 85, 87, true, "of these", "of these"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485865, 1572664555486701809, 18446744073709551615, 18446744073709551615, 82, 84, 82, 84, 13, 14, true, "to", "to"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485865, 1572664555486707009, 18446744073709551615, 18446744073709551615, 160, 162, 160, 162, 25, 26, true, "to", "to"], ["conn", "single-conn", 11638821473906997927, "TEXT", "#/texts/34", 1.0, 15441160910541485865, 1572664555486699317, 18446744073709551615, 18446744073709551615, 267, 269, 267, 269, 43, 44, true, "to", "to"], ["numval", "ival", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 17767354399704235162, 14146447032891005863, 18446744073709551615, 18446744073709551615, 255, 256, 255, 256, 44, 45, true, "2", "2"], ["numval", "ival", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 17767354399704235163, 14146447035751187062, 18446744073709551615, 18446744073709551615, 673, 674, 673, 674, 130, 131, true, "3", "3"], ["numval", "ival", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541481851, 14395253567444244807, 18446744073709551615, 18446744073709551615, 874, 876, 874, 876, 168, 169, true, "30", "30"], ["parenthesis", "round brackets", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 7105842701545078035, 8408400604204305824, 18446744073709551615, 18446744073709551615, 243, 257, 243, 257, 41, 46, true, "(see Figure 2)", "(see Figure 2)"], ["parenthesis", "round brackets", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 8106340679880086785, 13947230042842640243, 18446744073709551615, 18446744073709551615, 275, 282, 275, 282, 51, 54, true, "(human)", "(human)"], ["expression", "word-concatenation", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 3753411203337468488, 16963906746658391185, 18446744073709551615, 18446744073709551615, 70, 82, 70, 82, 13, 14, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 3481880259565470086, 2454306430075603922, 18446744073709551615, 18446744073709551615, 100, 114, 100, 114, 18, 19, true, "crowd-sourcing", "crowd-sourcing"], ["expression", "word-concatenation", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15274658437030291237, 927761389620237631, 18446744073709551615, 18446744073709551615, 632, 646, 632, 646, 122, 123, true, "colouring-task", "colouring-task"], ["expression", "wtoken-concatenation", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14652257787682118593, 2235344648887433985, 18446744073709551615, 18446744073709551615, 434, 444, 434, 444, 86, 87, true, "etc^{10}", "etc$^{10}$"], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 9726701973674527424, 5629316039426793264, 18446744073709551615, 18446744073709551615, 0, 124, 0, 124, 0, 21, true, "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach.", "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15691131460025387221, 12159716655828567313, 18446744073709551615, 18446744073709551615, 125, 258, 125, 258, 21, 47, true, "In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2).", "In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2)."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2641958415274525762, 9846787274480004683, 18446744073709551615, 18446744073709551615, 259, 337, 259, 337, 47, 64, true, "We then ask the (human) annotator to assign each cell a layout semantic label.", "We then ask the (human) annotator to assign each cell a layout semantic label."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 1037767454801566655, 17259389722144888034, 18446744073709551615, 18446744073709551615, 338, 445, 338, 445, 64, 88, true, "Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$.", "Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 10111701075959805159, 5133545533971093986, 18446744073709551615, 18446744073709551615, 446, 532, 446, 532, 88, 104, true, "In the annotator tool, each layout semantic label is visually represented by a colour.", "In the annotator tool, each layout semantic label is visually represented by a colour."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 4706201309352513459, 17185467944624883260, 18446744073709551615, 18446744073709551615, 533, 675, 533, 675, 104, 132, true, "By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3.", "By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2594226177845736721, 3469738757049287491, 18446744073709551615, 18446744073709551615, 676, 766, 676, 766, 132, 149, true, "Since humans are very efficient in visual recognition, this task comes very natural to us.", "Since humans are very efficient in visual recognition, this task comes very natural to us."], ["sentence", "", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 18263945833780251048, 18046267097228773723, 18446744073709551615, 18446744073709551615, 767, 919, 767, 919, 149, 175, true, "The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", "The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns."], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5981390564575261606, 5462827921468056185, 18446744073709551615, 18446744073709551615, 100, 123, 100, 123, 18, 20, true, "crowd-sourcing approach", "crowd-sourcing approach"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 4147688156856812386, 14660217840011410416, 18446744073709551615, 18446744073709551615, 133, 148, 133, 148, 23, 25, true, "annotation task", "annotation task"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 11734732391183296006, 1599617040730216806, 18446744073709551615, 18446744073709551615, 166, 183, 166, 183, 29, 32, true, "original PDF page", "original PDF page"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5845623659139499376, 18290324680406483857, 18446744073709551615, 18446744073709551615, 203, 220, 203, 220, 35, 37, true, "parsed components", "parsed components"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2317020437411802284, 2857760401781024162, 18446744073709551615, 18446744073709551615, 315, 336, 315, 336, 60, 63, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 17144395416522725511, 17753778739971755617, 18446744073709551615, 18446744073709551615, 350, 365, 350, 365, 66, 68, true, "semantic labels", "semantic labels"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 6408516478084086022, 17282278894039682530, 18446744073709551615, 18446744073709551615, 453, 467, 453, 467, 90, 92, true, "annotator tool", "annotator tool"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2317020437411802284, 2857760401780966284, 18446744073709551615, 18446744073709551615, 474, 495, 474, 495, 94, 97, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 11173100292227021015, 10629231856798201869, 18446744073709551615, 18446744073709551615, 563, 577, 563, 577, 110, 112, true, "semantic label", "semantic label"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 3203380946006439274, 14675068709317928424, 18446744073709551615, 18446744073709551615, 591, 610, 591, 610, 116, 118, true, "semantic annotation", "semantic annotation"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5999195606993327398, 6806903084840068180, 18446744073709551615, 18446744073709551615, 711, 729, 711, 729, 138, 140, true, "visual recognition", "visual recognition"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16216055950284707729, 1614911910001461455, 18446744073709551615, 18446744073709551615, 771, 784, 771, 784, 150, 152, true, "required time", "required time"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 1353284443403550494, 16957154834908790928, 18446744073709551615, 18446744073709551615, 805, 816, 805, 816, 156, 158, true, "single page", "single page"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14195411118943606613, 11725530869998519128, 18446744073709551615, 18446744073709551615, 835, 849, 835, 849, 161, 163, true, "parsing output", "parsing output"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 9136485266740328691, 5311838821730551171, 18446744073709551615, 18446744073709551615, 890, 918, 890, 918, 171, 174, true, "various annotation campaigns", "various annotation campaigns"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15359807916847569012, 6832664458179175310, 18446744073709551615, 18446744073709551615, 24, 34, 24, 34, 5, 6, true, "annotators", "annotators"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14814125365076808131, 13227537830384402784, 18446744073709551615, 18446744073709551615, 42, 50, 42, 50, 8, 9, true, "platform", "platform"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 3753411203337468488, 16963906746658391185, 18446744073709551615, 18446744073709551615, 70, 82, 70, 82, 13, 14, true, "ground-truth", "ground-truth"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104161785194305, 16177406997540344708, 18446744073709551615, 18446744073709551615, 86, 91, 86, 91, 15, 16, true, "scale", "scale"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104161531686411, 16177740231384398405, 18446744073709551615, 18446744073709551615, 237, 242, 237, 242, 40, 41, true, "cells", "cells"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206514091025767, 138645421409664679, 18446744073709551615, 18446744073709551615, 248, 254, 248, 254, 43, 44, true, "Figure", "Figure"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5946726816546568920, 5690589095203344212, 18446744073709551615, 18446744073709551615, 283, 292, 283, 292, 54, 55, true, "annotator", "annotator"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625696024605, 2623686961522394102, 18446744073709551615, 18446744073709551615, 308, 312, 308, 312, 58, 59, true, "cell", "cell"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14650277098690689540, 14175748967298263712, 18446744073709551615, 18446744073709551615, 338, 346, 338, 346, 64, 65, true, "Examples", "Examples"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104161841334670, 4540659438624295673, 18446744073709551615, 18446744073709551615, 371, 376, 371, 376, 70, 71, true, "Title", "Title"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14650447666970618949, 5549516867519665861, 18446744073709551615, 18446744073709551615, 378, 386, 378, 386, 72, 73, true, "Abstract", "Abstract"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 8106479192428836136, 9415558511939706336, 18446744073709551615, 18446744073709551615, 388, 395, 388, 395, 74, 75, true, "Authors", "Authors"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14652314692921799233, 13219745338982113946, 18446744073709551615, 18446744073709551615, 397, 405, 397, 405, 76, 77, true, "Subtitle", "Subtitle"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625541629035, 2600754505563366440, 18446744073709551615, 18446744073709551615, 407, 411, 407, 411, 78, 79, true, "Text", "Text"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104161846359995, 4541367182255891997, 18446744073709551615, 18446744073709551615, 413, 418, 413, 418, 80, 81, true, "Table", "Table"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206514091025767, 138645421409679550, 18446744073709551615, 18446744073709551615, 420, 426, 420, 426, 82, 83, true, "Figure", "Figure"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625527096807, 2600425877064879199, 18446744073709551615, 18446744073709551615, 428, 432, 428, 432, 84, 85, true, "List", "List"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14652257787682118593, 2235344648887433985, 18446744073709551615, 18446744073709551615, 434, 444, 434, 444, 86, 87, true, "etc^{10}", "etc$^{10}$"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206562405951200, 14915916748921937907, 18446744073709551615, 18446744073709551615, 525, 531, 525, 531, 102, 103, true, "colour", "colour"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206562405951200, 14915916748921935127, 18446744073709551615, 18446744073709551615, 548, 554, 548, 554, 107, 108, true, "colour", "colour"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625631210899, 2869741906537984803, 18446744073709551615, 18446744073709551615, 583, 587, 583, 587, 114, 115, true, "task", "task"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15274658437030291237, 927761389620237631, 18446744073709551615, 18446744073709551615, 632, 646, 632, 646, 122, 123, true, "colouring-task", "colouring-task"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206514091025767, 138645421409695848, 18446744073709551615, 18446744073709551615, 666, 672, 666, 672, 129, 130, true, "Figure", "Figure"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206562125478786, 14844805221869100354, 18446744073709551615, 18446744073709551615, 682, 688, 682, 688, 133, 134, true, "humans", "humans"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625631210899, 2869741906537807335, 18446744073709551615, 18446744073709551615, 736, 740, 736, 740, 142, 143, true, "task", "task"], ["term", "single-term", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 8106478708554912027, 15651220002486280438, 18446744073709551615, 18446744073709551615, 877, 884, 877, 884, 169, 170, true, "seconds", "seconds"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 17379018868500585066, 7887869988328938557, 18446744073709551615, 18446744073709551615, 496, 519, 496, 519, 97, 100, true, "is visually represented", "is visually represented"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 12222886291842911511, 14164925570223305777, 18446744073709551615, 18446744073709551615, 611, 624, 611, 624, 118, 120, true, "is translated", "is translated"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14892762836247367071, 8594133885493293021, 18446744073709551615, 18446744073709551615, 651, 662, 651, 662, 125, 128, true, "can be seen", "can be seen"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14650447943360509516, 9369867654851497889, 18446744073709551615, 18446744073709551615, 689, 697, 689, 697, 134, 136, true, "are very", "are very"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2702989145586421307, 16288047702767501090, 18446744073709551615, 18446744073709551615, 741, 751, 741, 751, 143, 145, true, "comes very", "comes very"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 12716975807947326132, 18394114003906370195, 18446744073709551615, 18446744073709551615, 785, 802, 785, 802, 152, 155, true, "spent to annotate", "spent to annotate"], ["verb", "compound-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 7762624089155089838, 8059148629484155796, 18446744073709551615, 18446744073709551615, 850, 870, 850, 870, 163, 167, true, "has shown to average", "has shown to average"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104159171192019, 4339284832956404466, 18446744073709551615, 18446744073709551615, 51, 56, 51, 56, 9, 10, true, "allow", "allow"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206562264646932, 14865734342971057341, 18446744073709551615, 18446744073709551615, 63, 69, 63, 69, 12, 13, true, "gather", "gather"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104159157798023, 4075838060624060091, 18446744073709551615, 18446744073709551615, 92, 97, 92, 97, 16, 17, true, "using", "using"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14634109585341561832, 11176599631065673692, 18446744073709551615, 18446744073709551615, 153, 161, 153, 161, 27, 28, true, "retrieve", "retrieve"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5615021626537608757, 17448469512561363698, 18446744073709551615, 18446744073709551615, 192, 202, 192, 202, 34, 35, true, "associated", "associated"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 2703017932388060560, 7117070694088785565, 18446744073709551615, 18446744073709551615, 222, 232, 222, 232, 38, 39, true, "containing", "containing"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 12178341415895638617, 2314208399419917922, 18446744073709551615, 18446744073709551615, 244, 247, 244, 247, 42, 43, true, "see", "see"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 12178341415895564320, 2314153029143332235, 18446744073709551615, 18446744073709551615, 267, 270, 267, 270, 49, 50, true, "ask", "ask"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206568833706309, 76439633065696099, 18446744073709551615, 18446744073709551615, 296, 302, 296, 302, 56, 57, true, "assign", "assign"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 12178341415895564896, 2314153075758350109, 18446744073709551615, 18446744073709551615, 366, 369, 366, 369, 68, 69, true, "are", "are"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 5950066704821957614, 2855124977157713060, 18446744073709551615, 18446744073709551615, 536, 545, 536, 545, 105, 106, true, "assigning", "assigning"], ["verb", "single-verb", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14635107449930294178, 18245738378700485826, 18446744073709551615, 18446744073709551615, 817, 825, 817, 825, 158, 159, true, "starting", "starting"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 13874442862817243077, 5577683104197750470, 18446744073709551615, 18446744073709551615, 698, 710, 698, 710, 136, 138, true, "efficient in", "efficient in"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206566339127348, 14821225559023260074, 18446744073709551615, 18446744073709551615, 35, 41, 35, 41, 6, 8, true, "on the", "on the"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541487054, 14395248044803713066, 18446744073709551615, 18446744073709551615, 83, 85, 83, 85, 14, 15, true, "at", "at"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 8106396862068141297, 11468319027671071218, 18446744073709551615, 18446744073709551615, 125, 132, 125, 132, 21, 23, true, "In each", "In each"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485670, 14395248211619962859, 18446744073709551615, 18446744073709551615, 347, 349, 347, 349, 65, 66, true, "of", "of"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16380809977974811061, 16784953016435174527, 18446744073709551615, 18446744073709551615, 446, 452, 446, 452, 88, 90, true, "In the", "In the"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625686288966, 2869870603595820956, 18446744073709551615, 18446744073709551615, 520, 524, 520, 524, 100, 102, true, "by a", "by a"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541480853, 14395253412858146951, 18446744073709551615, 18446744073709551615, 533, 535, 533, 535, 104, 105, true, "By", "By"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485670, 14395248211619800045, 18446744073709551615, 18446744073709551615, 588, 590, 588, 590, 115, 116, true, "of", "of"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 16381206560517276114, 14905918454097336053, 18446744073709551615, 18446744073709551615, 625, 631, 625, 631, 120, 122, true, "into a", "into a"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541487053, 14395248044754679793, 18446744073709551615, 18446744073709551615, 648, 650, 648, 650, 124, 125, true, "as", "as"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541486538, 14395247967935705607, 18446744073709551615, 18446744073709551615, 663, 665, 663, 665, 128, 129, true, "in", "in"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 329104162323265917, 4542723028457504319, 18446744073709551615, 18446744073709551615, 676, 681, 676, 681, 132, 133, true, "Since", "Since"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 14637917359887717745, 9802156572988376526, 18446744073709551615, 18446744073709551615, 826, 834, 826, 834, 159, 161, true, "from the", "from the"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541487054, 14395248044803775593, 18446744073709551615, 18446744073709551615, 871, 873, 871, 873, 167, 168, true, "at", "at"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 389609625618865305, 2626455388543067312, 18446744073709551615, 18446744073709551615, 885, 889, 885, 889, 170, 171, true, "over", "over"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485865, 14395248221468713743, 18446744073709551615, 18446744073709551615, 60, 62, 60, 62, 11, 12, true, "to", "to"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485865, 14395248221468711840, 18446744073709551615, 18446744073709551615, 293, 295, 293, 295, 55, 56, true, "to", "to"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 8106351192289801590, 15640680202175036929, 18446744073709551615, 18446744073709551615, 555, 562, 555, 562, 108, 110, true, "to each", "to each"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485865, 14395248221468734066, 18446744073709551615, 18446744073709551615, 760, 762, 760, 762, 146, 147, true, "to", "to"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485865, 14395248221468732296, 18446744073709551615, 18446744073709551615, 791, 793, 791, 793, 153, 154, true, "to", "to"], ["conn", "single-conn", 13020065077657899116, "TEXT", "#/texts/35", 1.0, 15441160910541485865, 14395248221468253143, 18446744073709551615, 18446744073709551615, 860, 862, 860, 862, 165, 166, true, "to", "to"], ["expression", "common", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486545, 5648627953971592423, 18446744073709551615, 18446744073709551615, 170, 174, 170, 174, 32, 33, true, "ie", "i.e."], ["expression", "common", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486545, 5648627953971505641, 18446744073709551615, 18446744073709551615, 565, 569, 565, 569, 105, 106, true, "ie", "i.e."], ["expression", "word-concatenation", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 3753411203337468488, 2887415461401166935, 18446744073709551615, 18446744073709551615, 988, 1000, 988, 1000, 178, 179, true, "ground-truth", "ground-truth"], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8811629219803841493, 764401587528891604, 18446744073709551615, 18446744073709551615, 0, 102, 0, 102, 0, 18, true, "The second purpose of the annotators is to visually inspect the quality of our machine learned models.", "The second purpose of the annotators is to visually inspect the quality of our machine learned models."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 17074610120338710338, 17138558258330657336, 18446744073709551615, 18446744073709551615, 103, 222, 103, 222, 18, 43, true, "The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell.", "The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 13952947067572963915, 14804888992560412829, 18446744073709551615, 18446744073709551615, 223, 332, 223, 332, 43, 65, true, "Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page.", "Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 3886548767560778611, 15171206665642280195, 18446744073709551615, 18446744073709551615, 333, 417, 333, 417, 65, 81, true, "This allows the users to directly inspect the results of the models on unseen pages.", "This allows the users to directly inspect the results of the models on unseen pages."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 7265618967841439802, 9171259371202189029, 18446744073709551615, 18446744073709551615, 418, 645, 418, 645, 81, 118, true, "A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels.", "A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 13719400799583725468, 8197048490410940553, 18446744073709551615, 18446744073709551615, 646, 761, 646, 761, 118, 142, true, "Of course, as the models become better over time, the number of corrections needed to be made become less and less.", "Of course, as the models become better over time, the number of corrections needed to be made become less and less."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 11311841238554173343, 1992585178053613626, 18446744073709551615, 18446744073709551615, 762, 834, 762, 834, 142, 154, true, "This allows us to significantly reduce the annotation time per document.", "This allows us to significantly reduce the annotation time per document."], ["sentence", "", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 11029085865310589014, 140652675393977829, 18446744073709551615, 18446744073709551615, 835, 1011, 835, 1011, 154, 181, true, "Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering.", "Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering."], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1265616008757051900, 16887628514756530891, 18446744073709551615, 18446744073709551615, 4, 18, 4, 18, 1, 3, true, "second purpose", "second purpose"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 2317020437411802284, 18255726684800034363, 18446744073709551615, 18446744073709551615, 187, 208, 187, 208, 36, 39, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 6406713431887338480, 7916422400711570126, 18446744073709551615, 18446744073709551615, 317, 331, 317, 331, 62, 64, true, "annotated page", "annotated page"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 2348752337749164990, 14543923397071441531, 18446744073709551615, 18446744073709551615, 404, 416, 404, 416, 78, 80, true, "unseen pages", "unseen pages"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15605685416779062553, 8687993225293924380, 18446744073709551615, 18446744073709551615, 420, 438, 420, 438, 82, 84, true, "direct consequence", "direct consequence"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 5762122585570374444, 14528211899176051482, 18446744073709551615, 18446744073709551615, 447, 468, 447, 468, 86, 88, true, "inspection capability", "inspection capability"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 4147688156856812386, 6490015423550437879, 18446744073709551615, 18446744073709551615, 499, 514, 499, 514, 94, 96, true, "annotation task", "annotation task"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1447735027206064326, 4580102222737649898, 18446744073709551615, 18446744073709551615, 548, 563, 548, 563, 102, 104, true, "correction task", "correction task"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1385949438436713657, 10394950804393440811, 18446744073709551615, 18446744073709551615, 574, 590, 574, 590, 107, 109, true, "human annotators", "human annotators"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 4147688156856784008, 6490015274061652606, 18446744073709551615, 18446744073709551615, 805, 820, 805, 820, 149, 151, true, "annotation time", "annotation time"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 12538595938735813332, 3845550124263106024, 18446744073709551615, 18446744073709551615, 899, 915, 899, 915, 163, 166, true, "high hourly rate", "high hourly rate"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1591019414094504294, 10764400280857424150, 18446744073709551615, 18446744073709551615, 988, 1010, 988, 1010, 178, 180, true, "ground-truth gathering", "ground-truth gathering"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15359807916847569012, 2773765720706928008, 18446744073709551615, 18446744073709551615, 26, 36, 26, 36, 5, 6, true, "annotators", "annotators"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106477781724488761, 877124515430637025, 18446744073709551615, 18446744073709551615, 64, 71, 64, 71, 11, 12, true, "quality", "quality"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106464587473865376, 17528532861678646004, 18446744073709551615, 18446744073709551615, 79, 86, 79, 86, 14, 15, true, "machine", "machine"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206567230470443, 7370067777486058567, 18446744073709551615, 18446744073709551615, 95, 101, 95, 101, 16, 17, true, "models", "models"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625699055241, 14717516376643823052, 18446744073709551615, 18446744073709551615, 107, 111, 107, 111, 19, 20, true, "goal", "goal"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206567230470443, 7370067777486035497, 18446744073709551615, 18446744073709551615, 119, 125, 119, 125, 22, 23, true, "models", "models"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206569104268492, 6068740663996960621, 18446744073709551615, 18446744073709551615, 144, 150, 144, 150, 27, 28, true, "action", "action"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15359807916847569012, 2773765720706953011, 18446744073709551615, 18446744073709551615, 158, 168, 158, 168, 30, 31, true, "annotators", "annotators"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625696024605, 14717680184571495356, 18446744073709551615, 18446744073709551615, 217, 221, 217, 221, 41, 42, true, "cell", "cell"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206521509536706, 8483052701360678843, 18446744073709551615, 18446744073709551615, 236, 242, 236, 242, 46, 47, true, "result", "result"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 14103651237077221583, 4734852607792192719, 18446744073709551615, 18446744073709551615, 248, 258, 248, 258, 49, 50, true, "prediction", "prediction"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625632301461, 14734624151323869703, 18446744073709551615, 18446744073709551615, 268, 272, 268, 272, 52, 53, true, "page", "page"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 329104159157820437, 5473671238311420391, 18446744073709551615, 18446744073709551615, 349, 354, 349, 354, 68, 69, true, "users", "users"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106478445190161533, 12197236091761950611, 18446744073709551615, 18446744073709551615, 379, 386, 379, 386, 73, 74, true, "results", "results"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206567230470443, 7370067777485849666, 18446744073709551615, 18446744073709551615, 394, 400, 394, 400, 76, 77, true, "models", "models"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15359807916847569012, 2773765720706781412, 18446744073709551615, 18446744073709551615, 476, 486, 476, 486, 90, 91, true, "annotators", "annotators"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206590740615814, 4654449512501997811, 18446744073709551615, 18446744073709551615, 638, 644, 638, 644, 116, 117, true, "labels", "labels"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206562412792821, 7448141254981419885, 18446744073709551615, 18446744073709551615, 649, 655, 649, 655, 119, 120, true, "course", "course"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206567230470443, 7370067777485806355, 18446744073709551615, 18446744073709551615, 664, 670, 664, 670, 123, 124, true, "models", "models"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625631241985, 14734405111978592850, 18446744073709551615, 18446744073709551615, 690, 694, 690, 694, 127, 128, true, "time", "time"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206574973295053, 5687305569601072199, 18446744073709551615, 18446744073709551615, 700, 706, 700, 706, 130, 131, true, "number", "number"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 2993400436190652919, 10582887119126517774, 18446744073709551615, 18446744073709551615, 710, 721, 710, 721, 132, 133, true, "corrections", "corrections"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 14650401089286948001, 1771869589109122009, 18446744073709551615, 18446744073709551615, 825, 833, 825, 833, 152, 153, true, "document", "document"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1037258523789473353, 10472772837010545151, 18446744073709551615, 18446744073709551615, 841, 852, 841, 852, 155, 156, true, "annotations", "annotations"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 9754205718486487036, 4571850225111332849, 18446744073709551615, 18446744073709551615, 878, 891, 878, 891, 160, 161, true, "professionals", "professionals"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 3503953938541970428, 5150439481258416324, 18446744073709551615, 18446744073709551615, 931, 940, 931, 940, 169, 170, true, "technique", "technique"], ["term", "single-term", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625695918775, 14717671066676122635, 18446744073709551615, 18446744073709551615, 980, 984, 980, 984, 176, 177, true, "cost", "cost"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1733018218539366667, 11085627304462353310, 18446744073709551615, 18446744073709551615, 37, 59, 37, 59, 6, 10, true, "is to visually inspect", "is to visually inspect"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 13692217046483889459, 9755057425888296816, 18446744073709551615, 18446744073709551615, 126, 139, 126, 139, 23, 26, true, "is to emulate", "is to emulate"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8336379008036663285, 2252560075242747349, 18446744073709551615, 18446744073709551615, 170, 184, 170, 184, 32, 35, true, "ie to assign", "i.e. to assign"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 14444118778847914736, 5678933160321683165, 18446744073709551615, 18446744073709551615, 287, 299, 287, 299, 55, 57, true, "be displayed", "be displayed"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 13727039347256328143, 14963124457720431052, 18446744073709551615, 18446744073709551615, 515, 540, 515, 540, 96, 100, true, "can be transformed easily", "can be transformed easily"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 10428062617433318222, 17753611624855826478, 18446744073709551615, 18446744073709551615, 596, 611, 596, 611, 110, 113, true, "need to correct", "need to correct"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 17328653366627729324, 9156057289599737447, 18446744073709551615, 18446744073709551615, 722, 746, 722, 746, 133, 138, true, "needed to be made become", "needed to be made become"], ["verb", "compound-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 1919368618428862817, 2645599353882990224, 18446744073709551615, 18446744073709551615, 853, 874, 853, 874, 156, 159, true, "are typically created", "are typically created"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106342444693204894, 15282897598607271867, 18446744073709551615, 18446744073709551615, 87, 94, 87, 94, 15, 16, true, "learned", "learned"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625633616262, 14734510420580904856, 18446744073709551615, 18446744073709551615, 309, 313, 309, 313, 60, 61, true, "were", "were"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206569317834029, 6192068890556919984, 18446744073709551615, 18446744073709551615, 338, 344, 338, 344, 66, 67, true, "allows", "allows"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106398347643660299, 15243737699270700259, 18446744073709551615, 18446744073709551615, 367, 374, 367, 374, 71, 72, true, "inspect", "inspect"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486535, 5648627928025204004, 18446744073709551615, 18446744073709551615, 487, 489, 487, 489, 91, 92, true, "is", "is"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486545, 5648627953971505641, 18446744073709551615, 18446744073709551615, 565, 569, 565, 569, 105, 106, true, "ie", "i.e."], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 6184954633443293966, 6201163133929267929, 18446744073709551615, 18446744073709551615, 628, 637, 628, 637, 115, 116, true, "predicted", "predicted"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206574366219255, 6162855425004985556, 18446744073709551615, 18446744073709551615, 671, 677, 671, 677, 124, 125, true, "become", "become"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206569317834029, 6192068890556892683, 18446744073709551615, 18446744073709551615, 767, 773, 767, 773, 143, 144, true, "allows", "allows"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206521531524134, 8487629705353323144, 18446744073709551615, 18446744073709551615, 794, 800, 794, 800, 147, 148, true, "reduce", "reduce"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 5947874710666698111, 7433459475979555695, 18446744073709551615, 18446744073709551615, 921, 930, 921, 930, 168, 169, true, "colouring", "colouring"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106397564189393266, 10189283544961513918, 18446744073709551615, 18446744073709551615, 941, 948, 941, 948, 170, 171, true, "allowed", "allowed"], ["verb", "single-verb", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206521531524134, 8487629705353346362, 18446744073709551615, 18446744073709551615, 969, 975, 969, 975, 174, 175, true, "reduce", "reduce"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206565712212855, 5007444638099411779, 18446744073709551615, 18446744073709551615, 19, 25, 19, 25, 3, 5, true, "of the", "of the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485670, 5648627965386078792, 18446744073709551615, 18446744073709551615, 72, 74, 72, 74, 12, 13, true, "of", "of"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206565712212855, 5007444638099311142, 18446744073709551615, 18446744073709551615, 112, 118, 112, 118, 20, 22, true, "of the", "of the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206565712212855, 5007444638099321250, 18446744073709551615, 18446744073709551615, 151, 157, 151, 157, 28, 30, true, "of the", "of the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625620237736, 14734126707883533061, 18446744073709551615, 18446744073709551615, 243, 247, 243, 247, 47, 49, true, "of a", "of a"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 14637917333167503367, 798190401077603715, 18446744073709551615, 18446744073709551615, 259, 267, 259, 267, 50, 52, true, "for each", "for each"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541487053, 5648627952979693361, 18446744073709551615, 18446744073709551615, 300, 302, 300, 302, 57, 58, true, "as", "as"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486546, 5648627954249524212, 18446744073709551615, 18446744073709551615, 303, 305, 303, 305, 58, 59, true, "if", "if"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206565712212855, 5007444638099430551, 18446744073709551615, 18446744073709551615, 387, 393, 387, 393, 74, 76, true, "of the", "of the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485678, 5648627990571398259, 18446744073709551615, 18446744073709551615, 401, 403, 401, 403, 77, 78, true, "on", "on"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106342927224204628, 9808316297265099098, 18446744073709551615, 18446744073709551615, 439, 446, 439, 446, 84, 86, true, "of this", "of this"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206560518651853, 4762915631712600090, 18446744073709551615, 18446744073709551615, 469, 475, 469, 475, 88, 90, true, "in the", "in the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 14634130761162415388, 81576451573710762, 18446744073709551615, 18446744073709551615, 490, 498, 490, 498, 92, 94, true, "that the", "that the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206560517276114, 4764682161644402103, 18446744073709551615, 18446744073709551615, 541, 547, 541, 547, 100, 102, true, "into a", "into a"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541487694, 5648628413298466486, 18446744073709551615, 18446744073709551615, 646, 648, 646, 648, 118, 119, true, "Of", "Of"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206568455155979, 7352846837303360973, 18446744073709551615, 18446744073709551615, 657, 663, 657, 663, 121, 123, true, "as the", "as the"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 389609625618865305, 14762415042042723791, 18446744073709551615, 18446744073709551615, 685, 689, 685, 689, 126, 127, true, "over", "over"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485670, 5648627965385789683, 18446744073709551615, 18446744073709551615, 707, 709, 707, 709, 131, 132, true, "of", "of"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 12178341415895635383, 522863227394400205, 18446744073709551615, 18446744073709551615, 821, 824, 821, 824, 151, 152, true, "per", "per"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 329104162323265917, 703542606745908864, 18446744073709551615, 18446744073709551615, 835, 840, 835, 840, 154, 155, true, "Since", "Since"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541486989, 5648627977530603539, 18446744073709551615, 18446744073709551615, 875, 877, 875, 877, 159, 160, true, "by", "by"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 16381206557726458966, 4826134415899722071, 18446744073709551615, 18446744073709551615, 892, 898, 892, 898, 161, 163, true, "with a", "with a"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485670, 5648627965385872694, 18446744073709551615, 18446744073709551615, 985, 987, 985, 987, 177, 178, true, "of", "of"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167410249, 18446744073709551615, 18446744073709551615, 40, 42, 40, 42, 7, 8, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167418813, 18446744073709551615, 18446744073709551615, 129, 131, 129, 131, 24, 25, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167419001, 18446744073709551615, 18446744073709551615, 175, 177, 175, 177, 33, 34, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 8106351192289801590, 3283401131731009010, 18446744073709551615, 18446744073709551615, 209, 216, 209, 216, 39, 41, true, "to each", "to each"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167626806, 18446744073709551615, 18446744073709551615, 355, 357, 355, 357, 69, 70, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167658087, 18446744073709551615, 18446744073709551615, 601, 603, 601, 603, 111, 112, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167600455, 18446744073709551615, 18446744073709551615, 729, 731, 729, 731, 134, 135, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167591235, 18446744073709551615, 18446744073709551615, 777, 779, 777, 779, 145, 146, true, "to", "to"], ["conn", "single-conn", 10103841011442966464, "TEXT", "#/texts/36", 1.0, 15441160910541485865, 5648627990167603037, 18446744073709551615, 18446744073709551615, 952, 954, 952, 954, 172, 173, true, "to", "to"], ["numval", "ival", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 17767354399704235163, 8981450943146377597, 18446744073709551615, 18446744073709551615, 10, 11, 10, 11, 2, 3, true, "3", "3"], ["expression", "word-concatenation", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 4147688168886302397, 4316020704427245043, 18446744073709551615, 18446744073709551615, 25, 40, 25, 40, 7, 8, true, "annotation-rate", "annotation-rate"], ["expression", "word-concatenation", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 10203919108291344398, 6932446687114432211, 18446744073709551615, 18446744073709551615, 44, 68, 44, 68, 9, 10, true, "number-of-annotatedpages", "number-of-annotatedpages"], ["sentence", "", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 1209815263017833313, 7894248700334870994, 18446744073709551615, 18446744073709551615, 0, 80, 0, 80, 0, 13, true, "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute.", "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute."], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 3197976581661651446, 17591908013383021851, 18446744073709551615, 18446744073709551615, 85, 103, 85, 103, 14, 17, true, "vertical red lines", "vertical red lines"], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 4147505635383066832, 7737381165149079765, 18446744073709551615, 18446744073709551615, 150, 165, 150, 165, 25, 27, true, "annotated pages", "annotated pages"], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 12895837410855552806, 17306921502480655741, 18446744073709551615, 18446744073709551615, 178, 192, 178, 192, 32, 34, true, "improved model", "improved model"], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 16381206514091025767, 6269509740307391107, 18446744073709551615, 18446744073709551615, 3, 9, 3, 9, 1, 2, true, "Figure", "Figure"], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 4147688168886302397, 4316020704427245043, 18446744073709551615, 18446744073709551615, 25, 40, 25, 40, 7, 8, true, "annotation-rate", "annotation-rate"], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 10203919108291344398, 6932446687114432211, 18446744073709551615, 18446744073709551615, 44, 68, 44, 68, 9, 10, true, "number-of-annotatedpages", "number-of-annotatedpages"], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 16381206594557227155, 607213422089229276, 18446744073709551615, 18446744073709551615, 73, 79, 73, 79, 11, 12, true, "minute", "minute"], ["term", "single-term", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 14634153919632515335, 1857646059609777193, 18446744073709551615, 18446744073709551615, 120, 128, 120, 128, 20, 21, true, "training", "training"], ["verb", "compound-verb", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 13034167073558276041, 14872878930615633276, 18446744073709551615, 18446744073709551615, 129, 142, 129, 142, 21, 23, true, "was performed", "was performed"], ["verb", "single-verb", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 389609625741152123, 14211761448341366960, 18446744073709551615, 18446744073709551615, 16, 20, 16, 20, 5, 6, true, "show", "show"], ["verb", "single-verb", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 14637951605983202826, 7813818474500339205, 18446744073709551615, 18446744073709551615, 104, 112, 104, 112, 17, 18, true, "indicate", "indicate"], ["verb", "single-verb", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 15441160910541486535, 2695916097507178008, 18446744073709551615, 18446744073709551615, 193, 195, 193, 195, 34, 35, true, "is", "is"], ["conn", "single-conn", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 15441160910541480354, 2695916118355464437, 18446744073709551615, 18446744073709551615, 0, 2, 0, 2, 0, 1, true, "In", "In"], ["conn", "single-conn", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 15441160910541486538, 2695916097612071945, 18446744073709551615, 18446744073709551615, 41, 43, 41, 43, 8, 9, true, "in", "in"], ["conn", "single-conn", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 12178341415895635383, 15900505804860594428, 18446744073709551615, 18446744073709551615, 69, 72, 69, 72, 10, 11, true, "per", "per"], ["conn", "single-conn", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 16381206519429333259, 6325354017576163482, 18446744073709551615, 18446744073709551615, 113, 119, 113, 119, 18, 20, true, "that a", "that a"], ["conn", "single-conn", 10982401368140758581, "TEXT", "#/texts/37", 1.0, 16381206566339127348, 1754758722220302331, 18446744073709551615, 18446744073709551615, 143, 149, 143, 149, 23, 25, true, "on the", "on the"], ["numval", "ival", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541481982, 11951507606951917585, 18446744073709551615, 18446744073709551615, 508, 510, 508, 510, 92, 93, true, "10", "10"], ["parenthesis", "round brackets", 887751753527930563, "TEXT", "#/texts/38", 1.0, 8040598725736414260, 16009765337602090817, 18446744073709551615, 18446744073709551615, 185, 218, 185, 218, 35, 43, true, "(based on annotated ground-truth)", "(based on annotated ground-truth)"], ["parenthesis", "round brackets", 887751753527930563, "TEXT", "#/texts/38", 1.0, 5092507472812879080, 14553694282484972012, 18446744073709551615, 18446744073709551615, 296, 391, 296, 391, 55, 71, true, "(submitting page-annotations, training the model, applying the model for predicting the labels)", "(submitting page-annotations, training the model, applying the model for predicting the labels)"], ["expression", "word-concatenation", 887751753527930563, "TEXT", "#/texts/38", 1.0, 3005486399909847392, 14682678718727362625, 18446744073709551615, 18446744073709551615, 152, 165, 152, 165, 31, 32, true, "inter-leaving", "inter-leaving"], ["expression", "word-concatenation", 887751753527930563, "TEXT", "#/texts/38", 1.0, 12771775017586419952, 2802304582408659586, 18446744073709551615, 18446744073709551615, 308, 324, 308, 324, 57, 58, true, "page-annotations", "page-annotations"], ["expression", "word-concatenation", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14635107217315999975, 15420785844354915058, 18446744073709551615, 18446744073709551615, 484, 492, 484, 492, 87, 88, true, "speed-up", "speed-up"], ["expression", "word-concatenation", 887751753527930563, "TEXT", "#/texts/38", 1.0, 3753411203337468488, 12096051488974704464, 18446744073709551615, 18446744073709551615, 515, 527, 515, 527, 94, 95, true, "ground-truth", "ground-truth"], ["sentence", "", 887751753527930563, "TEXT", "#/texts/38", 1.0, 6272342027418177618, 7231857899686407766, 18446744073709551615, 18446744073709551615, 44, 119, 44, 119, 9, 24, true, "Since the corrections become less and less, the rate of annotation goes up.", "Since the corrections become less and less, the rate of annotation goes up."], ["sentence", "", 887751753527930563, "TEXT", "#/texts/38", 1.0, 10373341387652950101, 4296463374728196626, 18446744073709551615, 18446744073709551615, 120, 445, 120, 445, 24, 81, true, "It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice.", "It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice."], ["sentence", "", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14172981653758283269, 8881727103428706752, 18446744073709551615, 18446744073709551615, 446, 539, 446, 539, 81, 97, true, "The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", "The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection."], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 1075818440275369468, 17454316887235878242, 18446744073709551615, 18446744073709551615, 169, 184, 169, 184, 33, 35, true, "training models", "training models"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 9789767543968526861, 314805434658016224, 18446744073709551615, 18446744073709551615, 223, 242, 223, 242, 44, 46, true, "annotation benefits", "annotation benefits"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 17553665798218401963, 3726396549088745766, 18446744073709551615, 18446744073709551615, 261, 278, 261, 278, 49, 51, true, "platform approach", "platform approach"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 6824561449593020406, 2835024862878445208, 18446744073709551615, 18446744073709551615, 409, 426, 409, 426, 75, 77, true, "asynchronous call", "asynchronous call"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15545443248697733320, 10164958119644670876, 18446744073709551615, 18446744073709551615, 450, 472, 450, 472, 82, 84, true, "accelerated annotation", "accelerated annotation"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 6431940343880246726, 1708248768847169986, 18446744073709551615, 18446744073709551615, 515, 538, 515, 538, 94, 96, true, "ground-truth collection", "ground-truth collection"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104161594416377, 13413315670796926229, 18446744073709551615, 18446744073709551615, 15, 20, 15, 20, 3, 4, true, "point", "point"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 16381206590740615814, 5715153085670543197, 18446744073709551615, 18446744073709551615, 36, 42, 36, 42, 7, 8, true, "labels", "labels"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 2993400436190652919, 13692484191386832624, 18446744073709551615, 18446744073709551615, 54, 65, 54, 65, 11, 12, true, "corrections", "corrections"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625632775616, 7525501055193045867, 18446744073709551615, 18446744073709551615, 92, 96, 92, 96, 18, 19, true, "rate", "rate"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15359807916847495711, 11270264014125951727, 18446744073709551615, 18446744073709551615, 100, 110, 100, 110, 20, 21, true, "annotation", "annotation"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 3005486399909847392, 14682678718727362625, 18446744073709551615, 18446744073709551615, 152, 165, 152, 165, 31, 32, true, "inter-leaving", "inter-leaving"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 16381206541509431009, 14577680045959261972, 18446744073709551615, 18446744073709551615, 205, 211, 205, 211, 39, 40, true, "ground", "ground"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104159241711235, 5593828058814821597, 18446744073709551615, 18446744073709551615, 212, 217, 212, 217, 41, 42, true, "truth", "truth"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625631210899, 7525474213247124703, 18446744073709551615, 18446744073709551615, 291, 295, 291, 295, 54, 55, true, "task", "task"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 12771775017586419952, 2802304582408659586, 18446744073709551615, 18446744073709551615, 308, 324, 308, 324, 57, 58, true, "page-annotations", "page-annotations"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104161610777240, 13417175623690781227, 18446744073709551615, 18446744073709551615, 339, 344, 339, 344, 61, 62, true, "model", "model"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104161610777240, 13417175623690783224, 18446744073709551615, 18446744073709551615, 359, 364, 359, 364, 65, 66, true, "model", "model"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 16381206590740615814, 5715153085670598580, 18446744073709551615, 18446744073709551615, 384, 390, 384, 390, 69, 70, true, "labels", "labels"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 16682817150367627875, 14157104143939096698, 18446744073709551615, 18446744073709551615, 432, 444, 432, 444, 79, 80, true, "microservice", "microservice"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14635107217315999975, 15420785844354915058, 18446744073709551615, 18446744073709551615, 484, 492, 484, 492, 87, 88, true, "speed-up", "speed-up"], ["term", "single-term", 887751753527930563, "TEXT", "#/texts/38", 1.0, 16381206548642682247, 14381169330489265135, 18446744073709551615, 18446744073709551615, 498, 504, 498, 504, 90, 91, true, "factor", "factor"], ["verb", "compound-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14639581096777419601, 11157428250198373143, 18446744073709551615, 18446744073709551615, 473, 481, 473, 481, 84, 86, true, "leads to", "leads to"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625632179144, 7525453844240816178, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "used", "used"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 8106476016678293182, 14705001208406550988, 18446744073709551615, 18446744073709551615, 24, 31, 24, 31, 5, 6, true, "predict", "predict"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 16381206574366219255, 14308904875579874986, 18446744073709551615, 18446744073709551615, 66, 72, 66, 72, 12, 13, true, "become", "become"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625699055541, 7971072571524745502, 18446744073709551615, 18446744073709551615, 111, 115, 111, 115, 21, 22, true, "goes", "goes"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541486535, 11951507066520328741, 18446744073709551615, 18446744073709551615, 123, 125, 123, 125, 25, 26, true, "is", "is"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 12178341415895645562, 13102495715066849378, 18446744073709551615, 18446744073709551615, 138, 141, 138, 141, 28, 29, true, "say", "say"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104159219515955, 5597411499285869731, 18446744073709551615, 18446744073709551615, 186, 191, 186, 191, 36, 37, true, "based", "based"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 5946726816546568286, 1477520919743645775, 18446744073709551615, 18446744073709551615, 195, 204, 195, 204, 38, 39, true, "annotated", "annotated"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14109055745804186414, 4901536807348624447, 18446744073709551615, 18446744073709551615, 297, 307, 297, 307, 56, 57, true, "submitting", "submitting"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14634153919632515335, 4255451725012375348, 18446744073709551615, 18446744073709551615, 326, 334, 326, 334, 59, 60, true, "training", "training"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14650448030444381648, 2869960803513685796, 18446744073709551615, 18446744073709551615, 346, 354, 346, 354, 63, 64, true, "applying", "applying"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14103651237077222912, 3033995743627716102, 18446744073709551615, 18446744073709551615, 369, 379, 369, 379, 67, 68, true, "predicting", "predicting"], ["verb", "single-verb", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104161555284808, 5558370405433613923, 18446744073709551615, 18446744073709551615, 392, 397, 392, 397, 71, 72, true, "comes", "comes"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 6184771668677947934, 317856897517446338, 18446744073709551615, 18446744073709551615, 5, 14, 5, 14, 1, 3, true, "from that", "from that"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 5959619225047725157, 1767038672352581109, 18446744073709551615, 18446744073709551615, 44, 53, 44, 53, 9, 11, true, "Since the", "Since the"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541485670, 11951507671817644057, 18446744073709551615, 18446744073709551615, 97, 99, 97, 99, 19, 20, true, "of", "of"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 3504047303127782210, 16099388320343716523, 18446744073709551615, 18446744073709551615, 142, 151, 142, 151, 29, 31, true, "that this", "that this"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541485670, 11951507671817649898, 18446744073709551615, 18446744073709551615, 166, 168, 166, 168, 32, 33, true, "of", "of"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541485678, 11951507647334320637, 18446744073709551615, 18446744073709551615, 192, 194, 192, 194, 37, 38, true, "on", "on"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625697843734, 7970779865719157582, 18446744073709551615, 18446744073709551615, 252, 256, 252, 256, 47, 48, true, "from", "from"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 14091433066300748251, 6432975356795058417, 18446744073709551615, 18446744073709551615, 280, 290, 280, 290, 52, 54, true, "since each", "since each"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 12178341415895625940, 13102496268306370799, 18446744073709551615, 18446744073709551615, 365, 368, 365, 368, 66, 67, true, "for", "for"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625620237736, 7524977021781763492, 18446744073709551615, 18446744073709551615, 493, 497, 493, 497, 88, 90, true, "of a", "of a"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541485670, 11951507671817651167, 18446744073709551615, 18446744073709551615, 505, 507, 505, 507, 91, 92, true, "of", "of"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 12178341415895625940, 13102496268307349332, 18446744073709551615, 18446744073709551615, 511, 514, 511, 514, 93, 94, true, "for", "for"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541485865, 11951507086803887753, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "to", "to"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 15441160910541485865, 11951507086803862758, 18446744073709551615, 18446744073709551615, 135, 137, 135, 137, 27, 28, true, "to", "to"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 329104159243175056, 5593839863116247665, 18446744073709551615, 18446744073709551615, 403, 408, 403, 408, 73, 75, true, "to an", "to an"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625631408052, 7525477393951992167, 18446744073709551615, 18446744073709551615, 427, 431, 427, 431, 77, 79, true, "to a", "to a"], ["conn", "single-conn", 887751753527930563, "TEXT", "#/texts/38", 1.0, 389609625631408052, 7525477393952012376, 18446744073709551615, 18446744073709551615, 479, 483, 479, 483, 85, 87, true, "to a", "to a"], ["numval", "fval", 4695688617288377564, "TEXT", "#/texts/39", 1.0, 12178341415896435197, 3724559431380620971, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "3.4", "3.4"], ["expression", "common", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 12178341415895450733, 17146743324715295976, 18446744073709551615, 18446744073709551615, 268, 272, 268, 272, 54, 55, true, "etc", "etc."], ["expression", "word-concatenation", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 6307689511527468252, 11049638471994231619, 18446744073709551615, 18446744073709551615, 47, 63, 47, 63, 10, 11, true, "machine-learning", "machine-learning"], ["sentence", "", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 13517213129330321847, 18127661899519764164, 18446744073709551615, 18446744073709551615, 0, 71, 0, 71, 0, 13, true, "In the CCS, there are essentially two types of machine-learning models.", "In the CCS, there are essentially two types of machine-learning models."], ["sentence", "", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 12886030746334264259, 13723940064220454406, 18446744073709551615, 18446744073709551615, 72, 157, 72, 157, 13, 31, true, "On the one hand, we have default models, which are designed to be layout independent.", "On the one hand, we have default models, which are designed to be layout independent."], ["sentence", "", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 14013456187441881338, 17239504930887350611, 18446744073709551615, 18446744073709551615, 158, 520, 158, 520, 31, 100, true, "They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall.", "They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall."], ["sentence", "", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 17780227530484624695, 18273401801814716252, 18446744073709551615, 18446744073709551615, 521, 605, 521, 605, 100, 116, true, "They will classify each cell in the page with regard to their layout semantic label.", "They will classify each cell in the page with regard to their layout semantic label."], ["term", "enum-term-mark-2", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 767578358531619449, 16632720521972427975, 18446744073709551615, 18446744073709551615, 499, 519, 499, 519, 96, 99, true, "precision and recall", "precision and recall"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 7873664013415410219, 4430601135045369937, 18446744073709551615, 18446744073709551615, 47, 70, 47, 70, 10, 12, true, "machine-learning models", "machine-learning models"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 1915006193249717419, 1875271127477933001, 18446744073709551615, 18446744073709551615, 97, 111, 97, 111, 20, 22, true, "default models", "default models"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16239183518337686478, 11101556195438910420, 18446744073709551615, 18446744073709551615, 170, 182, 170, 182, 34, 36, true, "raster image", "raster image"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15816118739913983357, 4910490026828256135, 18446744073709551615, 18446744073709551615, 218, 231, 218, 231, 43, 45, true, "basic objects", "basic objects"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 14046205808324278415, 17119448556130976489, 18446744073709551615, 18446744073709551615, 280, 290, 280, 290, 57, 59, true, "other hand", "other hand"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 2583917242234592483, 374576428698011758, 18446744073709551615, 18446744073709551615, 332, 355, 332, 355, 68, 70, true, "templatespecific models", "templatespecific models"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 1409002602794608575, 12647829849056583196, 18446744073709551615, 18446744073709551615, 395, 421, 395, 421, 78, 81, true, "particular layout template", "particular layout template"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 334743981239923851, 9786444577548631916, 18446744073709551615, 18446744073709551615, 494, 508, 494, 508, 95, 97, true, "high precision", "high precision"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 2317020437411802284, 6661739231635438995, 18446744073709551615, 18446744073709551615, 583, 604, 583, 604, 112, 115, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 12178341415896221596, 17145631232582541696, 18446744073709551615, 18446744073709551615, 7, 10, 7, 10, 2, 3, true, "CCS", "CCS"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 329104159243796903, 9793326042416365976, 18446744073709551615, 18446744073709551615, 38, 43, 38, 43, 8, 9, true, "types", "types"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625695385072, 5116635824435316109, 18446744073709551615, 18446744073709551615, 83, 87, 83, 87, 16, 17, true, "hand", "hand"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625632301461, 5151166762899359442, 18446744073709551615, 18446744073709551615, 190, 194, 190, 194, 38, 39, true, "page", "page"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206513098478539, 13904355123067777732, 18446744073709551615, 18446744073709551615, 241, 247, 241, 247, 48, 49, true, "tables", "tables"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 8106397480533647371, 3439539142366206574, 18446744073709551615, 18446744073709551615, 249, 256, 249, 256, 50, 51, true, "figures", "figures"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 14637917332659859466, 11680350228874501365, 18446744073709551615, 18446744073709551615, 258, 266, 258, 266, 52, 53, true, "formulas", "formulas"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 14634153919632515335, 2062260761097997133, 18446744073709551615, 18446744073709551615, 312, 320, 312, 320, 64, 65, true, "training", "training"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206559341571450, 10891618514124079785, 18446744073709551615, 18446744073709551615, 324, 330, 324, 330, 66, 67, true, "custom", "custom"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625696431489, 5116475966440691304, 18446744073709551615, 18446744073709551615, 462, 466, 462, 466, 89, 90, true, "data", "data"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 6167933651658664291, 1969766390996936239, 18446744073709551615, 18446744073709551615, 474, 483, 474, 483, 92, 93, true, "documents", "documents"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206521531485437, 15754120034665078572, 18446744073709551615, 18446744073709551615, 513, 519, 513, 519, 98, 99, true, "recall", "recall"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625696024605, 5116487879327823312, 18446744073709551615, 18446744073709551615, 545, 549, 545, 549, 104, 105, true, "cell", "cell"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625632301461, 5151166762899537630, 18446744073709551615, 18446744073709551615, 557, 561, 557, 561, 107, 108, true, "page", "page"], ["term", "single-term", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206521526353544, 15751299097077186113, 18446744073709551615, 18446744073709551615, 567, 573, 567, 573, 109, 110, true, "regard", "regard"], ["verb", "compound-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 2291392503341416825, 11195031169916444600, 18446744073709551615, 18446744073709551615, 18, 33, 18, 33, 5, 7, true, "are essentially", "are essentially"], ["verb", "compound-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 12310758631134579429, 6123985748511190566, 18446744073709551615, 18446744073709551615, 119, 144, 119, 144, 24, 29, true, "are designed to be layout", "are designed to be layout"], ["verb", "compound-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 4898731964974610599, 9536147348813194873, 18446744073709551615, 18446744073709551615, 363, 389, 363, 389, 72, 76, true, "are designed to specialize", "are designed to specialize"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625695387621, 5116635804435289206, 18446744073709551615, 18446744073709551615, 92, 96, 92, 96, 19, 20, true, "have", "have"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625631208371, 5151807067501724158, 18446744073709551615, 18446744073709551615, 163, 167, 163, 167, 32, 33, true, "take", "take"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 14637940110145064744, 2106064499720351293, 18446744073709551615, 18446744073709551615, 198, 206, 198, 206, 40, 41, true, "identify", "identify"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206567157578886, 10902549664873100786, 18446744073709551615, 18446744073709551615, 211, 217, 211, 217, 42, 43, true, "locate", "locate"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 8106478689608778321, 3710394550719923132, 18446744073709551615, 18446744073709551615, 300, 307, 300, 307, 62, 63, true, "support", "support"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 329104159171192019, 9796554067934784618, 18446744073709551615, 18446744073709551615, 426, 431, 426, 431, 82, 83, true, "allow", "allow"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 8106398484416229602, 17326137764355875282, 18446744073709551615, 18446744073709551615, 438, 445, 438, 445, 85, 86, true, "convert", "convert"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 8106397496930289884, 10559828500844892971, 18446744073709551615, 18446744073709551615, 450, 457, 450, 457, 87, 88, true, "extract", "extract"], ["verb", "single-verb", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 4427431036151074662, 16906017590074365227, 18446744073709551615, 18446744073709551615, 526, 539, 526, 539, 101, 103, true, "will classify", "will classify"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 8106478685702231057, 2830627525930358532, 18446744073709551615, 18446744073709551615, 233, 240, 233, 240, 46, 48, true, "such as", "such as"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16380809977974811061, 4113701743493453883, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 2, true, "In the", "In the"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485670, 17273985970400205401, 18446744073709551615, 18446744073709551615, 44, 46, 44, 46, 9, 10, true, "of", "of"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206532384237233, 17053737365407359035, 18446744073709551615, 18446744073709551615, 72, 78, 72, 78, 13, 15, true, "On the", "On the"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206565712212855, 9404609303836786535, 18446744073709551615, 18446744073709551615, 183, 189, 183, 189, 36, 38, true, "of the", "of the"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206532384237233, 17053737365407379411, 18446744073709551615, 18446744073709551615, 273, 279, 273, 279, 55, 57, true, "On the", "On the"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485670, 17273985970400249568, 18446744073709551615, 18446744073709551615, 321, 323, 321, 323, 65, 66, true, "of", "of"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625618762887, 5116378122940476765, 18446744073709551615, 18446744073709551615, 390, 394, 390, 394, 76, 78, true, "on a", "on a"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 12178341415895623120, 17146120863816560875, 18446744073709551615, 18446744073709551615, 467, 470, 467, 470, 90, 91, true, "out", "out"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485670, 17273985970400259382, 18446744073709551615, 18446744073709551615, 471, 473, 471, 473, 91, 92, true, "of", "of"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625618037948, 5116501620067563988, 18446744073709551615, 18446744073709551615, 484, 488, 484, 488, 93, 94, true, "with", "with"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 16381206560518651853, 9497734443008541647, 18446744073709551615, 18446744073709551615, 550, 556, 550, 556, 105, 107, true, "in the", "in the"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 389609625618037948, 5116501620067789933, 18446744073709551615, 18446744073709551615, 562, 566, 562, 566, 108, 109, true, "with", "with"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485865, 17273985947860325671, 18446744073709551615, 18446744073709551615, 132, 134, 132, 134, 26, 27, true, "to", "to"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485865, 17273985947860327104, 18446744073709551615, 18446744073709551615, 195, 197, 195, 197, 39, 40, true, "to", "to"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485865, 17273985947860314144, 18446744073709551615, 18446744073709551615, 376, 378, 376, 378, 74, 75, true, "to", "to"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485865, 17273985947860311804, 18446744073709551615, 18446744073709551615, 435, 437, 435, 437, 84, 85, true, "to", "to"], ["conn", "single-conn", 3275001812318455279, "TEXT", "#/texts/40", 1.0, 15441160910541485865, 17273985947860221241, 18446744073709551615, 18446744073709551615, 574, 576, 574, 576, 110, 111, true, "to", "to"], ["expression", "common", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541486545, 217027369193113293, 18446744073709551615, 18446744073709551615, 276, 280, 276, 280, 50, 51, true, "ie", "i.e."], ["expression", "wtoken-concatenation", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 329104147725158906, 15860090129804914924, 18446744073709551615, 18446744073709551615, 0, 5, 0, 5, 0, 1, true, "3.4.1", "3.4.1"], ["sentence", "", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 11192516552703949087, 2492940853653053327, 18446744073709551615, 18446744073709551615, 0, 14, 0, 14, 0, 3, true, "3.4.1 Metrics.", "3.4.1 Metrics."], ["sentence", "", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 5597992645106161612, 18212052448285647499, 18446744073709551615, 18446744073709551615, 15, 146, 15, 146, 3, 26, true, "Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results.", "Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results."], ["sentence", "", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 17501061903424888556, 2513848896063536453, 18446744073709551615, 18446744073709551615, 147, 325, 147, 325, 26, 61, true, "The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label.", "The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label."], ["sentence", "", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 11474071216785359264, 2522141362145198185, 18446744073709551615, 18446744073709551615, 326, 420, 326, 420, 61, 79, true, "The correctness of this label is what we aim to measure with the recall and precision metrics.", "The correctness of this label is what we aim to measure with the recall and precision metrics."], ["term", "enum-term-mark-2", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 767578358531619449, 4617639867407406559, 18446744073709551615, 18446744073709551615, 88, 108, 88, 108, 16, 19, true, "precision and recall", "precision and recall"], ["term", "enum-term-mark-2", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 11037453576911667853, 13989172036358784933, 18446744073709551615, 18446744073709551615, 391, 411, 391, 411, 74, 77, true, "recall and precision", "recall and precision"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16904814960714419182, 11528817755660542225, 18446744073709551615, 18446744073709551615, 102, 116, 102, 116, 18, 20, true, "recall metrics", "recall metrics"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 2298135982047686680, 2518908270667636880, 18446744073709551615, 18446744073709551615, 151, 168, 151, 168, 27, 29, true, "first observation", "first observation"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 13314981802876368811, 8098139644132232215, 18446744073709551615, 18446744073709551615, 245, 260, 245, 260, 45, 47, true, "human annotator", "human annotator"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 3503955255877193443, 16168589224721760141, 18446744073709551615, 18446744073709551615, 298, 307, 298, 307, 55, 57, true, "text cell", "text cell"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 11173100292227021015, 1800543351623843241, 18446744073709551615, 18446744073709551615, 310, 324, 310, 324, 58, 60, true, "semantic label", "semantic label"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 13620323371457554126, 458926269169904744, 18446744073709551615, 18446744073709551615, 402, 419, 402, 419, 76, 78, true, "precision metrics", "precision metrics"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 10318747471677033167, 11847785680345282579, 18446744073709551615, 18446744073709551615, 425, 443, 425, 443, 80, 82, true, "second observation", "second observation"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106471246351785636, 976374676032317796, 18446744073709551615, 18446744073709551615, 6, 13, 6, 13, 1, 2, true, "Metrics", "Metrics"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 5731695876385560379, 811697740203155724, 18446744073709551615, 18446744073709551615, 37, 48, 37, 48, 6, 7, true, "performance", "performance"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206567230470443, 12418767995683661148, 18446744073709551615, 18446744073709551615, 56, 62, 56, 62, 9, 10, true, "models", "models"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 6184954595655792282, 14903161230079425690, 18446744073709551615, 18446744073709551615, 88, 97, 88, 97, 16, 17, true, "precision", "precision"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106478445190161533, 1918860944869567675, 18446744073709551615, 18446744073709551615, 138, 145, 138, 145, 24, 25, true, "results", "results"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206566212127622, 12760563398286649502, 18446744073709551615, 18446744073709551615, 181, 187, 181, 187, 32, 33, true, "output", "output"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106464587473865376, 11906315049643178602, 18446744073709551615, 18446744073709551615, 193, 200, 193, 200, 35, 36, true, "machine", "machine"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 329104161610777240, 17091366051023556973, 18446744073709551615, 18446744073709551615, 209, 214, 209, 214, 37, 38, true, "model", "model"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 2993400436143573854, 11462282328537020132, 18446744073709551615, 18446744073709551615, 330, 341, 330, 341, 62, 63, true, "correctness", "correctness"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 329104161624445793, 17107701638781466857, 18446744073709551615, 18446744073709551615, 350, 355, 350, 355, 65, 66, true, "label", "label"], ["term", "single-term", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206521531485437, 11959686269616181622, 18446744073709551615, 18446744073709551615, 391, 397, 391, 397, 74, 75, true, "recall", "recall"], ["verb", "compound-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 5912573161125631318, 18389017722629856917, 18446744073709551615, 18446744073709551615, 117, 133, 117, 133, 20, 23, true, "used to evaluate", "used to evaluate"], ["verb", "compound-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15603910262549214758, 16583885660288754437, 18446744073709551615, 18446744073709551615, 215, 225, 215, 225, 38, 40, true, "is exactly", "is exactly"], ["verb", "compound-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 4896725137073128692, 11944200149874396120, 18446744073709551615, 18446744073709551615, 367, 381, 367, 381, 69, 72, true, "aim to measure", "aim to measure"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 5314857828561765555, 16399137808174602665, 18446744073709551615, 18446744073709551615, 22, 32, 22, 32, 4, 5, true, "discussing", "discussing"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 12178341415895617983, 13147520311847756339, 18446744073709551615, 18446744073709551615, 64, 67, 64, 67, 11, 12, true, "let", "let"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206567815771749, 12306163915021708527, 18446744073709551615, 18446744073709551615, 77, 83, 77, 83, 14, 15, true, "define", "define"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541486535, 217027369425741123, 18446744073709551615, 18446744073709551615, 169, 171, 169, 171, 29, 30, true, "is", "is"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106342444693204894, 8432795443147196977, 18446744073709551615, 18446744073709551615, 201, 208, 201, 208, 36, 37, true, "learned", "learned"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 12860895623677311427, 17253192036505745074, 18446744073709551615, 18446744073709551615, 261, 274, 261, 274, 47, 49, true, "would produce", "would produce"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541486545, 217027369193113293, 18446744073709551615, 18446744073709551615, 276, 280, 276, 280, 50, 51, true, "ie", "i.e."], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 3080311951459850033, 533771465723720685, 18446744073709551615, 18446744073709551615, 284, 295, 284, 295, 52, 54, true, "will assign", "will assign"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541486535, 217027369425752515, 18446744073709551615, 18446744073709551615, 356, 358, 356, 358, 66, 67, true, "is", "is"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541486535, 217027369425791545, 18446744073709551615, 18446744073709551615, 444, 446, 444, 446, 82, 83, true, "is", "is"], ["verb", "single-verb", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 389609625696287852, 94271090942472495, 18446744073709551615, 18446744073709551615, 455, 459, 455, 459, 85, 86, true, "deal", "deal"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106475344576462148, 12913315537205632230, 18446744073709551615, 18446744073709551615, 230, 237, 230, 237, 41, 43, true, "same of", "same of"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206535679983326, 13147770262986613637, 18446744073709551615, 18446744073709551615, 15, 21, 15, 21, 3, 4, true, "Before", "Before"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206565712212855, 13758401303862020825, 18446744073709551615, 18446744073709551615, 49, 55, 49, 55, 7, 9, true, "of the", "of the"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 14634130761162415388, 14555384046902233320, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 30, 32, true, "that the", "that the"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 389609625620237736, 16645129036916663308, 18446744073709551615, 18446744073709551615, 188, 192, 188, 192, 33, 35, true, "of a", "of a"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 8106342927224204628, 1697921553677515464, 18446744073709551615, 18446744073709551615, 342, 349, 342, 349, 63, 65, true, "of this", "of this"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 14638857868319795209, 1013014462354929148, 18446744073709551615, 18446744073709551615, 382, 390, 382, 390, 72, 74, true, "with the", "with the"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 389609625631229034, 52497997003113109, 18446744073709551615, 18446744073709551615, 447, 451, 447, 451, 83, 84, true, "that", "that"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 16381206557726458966, 12614731555033482050, 18446744073709551615, 18446744073709551615, 460, 466, 460, 466, 86, 88, true, "with a", "with a"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541485865, 217027973200879925, 18446744073709551615, 18446744073709551615, 122, 124, 122, 124, 21, 22, true, "to", "to"], ["conn", "single-conn", 15354930767839681193, "TEXT", "#/texts/41", 1.0, 15441160910541485865, 217027973200576949, 18446744073709551615, 18446744073709551615, 371, 373, 371, 373, 70, 71, true, "to", "to"], ["expression", "common", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 15441160910541486545, 567598813997742384, 18446744073709551615, 18446744073709551615, 36, 40, 36, 40, 4, 5, true, "ie", "i.e."], ["expression", "apostrophe", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 389609625696231302, 15358368697525810593, 18446744073709551615, 18446744073709551615, 44, 49, 44, 49, 6, 7, true, "dont", "don't"], ["expression", "word-concatenation", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 1304401478471854224, 649631639388333518, 18446744073709551615, 18446744073709551615, 0, 11, 0, 11, 0, 1, true, "multi-class", "multi-class"], ["sentence", "", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 15490331838172880166, 6727043395560011683, 18446744073709551615, 18446744073709551615, 0, 199, 0, 199, 0, 35, true, "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label.", "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label."], ["term", "enum-term-mark-2", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 11037453576911667853, 6817453896806379289, 18446744073709551615, 18446744073709551615, 163, 183, 163, 183, 28, 31, true, "recall and precision", "recall and precision"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 8860580720669525337, 5603592342532464212, 18446744073709551615, 18446744073709551615, 0, 34, 0, 34, 0, 3, true, "multi-class classification problem", "multi-class classification problem"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 11769591311949881693, 6939363705971315498, 18446744073709551615, 18446744073709551615, 76, 105, 76, 105, 13, 17, true, "many possible semantic labels", "many possible semantic labels"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 2685619966605559755, 15479784634162924092, 18446744073709551615, 18446744073709551615, 117, 135, 117, 135, 20, 22, true, "performance result", "performance result"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 16381206590740615814, 15595426310372972978, 18446744073709551615, 18446744073709551615, 64, 70, 64, 70, 10, 11, true, "labels", "labels"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 8106397775114664992, 12360006154142338319, 18446744073709551615, 18446744073709551615, 148, 155, 148, 155, 25, 26, true, "average", "average"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 16381206521531485437, 2808816684408088237, 18446744073709551615, 18446744073709551615, 163, 169, 163, 169, 28, 29, true, "recall", "recall"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 6184954595655792282, 7514668760154149307, 18446744073709551615, 18446744073709551615, 174, 183, 174, 183, 30, 31, true, "precision", "precision"], ["term", "single-term", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 329104161624445793, 17860511514209003623, 18446744073709551615, 18446744073709551615, 193, 198, 193, 198, 33, 34, true, "label", "label"], ["verb", "compound-verb", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 6184629322009942858, 1696555540029622570, 18446744073709551615, 18446744073709551615, 50, 59, 50, 59, 7, 9, true, "have only", "have only"], ["verb", "single-verb", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 15441160910541486545, 567598813997742384, 18446744073709551615, 18446744073709551615, 36, 40, 36, 40, 4, 5, true, "ie", "i.e."], ["verb", "single-verb", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 8106477985499172124, 7535163702770855737, 18446744073709551615, 18446744073709551615, 136, 143, 136, 143, 22, 24, true, "will be", "will be"], ["conn", "single-conn", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 16381206565712212855, 15841842752973774978, 18446744073709551615, 18446744073709551615, 156, 162, 156, 162, 26, 28, true, "of the", "of the"], ["conn", "single-conn", 6337233386759158728, "TEXT", "#/texts/42", 1.0, 14637917333167503367, 1995163399168108017, 18446744073709551615, 18446744073709551615, 184, 192, 184, 192, 31, 33, true, "for each", "for each"], ["parenthesis", "round brackets", 2249972239307071508, "TEXT", "#/texts/43", 1.0, 329104053186083887, 849114158930843840, 18446744073709551615, 18446744073709551615, 11, 18, 11, 16, 2, 6, true, "(= \u211b)", "(= \u211b)"], ["parenthesis", "round brackets", 2249972239307071508, "TEXT", "#/texts/43", 1.0, 16380808301981907129, 17658555346938479763, 18446744073709551615, 18446744073709551615, 33, 41, 31, 36, 8, 12, true, "(= \ud835\udcab)", "(= \ud835\udcab)"], ["numval", "ival", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 17767354399704235161, 6099910645632343085, 18446744073709551615, 18446744073709551615, 70, 71, 65, 66, 15, 16, true, "1", "1"], ["parenthesis", "reference", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 12178341415896395122, 16140836621700527287, 18446744073709551615, 18446744073709551615, 69, 72, 64, 67, 14, 17, true, "(1)", "(1)"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104159213418835, 12512684746394364905, 18446744073709551615, 18446744073709551615, 6, 13, 4, 11, 2, 3, true, "t_{p}", "t$_{p}$"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104159213418835, 12512684746394365415, 18446744073709551615, 18446744073709551615, 14, 21, 12, 19, 3, 4, true, "t_{p}", "t$_{p}$"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104161698390847, 17326553453012765569, 18446744073709551615, 18446744073709551615, 24, 31, 22, 29, 5, 6, true, "f_{p}", "f$_{p}$"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104159213418835, 12512684746394305740, 18446744073709551615, 18446744073709551615, 41, 48, 36, 43, 9, 10, true, "t_{p}", "t$_{p}$"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104159213418835, 12512684746394367710, 18446744073709551615, 18446744073709551615, 49, 56, 44, 51, 10, 11, true, "t_{p}", "t$_{p}$"], ["expression", "wtoken-concatenation", 12383805870947794174, "TEXT", "#/texts/44", 1.0, 329104161698393277, 17326553835857512588, 18446744073709551615, 18446744073709551615, 59, 66, 54, 61, 12, 13, true, "f_{n}", "f$_{n}$"], ["expression", "wtoken-concatenation", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104159213418835, 14787525895513303260, 18446744073709551615, 18446744073709551615, 6, 13, 6, 13, 1, 2, true, "t_{p}", "t$_{p}$"], ["expression", "wtoken-concatenation", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104161698390847, 13988743388207353010, 18446744073709551615, 18446744073709551615, 15, 22, 15, 22, 3, 4, true, "f_{p}", "f$_{p}$"], ["expression", "wtoken-concatenation", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104161698393277, 13988741372871105719, 18446744073709551615, 18446744073709551615, 27, 34, 27, 34, 5, 6, true, "f_{n}", "f$_{n}$"], ["sentence", "", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 7335149866647742481, 5389973564291704178, 18446744073709551615, 18446744073709551615, 6, 124, 6, 124, 1, 19, true, "t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels.", "t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels."], ["term", "enum-term-mark-2", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 9949181435334963834, 13863161906120910343, 18446744073709551615, 18446744073709551615, 6, 34, 6, 34, 1, 6, true, "t_{p}, f_{p} and f_{n}", "t$_{p}$, f$_{p}$ and f$_{n}$"], ["term", "single-term", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104159213418835, 14787525895513303260, 18446744073709551615, 18446744073709551615, 6, 13, 6, 13, 1, 2, true, "t_{p}", "t$_{p}$"], ["term", "single-term", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104161698390847, 13988743388207353010, 18446744073709551615, 18446744073709551615, 15, 22, 15, 22, 3, 4, true, "f_{p}", "f$_{p}$"], ["term", "single-term", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 329104161698393277, 13988741372871105719, 18446744073709551615, 18446744073709551615, 27, 34, 27, 34, 5, 6, true, "f_{n}", "f$_{n}$"], ["term", "single-term", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 16381206590740615814, 634376818699750368, 18446744073709551615, 18446744073709551615, 117, 123, 117, 123, 17, 18, true, "labels", "labels"], ["verb", "compound-verb", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 12161488772614107065, 10356018545425099123, 18446744073709551615, 18446744073709551615, 35, 57, 35, 57, 6, 8, true, "represent respectively", "represent respectively"], ["verb", "single-verb", 7053654953998543393, "TEXT", "#/texts/45", 1.0, 6184954633443293966, 1327321667481246505, 18446744073709551615, 18446744073709551615, 107, 116, 107, 116, 16, 17, true, "predicted", "predicted"], ["numval", "ival", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17767354399704235157, 12704387693977461708, 18446744073709551615, 18446744073709551615, 501, 502, 501, 502, 92, 93, true, "5", "5"], ["numval", "ival", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17767354399704235158, 12704387694225961167, 18446744073709551615, 18446744073709551615, 504, 505, 504, 505, 94, 95, true, "6", "6"], ["numval", "ival", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541481982, 10124025090905978449, 18446744073709551615, 18446744073709551615, 507, 509, 507, 509, 96, 97, true, "10", "10"], ["numval", "ival", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17767354399704235152, 12704387694582788969, 18446744073709551615, 18446744073709551615, 535, 536, 535, 536, 103, 104, true, "8", "8"], ["numval", "ival", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17767354399704235153, 12704387694366902016, 18446744073709551615, 18446744073709551615, 538, 539, 538, 539, 105, 106, true, "9", "9"], ["parenthesis", "reference", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895577775, 10221660528452945315, 18446744073709551615, 18446744073709551615, 562, 565, 562, 565, 111, 112, true, "[7]", "[7]"], ["parenthesis", "reference", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 389609625697296215, 2696768862675468030, 18446744073709551615, 18446744073709551615, 609, 613, 609, 613, 121, 122, true, "[10]", "[10]"], ["parenthesis", "reference", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895577640, 10221660531532098661, 18446744073709551615, 18446744073709551615, 629, 632, 629, 632, 125, 126, true, "[9]", "[9]"], ["parenthesis", "round brackets", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 3545569348905905585, 4469200219510189414, 18446744073709551615, 18446744073709551615, 454, 499, 454, 499, 80, 91, true, "(and their derivatives Fast-and Faster-R-CNN)", "(and their derivatives Fast-and Faster-R-CNN)"], ["parenthesis", "square brackets", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15965486372532711702, 9304269592494963167, 18446744073709551615, 18446744073709551615, 500, 510, 500, 510, 91, 98, true, "[5, 6, 10]", "[5, 6, 10]"], ["parenthesis", "square brackets", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206575391579989, 5003543538146822835, 18446744073709551615, 18446744073709551615, 534, 540, 534, 540, 102, 107, true, "[8, 9]", "[8, 9]"], ["expression", "common", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895450733, 10221669895136519552, 18446744073709551615, 18446744073709551615, 199, 203, 199, 203, 35, 36, true, "etc", "etc."], ["expression", "word-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206484692763269, 3909475141979723724, 18446744073709551615, 18446744073709551615, 447, 453, 447, 453, 79, 80, true, "R-CNNs", "R-CNNs"], ["expression", "word-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14650433109252770301, 8107892129208874535, 18446744073709551615, 18446744073709551615, 477, 485, 477, 485, 84, 85, true, "Fast-and", "Fast-and"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 329104147725158907, 15783524442332654082, 18446744073709551615, 18446744073709551615, 0, 5, 0, 5, 0, 1, true, "3.4.2", "3.4.2"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895577775, 10221660528452945315, 18446744073709551615, 18446744073709551615, 562, 565, 562, 565, 111, 112, true, "[7]", "[7]"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 5328308949596420596, 16816517518101354522, 18446744073709551615, 18446744073709551615, 596, 608, 596, 608, 120, 121, true, "Faster-R-CNN", "Faster-R-CNN"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 389609625697296215, 2696768862675468030, 18446744073709551615, 18446744073709551615, 609, 613, 609, 613, 121, 122, true, "[10]", "[10]"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206533950151485, 7429344052728955394, 18446744073709551615, 18446744073709551615, 622, 628, 622, 628, 124, 125, true, "YOLOv2", "YOLOv2"], ["expression", "wtoken-concatenation", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895577640, 10221660531532098661, 18446744073709551615, 18446744073709551615, 629, 632, 629, 632, 125, 126, true, "[9]", "[9]"], ["sentence", "", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 13862970203854964234, 1000664251196474029, 18446744073709551615, 18446744073709551615, 0, 21, 0, 21, 0, 4, true, "3.4.2 Default Models.", "3.4.2 Default Models."], ["sentence", "", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15397362661759974852, 12298157670836849129, 18446744073709551615, 18446744073709551615, 22, 109, 22, 109, 4, 20, true, "The aim of the default models is to identify specific, ubiquitous objects in documents.", "The aim of the default models is to identify specific, ubiquitous objects in documents."], ["sentence", "", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16425179928293029532, 1769621671439790257, 18446744073709551615, 18446744073709551615, 110, 356, 110, 356, 20, 64, true, "Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods.", "Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods."], ["sentence", "", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 9742780938207760057, 12486707672052556921, 18446744073709551615, 18446744073709551615, 357, 566, 357, 566, 64, 113, true, "Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7].", "Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]."], ["sentence", "", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16770266057504597285, 13963861550769241956, 18446744073709551615, 18446744073709551615, 567, 715, 567, 715, 113, 138, true, "On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", "On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions."], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 18025670271476196185, 4076525407823276511, 18446744073709551615, 18446744073709551615, 6, 20, 6, 20, 1, 3, true, "Default Models", "Default Models"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 1915006193249717419, 11522134204391288466, 18446744073709551615, 18446744073709551615, 37, 51, 37, 51, 8, 10, true, "default models", "default models"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 5609921068371406163, 9046228298730371624, 18446744073709551615, 18446744073709551615, 77, 95, 77, 95, 15, 17, true, "ubiquitous objects", "ubiquitous objects"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14475299778579205125, 8777768897758483305, 18446744073709551615, 18446744073709551615, 122, 134, 122, 134, 22, 24, true, "such objects", "such objects"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 1883638592780196390, 3583719480229507642, 18446744073709551615, 18446744073709551615, 176, 197, 176, 197, 32, 34, true, "mathematical formulas", "mathematical formulas"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 13067270324954730530, 3518300029937178041, 18446744073709551615, 18446744073709551615, 215, 231, 215, 231, 39, 41, true, "high variability", "high variability"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16647295474405589964, 3738165022613409525, 18446744073709551615, 18446744073709551615, 244, 259, 244, 259, 44, 46, true, "document layout", "document layout"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14113886516804568139, 16613561485635018399, 18446744073709551615, 18446744073709551615, 324, 355, 324, 355, 59, 63, true, "robust object detection methods", "robust object detection methods"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 1993812675333266626, 1904048806204470733, 18446744073709551615, 18446744073709551615, 377, 391, 377, 391, 68, 70, true, "robust methods", "robust methods"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17376821969571444655, 10906121291496328649, 18446744073709551615, 18446744073709551615, 418, 438, 418, 438, 74, 77, true, "deep neural networks", "deep neural networks"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 10062108462976606414, 4739790499633724912, 18446744073709551615, 18446744073709551615, 465, 492, 465, 492, 83, 86, true, "derivatives Fast-and Faster", "derivatives Fast-and Faster"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17384399425913386842, 17600753885763817486, 18446744073709551615, 18446744073709551615, 516, 535, 516, 535, 100, 103, true, "YOLO architecture [", "YOLO architecture ["], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 13690465142419804728, 5041912981728959292, 18446744073709551615, 18446744073709551615, 549, 561, 549, 561, 109, 111, true, "SSD networks", "SSD networks"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15318930646636167014, 16480554943289876127, 18446744073709551615, 18446744073709551615, 655, 679, 655, 679, 129, 131, true, "individual microservices", "individual microservices"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895571887, 10221689572436439446, 18446744073709551615, 18446744073709551615, 26, 29, 26, 29, 5, 6, true, "aim", "aim"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 6167933651658664291, 6518863807379418823, 18446744073709551615, 18446744073709551615, 99, 108, 99, 108, 18, 19, true, "documents", "documents"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14650277098690689540, 4238826485163265018, 18446744073709551615, 18446744073709551615, 110, 118, 110, 118, 20, 21, true, "Examples", "Examples"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206513098478539, 7158084506369383330, 18446744073709551615, 18446744073709551615, 139, 145, 139, 145, 25, 26, true, "tables", "tables"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 8106397480533647371, 6864720578269745032, 18446744073709551615, 18446744073709551615, 147, 154, 147, 154, 27, 28, true, "figures", "figures"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14652289689770638970, 18248014564023136762, 18446744073709551615, 18446744073709551615, 166, 174, 166, 174, 30, 31, true, "captions", "captions"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415896115241, 10214876492138438607, 18446744073709551615, 18446744073709551615, 204, 207, 204, 207, 36, 37, true, "Due", "Due"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 11387678566946341343, 17478644430819869889, 18446744073709551615, 18446744073709551615, 278, 292, 278, 292, 51, 52, true, "representation", "representation"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 8106342034010873556, 852442964076649594, 18446744073709551615, 18446744073709551615, 302, 309, 302, 309, 54, 55, true, "objects", "objects"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 8106342034010873556, 852442964076641960, 18446744073709551615, 18446744073709551615, 406, 413, 406, 413, 72, 73, true, "objects", "objects"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206484692763269, 3909475141979723724, 18446744073709551615, 18446744073709551615, 447, 453, 447, 453, 79, 80, true, "R-CNNs", "R-CNNs"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415896234584, 10214872764581833763, 18446744073709551615, 18446744073709551615, 495, 498, 495, 498, 89, 90, true, "CNN", "CNN"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14814125365076808131, 16189701360705085149, 18446744073709551615, 18446744073709551615, 574, 582, 574, 582, 115, 116, true, "platform", "platform"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 5328308949596420596, 16816517518101354522, 18446744073709551615, 18446744073709551615, 596, 608, 596, 608, 120, 121, true, "Faster-R-CNN", "Faster-R-CNN"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206533950151485, 7429344052728955394, 18446744073709551615, 18446744073709551615, 622, 628, 622, 628, 124, 125, true, "YOLOv2", "YOLOv2"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14814151113413570861, 12769628045261514081, 18446744073709551615, 18446744073709551615, 633, 641, 633, 641, 126, 127, true, "networks", "networks"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14634153919632515335, 18400654478213785826, 18446744073709551615, 18446744073709551615, 690, 698, 690, 698, 134, 135, true, "training", "training"], ["term", "single-term", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15175963360124346573, 3242350763380404235, 18446744073709551615, 18446744073709551615, 703, 714, 703, 714, 136, 137, true, "predictions", "predictions"], ["verb", "compound-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 17731448345306221383, 8279129691342552404, 18446744073709551615, 18446744073709551615, 52, 66, 52, 66, 10, 13, true, "is to identify", "is to identify"], ["verb", "compound-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 6169327273053177421, 16918164282889950953, 18446744073709551615, 18446744073709551615, 314, 323, 314, 323, 57, 59, true, "need very", "need very"], ["verb", "single-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895564896, 10221689629778288871, 18446744073709551615, 18446744073709551615, 135, 138, 135, 138, 24, 25, true, "are", "are"], ["verb", "single-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 6180169258394806855, 12614592480509617382, 18446744073709551615, 18446744073709551615, 396, 405, 396, 405, 71, 72, true, "detecting", "detecting"], ["verb", "single-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895564896, 10221689629778338900, 18446744073709551615, 18446744073709551615, 414, 417, 414, 417, 73, 74, true, "are", "are"], ["verb", "single-verb", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 389609625695387621, 2696595208771284580, 18446744073709551615, 18446744073709551615, 587, 591, 587, 591, 118, 119, true, "have", "have"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 8106478685702231057, 12497800819898333301, 18446744073709551615, 18446744073709551615, 439, 446, 439, 446, 77, 79, true, "such as", "such as"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15601168207941442599, 1060530269192402463, 18446744073709551615, 18446744073709551615, 642, 654, 642, 654, 127, 129, true, "available as", "available as"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206565712212855, 12447021742026355211, 18446744073709551615, 18446744073709551615, 30, 36, 30, 36, 6, 8, true, "of the", "of the"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541486538, 10124025012681555291, 18446744073709551615, 18446744073709551615, 96, 98, 96, 98, 17, 18, true, "in", "in"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541485670, 10124025032663008686, 18446744073709551615, 18446744073709551615, 119, 121, 119, 121, 21, 22, true, "of", "of"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 389609625618037948, 2698070654931797643, 18446744073709551615, 18446744073709551615, 155, 159, 155, 159, 28, 29, true, "with", "with"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15357604611893232445, 12706581294679878362, 18446744073709551615, 18446744073709551615, 232, 243, 232, 243, 41, 44, true, "in both the", "in both the"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541487053, 10124024937119636968, 18446744073709551615, 18446744073709551615, 268, 270, 268, 270, 48, 49, true, "as", "as"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206560518651853, 6740683993471155777, 18446744073709551615, 18446744073709551615, 271, 277, 271, 277, 49, 51, true, "in the", "in the"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14814148868025447689, 13785663703275405723, 18446744073709551615, 18446744073709551615, 293, 301, 293, 301, 52, 54, true, "of these", "of these"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 12178341415895625940, 10221692560851504811, 18446744073709551615, 18446744073709551615, 392, 395, 392, 395, 70, 71, true, "for", "for"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541487702, 10124024927677174728, 18446744073709551615, 18446744073709551615, 567, 569, 567, 569, 113, 114, true, "On", "On"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 14652253381802754387, 7964973155712271593, 18446744073709551615, 18446744073709551615, 681, 689, 681, 689, 132, 134, true, "both for", "both for"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 15441160910541485865, 10124025024834353152, 18446744073709551615, 18446744073709551615, 55, 57, 55, 57, 11, 12, true, "to", "to"], ["conn", "single-conn", 15921044595687116426, "TEXT", "#/texts/46", 1.0, 16381206519425733256, 6892863425976124668, 18446744073709551615, 18446744073709551615, 208, 214, 208, 214, 37, 39, true, "to the", "to the"], ["sentence", "", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 13907813772802190178, 7003946510300018767, 18446744073709551615, 18446744073709551615, 0, 172, 0, 172, 0, 33, true, "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects.", "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects."], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 9383287618729376838, 4119329632487579731, 18446744073709551615, 18446744073709551615, 54, 67, 54, 67, 12, 14, true, "table objects", "table objects"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 19890884327632451, 8285658877583341965, 18446744073709551615, 18446744073709551615, 77, 92, 77, 92, 17, 19, true, "same principles", "same principles"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 16919282435020496358, 3695407906295049397, 18446744073709551615, 18446744073709551615, 110, 128, 110, 128, 22, 24, true, "following analysis", "following analysis"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 14046205808336657429, 12297163905961192123, 18446744073709551615, 18446744073709551615, 150, 160, 150, 160, 28, 30, true, "other type", "other type"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 329104161668023890, 15034731120442627912, 18446744073709551615, 18446744073709551615, 8, 13, 8, 13, 2, 3, true, "paper", "paper"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 6180169258394806776, 9258215019222553593, 18446744073709551615, 18446744073709551615, 41, 50, 41, 50, 10, 11, true, "detection", "detection"], ["term", "single-term", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 8106342034010873556, 5785162319445905699, 18446744073709551615, 18446744073709551615, 164, 171, 164, 171, 31, 32, true, "objects", "objects"], ["verb", "compound-verb", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 332064237701512405, 14438730281518682242, 18446744073709551615, 18446744073709551615, 18, 33, 18, 33, 5, 8, true, "will focus only", "will focus only"], ["verb", "compound-verb", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 15596128382219940825, 312116650770694261, 18446744073709551615, 18446744073709551615, 129, 145, 129, 145, 24, 27, true, "are also applied", "are also applied"], ["verb", "single-verb", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 6180169740129371114, 10720671592342542201, 18446744073709551615, 18446744073709551615, 93, 102, 93, 102, 19, 20, true, "described", "described"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 8106396862006371970, 7588613479518108209, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "In this", "In this"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 16381206566339127348, 9743263998949637888, 18446744073709551615, 18446744073709551615, 34, 40, 34, 40, 8, 10, true, "on the", "on the"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 15441160910541485670, 16075120093636142673, 18446744073709551615, 18446744073709551615, 51, 53, 51, 53, 11, 12, true, "of", "of"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 16381206560518651853, 10949484449484932256, 18446744073709551615, 18446744073709551615, 103, 109, 103, 109, 20, 22, true, "in the", "in the"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 12178341415895625940, 13938643598911286725, 18446744073709551615, 18446744073709551615, 146, 149, 146, 149, 27, 28, true, "for", "for"], ["conn", "single-conn", 12234068400463628788, "TEXT", "#/texts/47", 1.0, 15441160910541485670, 16075120093636131136, 18446744073709551615, 18446744073709551615, 161, 163, 161, 163, 30, 31, true, "of", "of"], ["numval", "ival", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 329104147714235827, 16237474479634274377, 18446744073709551615, 18446744073709551615, 97, 102, 97, 102, 16, 17, true, "30000", "30000"], ["expression", "wtoken-concatenation", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 6165495739602921837, 9340964570503238985, 18446744073709551615, 18446744073709551615, 66, 77, 66, 77, 11, 12, true, "data^{11}", "data$^{11}$"], ["sentence", "", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 10610641527733228652, 13865291180638734476, 18446744073709551615, 18446744073709551615, 0, 78, 0, 78, 0, 13, true, "The networks available on our platform have been trained on arXiv data$^{11}$.", "The networks available on our platform have been trained on arXiv data$^{11}$."], ["term", "single-term", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 16274809754904284981, 14150942809734201297, 18446744073709551615, 18446744073709551615, 60, 77, 60, 77, 10, 12, true, "arXiv data^{11}", "arXiv data$^{11}$"], ["term", "single-term", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 2903324788977241891, 494345879165015724, 18446744073709551615, 18446744073709551615, 103, 112, 103, 112, 17, 19, true, "PDF pages", "PDF pages"], ["term", "single-term", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 14814151113413570861, 13612463695668145248, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 1, 2, true, "networks", "networks"], ["term", "single-term", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 14814125365076808131, 15422858865778697355, 18446744073709551615, 18446744073709551615, 30, 38, 30, 38, 5, 6, true, "platform", "platform"], ["verb", "compound-verb", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 15334498195961772498, 2558436440940418273, 18446744073709551615, 18446744073709551615, 39, 56, 39, 56, 6, 9, true, "have been trained", "have been trained"], ["verb", "compound-verb", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 9561950597095011783, 3531415358081330379, 18446744073709551615, 18446744073709551615, 82, 96, 82, 96, 14, 16, true, "have annotated", "have annotated"], ["verb", "single-verb", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 389609625632539415, 12849636877128501627, 18446744073709551615, 18446744073709551615, 117, 121, 117, 121, 20, 21, true, "know", "know"], ["conn", "single-conn", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 15601168207941439665, 9881833882850079373, 18446744073709551615, 18446744073709551615, 13, 25, 13, 25, 2, 4, true, "available on", "available on"], ["conn", "single-conn", 4628466594790006384, "TEXT", "#/texts/48", 1.0, 15441160910541485678, 11845235562561279624, 18446744073709551615, 18446744073709551615, 57, 59, 57, 59, 9, 10, true, "on", "on"], ["numval", "ival", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 329104147714235827, 3899762460303934761, 18446744073709551615, 18446744073709551615, 56, 61, 56, 61, 12, 13, true, "30000", "30000"], ["numval", "ival", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 329104147765109382, 3508537341518270119, 18446744073709551615, 18446744073709551615, 82, 87, 82, 87, 18, 19, true, "25000", "25000"], ["numval", "ival", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 389609625655246800, 3852020413771069256, 18446744073709551615, 18446744073709551615, 130, 134, 130, 134, 27, 28, true, "5000", "5000"], ["expression", "word-concatenation", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 10569149591004722800, 16900271628482084512, 18446744073709551615, 18446744073709551615, 225, 242, 225, 242, 48, 49, true, "data-augmentation", "data-augmentation"], ["expression", "word-concatenation", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 3458523808570659318, 12440619335925777434, 18446744073709551615, 18446744073709551615, 285, 301, 285, 301, 56, 57, true, "object-detection", "object-detection"], ["expression", "word-concatenation", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 11170541192325073142, 933495499316463478, 18446744073709551615, 18446744073709551615, 305, 325, 305, 325, 58, 59, true, "image-classification", "image-classification"], ["sentence", "", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 11408825138316374504, 8478950504398234103, 18446744073709551615, 18446744073709551615, 45, 156, 45, 156, 10, 32, true, "From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation.", "From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation."], ["sentence", "", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 12423813338298982980, 11414690006404237207, 18446744073709551615, 18446744073709551615, 157, 337, 157, 337, 32, 61, true, "Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", "Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms."], ["term", "enum-term-mark-2", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 6721676886213735149, 343887467640033113, 18446744073709551615, 18446744073709551615, 285, 325, 285, 325, 56, 59, true, "object-detection or image-classification", "object-detection or image-classification"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 7520153641230220879, 15768111495623467099, 18446744073709551615, 18446744073709551615, 97, 110, 97, 110, 21, 23, true, "training data", "training data"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15907509424283598820, 1309701731943016996, 18446744073709551615, 18446744073709551615, 168, 178, 168, 178, 35, 37, true, "large size", "large size"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 1001363875463467083, 8251004718776725960, 18446744073709551615, 18446744073709551615, 225, 252, 225, 252, 48, 50, true, "data-augmentation technique", "data-augmentation technique"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 16323431510197362598, 15799839381285791422, 18446744073709551615, 18446744073709551615, 305, 336, 305, 336, 58, 60, true, "image-classification algorithms", "image-classification algorithms"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 14639575749168485524, 17067935029815538881, 18446744073709551615, 18446744073709551615, 0, 8, 0, 8, 0, 1, true, "location", "location"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 329104159216638303, 3735971156137506785, 18446744073709551615, 18446744073709551615, 25, 30, 25, 30, 5, 6, true, "table", "table"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 389609625632301461, 3852646011442706184, 18446744073709551615, 18446744073709551615, 39, 43, 39, 43, 8, 9, true, "page", "page"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 329104161667992688, 3729850945376531663, 18446744073709551615, 18446744073709551615, 62, 67, 62, 67, 13, 14, true, "pages", "pages"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 329104161667992688, 3729850945376431434, 18446744073709551615, 18446744073709551615, 88, 93, 88, 93, 19, 20, true, "pages", "pages"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 329104161667992688, 3729850945376541076, 18446744073709551615, 18446744073709551615, 135, 140, 135, 140, 28, 29, true, "pages", "pages"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 5456363662501675139, 15220816455592297792, 18446744073709551615, 18446744073709551615, 145, 155, 145, 155, 30, 31, true, "evaluation", "evaluation"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 8106396676716241904, 14648059457390189617, 18446744073709551615, 18446744073709551615, 186, 193, 186, 193, 39, 40, true, "dataset", "dataset"], ["term", "single-term", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 3458523808570659318, 12440619335925777434, 18446744073709551615, 18446744073709551615, 285, 301, 285, 301, 56, 57, true, "object-detection", "object-detection"], ["verb", "compound-verb", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 6184629322227841334, 13137096673988516295, 18446744073709551615, 18446744073709551615, 72, 81, 72, 81, 16, 18, true, "have used", "have used"], ["verb", "compound-verb", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 6850430008328108593, 5399883786399020567, 18446744073709551615, 18446744073709551615, 198, 220, 198, 220, 42, 47, true, "did not need to employ", "did not need to employ"], ["verb", "compound-verb", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15603860103506328238, 5837976326483269424, 18446744073709551615, 18446744073709551615, 260, 270, 260, 270, 52, 54, true, "is usually", "is usually"], ["verb", "single-verb", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 389609625632409820, 3852621341384686815, 18446744073709551615, 18446744073709551615, 115, 119, 115, 119, 24, 25, true, "kept", "kept"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 6514618750987489747, 8397355782128073403, 18446744073709551615, 18446744073709551615, 271, 284, 271, 284, 54, 56, true, "necessary for", "necessary for"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15441160910541485670, 288606476502796116, 18446744073709551615, 18446744073709551615, 9, 11, 9, 11, 1, 2, true, "of", "of"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15441160910541487054, 288595831133479933, 18446744073709551615, 18446744073709551615, 12, 14, 12, 14, 2, 3, true, "at", "at"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 8106342614185119603, 5315721009696706391, 18446744073709551615, 18446744073709551615, 31, 38, 31, 38, 6, 8, true, "on each", "on each"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15033932706303876615, 13736182680378928256, 18446744073709551615, 18446744073709551615, 45, 55, 45, 55, 10, 12, true, "From these", "From these"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15441160910541487053, 288595830704355564, 18446744073709551615, 18446744073709551615, 94, 96, 94, 96, 20, 21, true, "as", "as"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 12178341415895625940, 2076970598603837899, 18446744073709551615, 18446744073709551615, 141, 144, 141, 144, 29, 30, true, "for", "for"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 16381206565712212855, 7145463495673152577, 18446744073709551615, 18446744073709551615, 179, 185, 179, 185, 37, 39, true, "of the", "of the"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 16381206519425733256, 5276673569012284605, 18446744073709551615, 18446744073709551615, 161, 167, 161, 167, 33, 35, true, "to the", "to the"], ["conn", "single-conn", 9651706913678711778, "TEXT", "#/texts/49", 1.0, 15441160910541485865, 288606447891747161, 18446744073709551615, 18446744073709551615, 211, 213, 211, 213, 45, 46, true, "to", "to"], ["numval", "ival", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 17767354399704235157, 12945931063214660534, 18446744073709551615, 18446744073709551615, 370, 371, 370, 371, 70, 71, true, "5", "5"], ["numval", "ival", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 17767354399704235157, 12945931063214542735, 18446744073709551615, 18446744073709551615, 579, 580, 579, 580, 110, 111, true, "5", "5"], ["expression", "wtoken-concatenation", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206533950151485, 9489822756853105574, 18446744073709551615, 18446744073709551615, 451, 457, 451, 457, 86, 87, true, "YOLOv2", "YOLOv2"], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 4061378656158338373, 14465990865377588223, 18446744073709551615, 18446744073709551615, 0, 151, 0, 151, 0, 30, true, "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes.", "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 7230305818063790009, 5970193631513633625, 18446744073709551615, 18446744073709551615, 152, 319, 152, 319, 30, 59, true, "The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks.", "The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 17664701293225181585, 4146648270552361528, 18446744073709551615, 18446744073709551615, 320, 372, 320, 372, 59, 72, true, "An example of such an image can be seen in Figure 5.", "An example of such an image can be seen in Figure 5."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 5465327582624645132, 9551200007057793983, 18446744073709551615, 18446744073709551615, 373, 514, 373, 514, 72, 99, true, "The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts.", "The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 7667365579028617388, 5332282821916987263, 18446744073709551615, 18446744073709551615, 515, 669, 515, 669, 99, 127, true, "Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition.", "Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 10642870011809358022, 16157254256485996574, 18446744073709551615, 18446744073709551615, 670, 828, 670, 828, 127, 152, true, "This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet.", "This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet."], ["sentence", "", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 986317930482970848, 7349070355092674215, 18446744073709551615, 18446744073709551615, 829, 955, 829, 955, 152, 179, true, "We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page.", "We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page."], ["term", "enum-term-mark-1", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 10919039264205031078, 14544387719036714097, 18446744073709551615, 18446744073709551615, 739, 777, 739, 777, 138, 144, true, "Japanese, Chinese or Korean characters", "Japanese, Chinese or Korean characters"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 11734732391183296006, 2587291782923000164, 18446744073709551615, 18446744073709551615, 56, 73, 56, 73, 12, 15, true, "original PDF page", "original PDF page"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15093911763430035033, 3694205018907190846, 18446744073709551615, 18446744073709551615, 91, 111, 91, 111, 19, 21, true, "image representation", "image representation"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14650937348812924036, 5539748062603626443, 18446744073709551615, 18446744073709551615, 126, 134, 126, 134, 24, 26, true, "PDF page", "PDF page"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 2707481668736505496, 7198690923753170302, 18446744073709551615, 18446744073709551615, 140, 150, 140, 150, 27, 29, true, "cell boxes", "cell boxes"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 13885091818292120189, 16117092993028309, 18446744073709551615, 18446744073709551615, 219, 234, 219, 234, 41, 44, true, "input PDF pages", "input PDF pages"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 17376821969571444655, 15750834930012037640, 18446744073709551615, 18446744073709551615, 298, 318, 298, 318, 55, 58, true, "deep neural networks", "deep neural networks"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 17072853444362853273, 16262656258750397787, 18446744073709551615, 18446744073709551615, 377, 395, 377, 395, 73, 76, true, "red bounding boxes", "red bounding boxes"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 5748925367544727060, 8711755993137804135, 18446744073709551615, 18446744073709551615, 550, 560, 550, 560, 105, 107, true, "text cells", "text cells"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 11738704476441755021, 599225668707656883, 18446744073709551615, 18446744073709551615, 614, 631, 614, 631, 118, 120, true, "original document", "original document"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 10286550524249249390, 16039764385894237573, 18446744073709551615, 18446744073709551615, 646, 668, 646, 668, 124, 126, true, "geometrical definition", "geometrical definition"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 9867266577668942964, 9457468283515724221, 18446744073709551615, 18446744073709551615, 710, 733, 710, 733, 134, 137, true, "example Asian documents", "example Asian documents"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 10390141656432228254, 10147168339704319013, 18446744073709551615, 18446744073709551615, 760, 777, 760, 777, 142, 144, true, "Korean characters", "Korean characters"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 12210549037008273093, 7960050686572576759, 18446744073709551615, 18446744073709551615, 785, 803, 785, 803, 145, 147, true, "European languages", "European languages"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 9514268965649159808, 10277151526855785007, 18446744073709551615, 18446744073709551615, 813, 827, 813, 827, 149, 151, true, "roman alphabet", "roman alphabet"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 692814466722212625, 12089462010086874276, 18446744073709551615, 18446744073709551615, 848, 867, 848, 867, 157, 160, true, "deep neural network", "deep neural network"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 13643956046017087081, 16700714172171101819, 18446744073709551615, 18446744073709551615, 884, 903, 884, 903, 164, 166, true, "specific characters", "specific characters"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104159216638303, 14652104812982082451, 18446744073709551615, 18446744073709551615, 21, 26, 21, 26, 5, 6, true, "table", "table"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161828335551, 9390410964741037597, 18446744073709551615, 18446744073709551615, 43, 48, 43, 48, 9, 10, true, "image", "image"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 6165973192311129130, 13673636908541982518, 18446744073709551615, 18446744073709551615, 156, 165, 156, 165, 31, 32, true, "reasoning", "reasoning"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 11600564911974996302, 11742311844381609874, 18446744073709551615, 18446744073709551615, 195, 206, 195, 206, 38, 39, true, "variability", "variability"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 10219753174915530122, 1883418763479216285, 18446744073709551615, 18446744073709551615, 277, 290, 277, 290, 52, 53, true, "effectiveness", "effectiveness"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 8106397496085150773, 4842581059053952340, 18446744073709551615, 18446744073709551615, 323, 330, 323, 330, 60, 61, true, "example", "example"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161828335551, 9390410964741015440, 18446744073709551615, 18446744073709551615, 342, 347, 342, 347, 64, 65, true, "image", "image"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206514091025767, 14972195604838550183, 18446744073709551615, 18446744073709551615, 363, 369, 363, 369, 69, 70, true, "Figure", "Figure"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206513098478539, 9453181643734708057, 18446744073709551615, 18446744073709551615, 407, 413, 407, 413, 78, 79, true, "tables", "tables"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206521509536706, 9304998689309080594, 18446744073709551615, 18446744073709551615, 420, 426, 420, 426, 81, 82, true, "result", "result"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14103651237077221583, 692779663451117535, 18446744073709551615, 18446744073709551615, 434, 444, 434, 444, 84, 85, true, "prediction", "prediction"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206533950151485, 9489822756853105574, 18446744073709551615, 18446744073709551615, 451, 457, 451, 457, 86, 87, true, "YOLOv2", "YOLOv2"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161828335551, 9390410964741025354, 18446744073709551615, 18446744073709551615, 480, 485, 480, 485, 92, 93, true, "image", "image"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161610777240, 9390567938245318871, 18446744073709551615, 18446744073709551615, 499, 504, 499, 504, 96, 97, true, "model", "model"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 5688100034928622351, 14997538649878118765, 18446744073709551615, 18446744073709551615, 529, 542, 529, 542, 102, 103, true, "visualisation", "visualisation"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206514091025767, 14972195604838595929, 18446744073709551615, 18446744073709551615, 572, 578, 572, 578, 109, 110, true, "Figure", "Figure"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 389609625631325904, 11992223016262407561, 18446744073709551615, 18446744073709551615, 602, 606, 602, 606, 115, 116, true, "text", "text"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 12178341415895621781, 15944948045218351689, 18446744073709551615, 18446744073709551615, 693, 696, 693, 696, 131, 132, true, "one", "one"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206590620761857, 12848968517137340397, 18446744073709551615, 18446744073709551615, 923, 929, 923, 929, 171, 172, true, "layout", "layout"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161531686411, 9388401245790561328, 18446744073709551615, 18446744073709551615, 937, 942, 937, 942, 174, 175, true, "cells", "cells"], ["term", "single-term", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 389609625632301461, 11992203554903389909, 18446744073709551615, 18446744073709551615, 950, 954, 950, 954, 177, 178, true, "page", "page"], ["verb", "compound-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 9093868816922383778, 10658048985165887467, 18446744073709551615, 18446744073709551615, 3, 16, 3, 16, 1, 4, true, "do not locate", "do not locate"], ["verb", "compound-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 6623117478668938130, 11887640081287291311, 18446744073709551615, 18446744073709551615, 178, 190, 178, 190, 34, 37, true, "is to reduce", "is to reduce"], ["verb", "compound-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14892762836247367071, 16988368498155692642, 18446744073709551615, 18446744073709551615, 348, 359, 348, 359, 65, 68, true, "can be seen", "can be seen"], ["verb", "compound-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 13754841728266722569, 5786336844458416623, 18446744073709551615, 18446744073709551615, 581, 597, 581, 597, 111, 114, true, "does not include", "does not include"], ["verb", "compound-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 2266019913458277733, 12639741498410603017, 18446744073709551615, 18446744073709551615, 832, 843, 832, 843, 153, 156, true, "do not want", "do not want"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206517379850387, 14955464588357672805, 18446744073709551615, 18446744073709551615, 119, 125, 119, 125, 23, 24, true, "parsed", "parsed"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14637951609302951605, 17658725300768464798, 18446744073709551615, 18446744073709551615, 264, 272, 264, 272, 50, 51, true, "increase", "increase"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 12178341415895564896, 15959393995125623708, 18446744073709551615, 18446744073709551615, 414, 417, 414, 417, 79, 80, true, "are", "are"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104159157798023, 14628335270365657217, 18446744073709551615, 18446744073709551615, 445, 450, 445, 450, 85, 86, true, "using", "using"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 12178341415895564896, 15959393995125627936, 18446744073709551615, 18446744073709551615, 462, 465, 462, 465, 88, 89, true, "are", "are"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14814125862003597070, 852547638956416530, 18446744073709551615, 18446744073709551615, 505, 513, 505, 513, 97, 98, true, "predicts", "predicts"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 389609625695584167, 11991047750942547462, 18446744073709551615, 18446744073709551615, 515, 519, 515, 519, 99, 100, true, "Note", "Note"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541486535, 16635202711086327418, 18446744073709551615, 18446744073709551615, 675, 677, 675, 677, 128, 129, true, "is", "is"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14652282388581682239, 13937110036866613314, 18446744073709551615, 18446744073709551615, 697, 705, 697, 705, 132, 133, true, "compares", "compares"], ["verb", "single-verb", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161710991423, 9388791396474858101, 18446744073709551615, 18446744073709551615, 871, 876, 871, 876, 161, 162, true, "focus", "focus"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 8106464529736241562, 11507614992345422349, 18446744073709551615, 18446744073709551615, 238, 245, 238, 245, 45, 47, true, "much as", "much as"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 5950055285115702077, 1625228371522495478, 18446744073709551615, 18446744073709551615, 466, 475, 466, 475, 89, 91, true, "absent in", "absent in"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 4711116093400357657, 13061186436309055298, 18446744073709551615, 18446744073709551615, 561, 571, 561, 571, 107, 109, true, "visible in", "visible in"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206566339127348, 18133865400670459150, 18446744073709551615, 18446744073709551615, 36, 42, 36, 42, 7, 9, true, "on the", "on the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169631358, 18446744073709551615, 18446744073709551615, 49, 55, 49, 55, 10, 12, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 329104161572724641, 9391892882871994485, 18446744073709551615, 18446744073709551615, 85, 90, 85, 90, 17, 19, true, "on an", "on an"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169619016, 18446744073709551615, 18446744073709551615, 112, 118, 112, 118, 21, 23, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 389609625618037948, 11995818646377557360, 18446744073709551615, 18446744073709551615, 135, 139, 135, 139, 26, 27, true, "with", "with"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 13689038747610583945, 10114373553176097977, 18446744073709551615, 18446744073709551615, 166, 177, 166, 177, 32, 34, true, "behind this", "behind this"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 2011002864324373888, 658421949741848157, 18446744073709551615, 18446744073709551615, 207, 218, 207, 218, 39, 41, true, "between all", "between all"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169625865, 18446744073709551615, 18446744073709551615, 291, 297, 291, 297, 53, 55, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541485670, 16635202722366690068, 18446744073709551615, 18446744073709551615, 331, 333, 331, 333, 61, 62, true, "of", "of"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541486538, 16635202713083707741, 18446744073709551615, 18446744073709551615, 360, 362, 360, 362, 68, 69, true, "in", "in"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15388840276945242407, 16541083529619122756, 18446744073709551615, 18446744073709551615, 396, 406, 396, 406, 76, 78, true, "around the", "around the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169622547, 18446744073709551615, 18446744073709551615, 427, 433, 427, 433, 82, 84, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541485678, 16635202722513330465, 18446744073709551615, 18446744073709551615, 486, 488, 486, 488, 93, 94, true, "on", "on"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14634130761162415388, 3116502908862951252, 18446744073709551615, 18446744073709551615, 520, 528, 520, 528, 100, 102, true, "that the", "that the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169642174, 18446744073709551615, 18446744073709551615, 543, 549, 543, 549, 103, 105, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169466222, 18446744073709551615, 18446744073709551615, 607, 613, 607, 613, 116, 118, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 12178341415895625940, 15944948137295589714, 18446744073709551615, 18446744073709551615, 706, 709, 706, 709, 133, 134, true, "for", "for"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 389609625618037948, 11995818646377545406, 18446744073709551615, 18446744073709551615, 734, 738, 734, 738, 137, 138, true, "with", "with"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206519567123880, 14860984398233775563, 18446744073709551615, 18446744073709551615, 778, 784, 778, 784, 144, 145, true, "versus", "versus"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 14638857868319795209, 10777935728644887730, 18446744073709551615, 18446744073709551615, 804, 812, 804, 812, 147, 149, true, "with the", "with the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206566339127348, 18133865400670791260, 18446744073709551615, 18446744073709551615, 877, 883, 877, 883, 162, 164, true, "on the", "on the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206566339127348, 18133865400670794962, 18446744073709551615, 18446744073709551615, 916, 922, 916, 922, 169, 171, true, "on the", "on the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206565712212855, 18137745878169470569, 18446744073709551615, 18446744073709551615, 930, 936, 930, 936, 172, 174, true, "of the", "of the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 16381206560518651853, 2434079979099920259, 18446744073709551615, 18446744073709551615, 943, 949, 943, 949, 175, 177, true, "in the", "in the"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541485865, 16635202727199936440, 18446744073709551615, 18446744073709551615, 181, 183, 181, 183, 35, 36, true, "to", "to"], ["conn", "single-conn", 1363251178266051349, "TEXT", "#/texts/50", 1.0, 15441160910541485865, 16635202727200087940, 18446744073709551615, 18446744073709551615, 868, 870, 868, 870, 160, 161, true, "to", "to"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 17767354399704235161, 12733743888687180225, 18446744073709551615, 18446744073709551615, 93, 94, 93, 94, 16, 17, true, "1", "1"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415896426714, 4652804192217870476, 18446744073709551615, 18446744073709551615, 291, 294, 291, 294, 53, 54, true, "100", "100"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104147765109382, 8033726402022826926, 18446744073709551615, 18446744073709551615, 312, 317, 312, 317, 58, 59, true, "25000", "25000"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415896426714, 4652804192217923506, 18446744073709551615, 18446744073709551615, 354, 357, 354, 357, 66, 67, true, "100", "100"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 17767354399704235152, 12733743887789901018, 18446744073709551615, 18446744073709551615, 508, 509, 508, 509, 91, 92, true, "8", "8"], ["numval", "ival", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541481982, 2599356122854466353, 18446744073709551615, 18446744073709551615, 894, 896, 892, 894, 164, 165, true, "10", "10"], ["parenthesis", "round brackets", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 10542145200230278619, 11521937276206628775, 18446744073709551615, 18446744073709551615, 889, 912, 889, 910, 162, 171, true, "(\u2248 10 pages/sec/node)", "(\u2248 10 pages/sec/node)"], ["expression", "common", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541486545, 2599358878961543341, 18446744073709551615, 18446744073709551615, 303, 307, 303, 307, 56, 57, true, "ie", "i.e."], ["expression", "word-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 6285955549867796622, 12901492066051428715, 18446744073709551615, 18446744073709551615, 108, 124, 108, 124, 21, 22, true, "time-to-solution", "time-to-solution"], ["expression", "word-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15656590191683919916, 3502038016915722737, 18446744073709551615, 18446744073709551615, 385, 398, 385, 398, 73, 74, true, "out-ofthe-box", "out-ofthe-box"], ["expression", "word-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104162326555074, 12378649640990487310, 18446744073709551615, 18446744073709551615, 406, 411, 406, 411, 75, 76, true, "R-CNN", "R-CNN"], ["expression", "word-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 6285955549867796622, 12901492066051459793, 18446744073709551615, 18446744073709551615, 651, 667, 651, 667, 119, 120, true, "time-to-solution", "time-to-solution"], ["expression", "wtoken-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206533950151485, 7463375822213972642, 18446744073709551615, 18446744073709551615, 493, 499, 493, 499, 89, 90, true, "YOLOv2", "YOLOv2"], ["expression", "wtoken-concatenation", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206533950151485, 7463375822214056128, 18446744073709551615, 18446744073709551615, 787, 793, 787, 793, 145, 146, true, "YOLOv2", "YOLOv2"], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 11214795667451364706, 15381220353542038442, 18446744073709551615, 18446744073709551615, 0, 83, 0, 83, 0, 14, true, "Let us now discuss both deep neural network training microservices on the platform.", "Let us now discuss both deep neural network training microservices on the platform."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 17449560956934989976, 12526021364899620960, 18446744073709551615, 18446744073709551615, 84, 227, 84, 227, 14, 41, true, "In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision.", "In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 13058222401901188325, 14090621328054154871, 18446744073709551615, 18446744073709551615, 228, 364, 228, 364, 41, 69, true, "In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times.", "In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16675190523738339061, 7202929718160933759, 18446744073709551615, 18446744073709551615, 365, 587, 365, 587, 69, 107, true, "We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied.", "We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 10235041227958384786, 9628423971346406996, 18446744073709551615, 18446744073709551615, 588, 691, 588, 691, 107, 125, true, "We believe that this is the main origin for the discrepancy of time-to-solution for the training phase.", "We believe that this is the main origin for the discrepancy of time-to-solution for the training phase."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 11909429825414533491, 7916582600131240808, 18446744073709551615, 18446744073709551615, 692, 731, 692, 731, 125, 133, true, "The same holds true for the prediction.", "The same holds true for the prediction."], ["sentence", "", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 7447987213947934224, 363147361352019607, 18446744073709551615, 18446744073709551615, 732, 913, 732, 911, 133, 172, true, "Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", "Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node)."], ["term", "enum-term-mark-2", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 11037453576911667853, 14703723871622436608, 18446744073709551615, 18446744073709551615, 206, 226, 206, 226, 37, 40, true, "recall and precision", "recall and precision"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 13848731310568719727, 15095939915134652393, 18446744073709551615, 18446744073709551615, 24, 66, 24, 66, 5, 10, true, "deep neural network training microservices", "deep neural network training microservices"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 1353284443403550494, 17158735888603064564, 18446744073709551615, 18446744073709551615, 155, 166, 155, 166, 27, 29, true, "single page", "single page"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12141441254112579393, 8271858979549873106, 18446744073709551615, 18446744073709551615, 235, 249, 235, 249, 43, 45, true, "training phase", "training phase"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 18169256434676190331, 11634553033353850813, 18446744073709551615, 18446744073709551615, 318, 329, 318, 329, 59, 61, true, "page images", "page images"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 1151653930094198889, 6279210758650536115, 18446744073709551615, 18446744073709551615, 385, 411, 385, 411, 73, 76, true, "out-ofthe-box Faster R-CNN", "out-ofthe-box Faster R-CNN"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12141441254112579393, 8271858979549955993, 18446744073709551615, 18446744073709551615, 471, 485, 471, 485, 85, 87, true, "training phase", "training phase"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 2503288761659507641, 9743919505994936922, 18446744073709551615, 18446744073709551615, 493, 507, 493, 507, 89, 91, true, "YOLOv2 batches", "YOLOv2 batches"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16269569307198368878, 14888617347479270783, 18446744073709551615, 18446744073709551615, 616, 627, 616, 627, 113, 115, true, "main origin", "main origin"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12141441254112579393, 8271858979549787104, 18446744073709551615, 18446744073709551615, 676, 690, 676, 690, 122, 124, true, "training phase", "training phase"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 4237078182846444452, 7428907322213125011, 18446744073709551615, 18446744073709551615, 787, 806, 787, 806, 145, 147, true, "YOLOv2 architecture", "YOLOv2 architecture"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14814125365076808131, 10453527503990612347, 18446744073709551615, 18446744073709551615, 74, 82, 74, 82, 12, 13, true, "platform", "platform"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 6285955549867796622, 12901492066051428715, 18446744073709551615, 18446744073709551615, 108, 124, 108, 124, 21, 22, true, "time-to-solution", "time-to-solution"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14634153919632515335, 365322755488345032, 18446744073709551615, 18446744073709551615, 129, 137, 129, 137, 23, 24, true, "training", "training"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 5731695876385560379, 1758035992340926235, 18446744073709551615, 18446744073709551615, 182, 193, 182, 193, 33, 34, true, "performance", "performance"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104159246284497, 8646809584775625185, 18446744073709551615, 18446744073709551615, 197, 202, 197, 202, 35, 36, true, "terms", "terms"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206521531485437, 11024740562177031234, 18446744073709551615, 18446744073709551615, 206, 212, 206, 212, 37, 38, true, "recall", "recall"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 6184954595655792282, 2740680839011190488, 18446744073709551615, 18446744073709551615, 217, 226, 217, 226, 39, 40, true, "precision", "precision"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15359670209433732834, 11505488180295702106, 18446744073709551615, 18446744073709551615, 271, 281, 271, 281, 50, 51, true, "algorithms", "algorithms"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206565270919865, 7578403846550666862, 18446744073709551615, 18446744073709551615, 295, 301, 295, 301, 54, 55, true, "epochs", "epochs"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106342689863369930, 11135817727321581998, 18446744073709551615, 18446744073709551615, 346, 353, 346, 353, 65, 66, true, "network", "network"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104159219994925, 8640251348534211245, 18446744073709551615, 18446744073709551615, 358, 363, 358, 363, 67, 68, true, "times", "times"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 2455254482033220466, 11766388440552122471, 18446744073709551615, 18446744073709551615, 417, 427, 417, 427, 77, 78, true, "Tensorflow", "Tensorflow"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14652257119591248677, 16033503133782517052, 18446744073709551615, 18446744073709551615, 451, 459, 451, 459, 82, 83, true, "batching", "batching"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206560620045048, 7774432132927566429, 18446744073709551615, 18446744073709551615, 510, 516, 510, 516, 92, 93, true, "images", "images"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 389609625631241985, 11701890325058806343, 18446744073709551615, 18446744073709551615, 522, 526, 522, 526, 95, 96, true, "time", "time"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206519429140242, 7379520217990130218, 18446744073709551615, 18446744073709551615, 528, 534, 528, 534, 97, 98, true, "thanks", "thanks"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104161828335551, 12350292282878253456, 18446744073709551615, 18446744073709551615, 541, 546, 541, 546, 100, 101, true, "image", "image"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 1478855739373258073, 16768663803468661998, 18446744073709551615, 18446744073709551615, 636, 647, 636, 647, 117, 118, true, "discrepancy", "discrepancy"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 6285955549867796622, 12901492066051459793, 18446744073709551615, 18446744073709551615, 651, 667, 651, 667, 119, 120, true, "time-to-solution", "time-to-solution"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14103651237077221583, 1262912962491166125, 18446744073709551615, 18446744073709551615, 720, 730, 720, 730, 131, 132, true, "prediction", "prediction"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104161594416377, 12352174572142722555, 18446744073709551615, 18446744073709551615, 752, 757, 752, 757, 137, 138, true, "point", "point"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 389609625619349298, 11674445135708463101, 18446744073709551615, 18446744073709551615, 761, 765, 761, 765, 139, 140, true, "view", "view"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14814125365076808131, 10453527503990666008, 18446744073709551615, 18446744073709551615, 773, 781, 773, 781, 142, 143, true, "platform", "platform"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 5300910362436626583, 8416985596353960814, 18446744073709551615, 18446744073709551615, 831, 841, 831, 841, 151, 152, true, "deployment", "deployment"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 5748881733723959229, 774140163382881369, 18446744073709551615, 18446744073709551615, 878, 888, 878, 888, 161, 162, true, "throughput", "throughput"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104161667992688, 11860182035845045407, 18446744073709551615, 18446744073709551615, 897, 902, 895, 900, 165, 166, true, "pages", "pages"], ["term", "single-term", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415895638619, 4652781528079557311, 18446744073709551615, 18446744073709551615, 903, 906, 901, 904, 167, 168, true, "sec", "sec"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8526860058636487735, 15955870111469140752, 18446744073709551615, 18446744073709551615, 330, 341, 330, 341, 61, 64, true, "were fed to", "were fed to"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 436128332273723128, 12647681645588449593, 18446744073709551615, 18446744073709551615, 428, 446, 428, 446, 78, 81, true, "does not implement", "does not implement"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 2778023241922598008, 5238034027547162597, 18446744073709551615, 18446744073709551615, 562, 586, 562, 586, 103, 106, true, "is automatically applied", "is automatically applied"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 18110906195041757747, 18325478196446152715, 18446744073709551615, 18446744073709551615, 807, 826, 807, 826, 147, 150, true, "seems better suited", "seems better suited"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 2039260297159993470, 11990526724975601040, 18446744073709551615, 18446744073709551615, 849, 863, 849, 863, 155, 158, true, "allows to have", "allows to have"], ["verb", "compound-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104147695762436, 8034268094721023599, 18446744073709551615, 18446744073709551615, 906, 911, 904, 909, 168, 170, true, "/node", "/node"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415896275389, 4652821010771256286, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "Let", "Let"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106397868479560363, 5980952610294528544, 18446744073709551615, 18446744073709551615, 11, 18, 11, 18, 3, 4, true, "discuss", "discuss"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 389609625741152123, 11698558665309690548, 18446744073709551615, 18446744073709551615, 99, 103, 99, 103, 19, 20, true, "show", "show"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14103651237077222912, 1262912573528208063, 18446744073709551615, 18446744073709551615, 142, 152, 142, 152, 25, 26, true, "predicting", "predicting"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206564578053366, 7676681725158730412, 18446744073709551615, 18446744073709551615, 254, 260, 254, 260, 47, 48, true, "ensure", "ensure"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415895649364, 4652781883350111182, 18446744073709551615, 18446744073709551615, 282, 285, 282, 285, 51, 52, true, "ran", "ran"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541486545, 2599358878961543341, 18446744073709551615, 18446744073709551615, 303, 307, 303, 307, 56, 57, true, "ie", "i.e."], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106342033696543838, 10720166011679309151, 18446744073709551615, 18446744073709551615, 368, 375, 368, 375, 70, 71, true, "observe", "observe"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14634109260174176887, 3059970276159290973, 18446744073709551615, 18446744073709551615, 547, 555, 547, 555, 101, 102, true, "resizing", "resizing"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106397860663428876, 2379893300042418437, 18446744073709551615, 18446744073709551615, 591, 598, 591, 598, 108, 109, true, "believe", "believe"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541486535, 2599358878751709903, 18446744073709551615, 18446744073709551615, 609, 611, 609, 611, 111, 112, true, "is", "is"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104161533598953, 11928511646589428500, 18446744073709551615, 18446744073709551615, 701, 706, 701, 706, 127, 128, true, "holds", "holds"], ["verb", "single-verb", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 17767354399704339168, 12733722225655458138, 18446744073709551615, 18446744073709551615, 890, 893, 890, 891, 163, 164, true, "\u2248", "\u2248"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14634153888224917429, 9004783391296823986, 18446744073709551615, 18446744073709551615, 707, 715, 707, 715, 128, 130, true, "true for", "true for"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206566339127348, 7523956295610612753, 18446744073709551615, 18446744073709551615, 67, 73, 67, 73, 10, 12, true, "on the", "on the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541480354, 2599356225275492892, 18446744073709551615, 18446744073709551615, 84, 86, 84, 86, 14, 15, true, "In", "In"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415895625940, 4653059449996398372, 18446744073709551615, 18446744073709551615, 125, 128, 125, 128, 22, 23, true, "for", "for"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206568455155979, 8062169836442615762, 18446744073709551615, 18446744073709551615, 175, 181, 175, 181, 31, 33, true, "as the", "as the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541486538, 2599358879133688732, 18446744073709551615, 18446744073709551615, 194, 196, 194, 196, 34, 35, true, "in", "in"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541485670, 2599358870315263905, 18446744073709551615, 18446744073709551615, 203, 205, 203, 205, 36, 37, true, "of", "of"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16380809977974811061, 11732651135400697626, 18446744073709551615, 18446744073709551615, 228, 234, 228, 234, 41, 43, true, "In the", "In the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 3504047303032829403, 14383519537824238604, 18446744073709551615, 18446744073709551615, 261, 270, 261, 270, 48, 50, true, "that both", "that both"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14634130761162415388, 10901511361886185107, 18446744073709551615, 18446744073709551615, 376, 384, 376, 384, 71, 73, true, "that the", "that the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 389609625697843734, 11702137981936100184, 18446744073709551615, 18446744073709551615, 412, 416, 412, 416, 76, 77, true, "from", "from"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 2511937742856062086, 2355253536228937084, 18446744073709551615, 18446744073709551615, 460, 470, 460, 470, 83, 85, true, "during the", "during the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104161580427521, 12357508218241612915, 18446744073709551615, 18446744073709551615, 487, 492, 487, 492, 88, 89, true, "while", "while"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 389609625700792947, 11701923673037716898, 18446744073709551615, 18446744073709551615, 517, 521, 517, 521, 93, 95, true, "at a", "at a"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 3504047303127782210, 14386938221778026486, 18446744073709551615, 18446744073709551615, 599, 608, 599, 608, 109, 111, true, "that this", "that this"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106397727991264470, 4625930078648415204, 18446744073709551615, 18446744073709551615, 628, 635, 628, 635, 115, 117, true, "for the", "for the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541485670, 2599358870315233503, 18446744073709551615, 18446744073709551615, 648, 650, 648, 650, 118, 119, true, "of", "of"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 8106397727991264470, 4625930078648412606, 18446744073709551615, 18446744073709551615, 668, 675, 668, 675, 120, 122, true, "for the", "for the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 14637917359887717745, 11341143089950838331, 18446744073709551615, 18446744073709551615, 743, 751, 743, 751, 135, 137, true, "from the", "from the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541485670, 2599358870315209500, 18446744073709551615, 18446744073709551615, 758, 760, 758, 760, 138, 139, true, "of", "of"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206565712212855, 7825456364758516667, 18446744073709551615, 18446744073709551615, 766, 772, 766, 772, 140, 142, true, "of the", "of the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 12178341415895625940, 4653059449996278256, 18446744073709551615, 18446744073709551615, 827, 830, 827, 830, 150, 151, true, "for", "for"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541487053, 2599358845406797182, 18446744073709551615, 18446744073709551615, 843, 845, 843, 845, 153, 154, true, "as", "as"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 16381206519425733256, 7379223398534589543, 18446744073709551615, 18446744073709551615, 339, 345, 339, 345, 63, 65, true, "to the", "to the"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 329104159243175056, 8638673086732548345, 18446744073709551615, 18446744073709551615, 535, 540, 535, 540, 98, 100, true, "to an", "to an"], ["conn", "single-conn", 18259197018396996238, "TEXT", "#/texts/51", 1.0, 15441160910541485865, 2599358851656141726, 18446744073709551615, 18446744073709551615, 856, 858, 856, 858, 156, 157, true, "to", "to"], ["numval", "ival", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 17767354399704235160, 8675424045619207091, 18446744073709551615, 18446744073709551615, 231, 232, 231, 232, 35, 36, true, "0", "0"], ["numval", "ival", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 17767354399704235161, 8675424045602759514, 18446744073709551615, 18446744073709551615, 237, 238, 237, 238, 37, 38, true, "1", "1"], ["name", "name-concatenation", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 2906527799746313192, 7162268361091325108, 18446744073709551615, 18446744073709551615, 354, 363, 354, 363, 61, 64, true, "Not-Table", "Not-Table"], ["expression", "word-concatenation", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 3002943871017471876, 6314608314970297277, 18446744073709551615, 18446744073709551615, 49, 63, 49, 63, 9, 10, true, "pre-processing", "pre-processing"], ["expression", "word-concatenation", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 3458523808570659318, 9975991896240937817, 18446744073709551615, 18446744073709551615, 141, 157, 141, 157, 22, 23, true, "object-detection", "object-detection"], ["sentence", "", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 7429795002768371766, 12580216355924388710, 18446744073709551615, 18446744073709551615, 0, 136, 0, 136, 0, 21, true, "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously.", "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously."], ["sentence", "", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 16291040095568243120, 1594236025068685140, 18446744073709551615, 18446744073709551615, 137, 239, 137, 239, 21, 39, true, "The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1.", "The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1."], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 4471200074237295914, 1456466697102274833, 18446744073709551615, 18446744073709551615, 8, 28, 8, 28, 2, 4, true, "performance analysis", "performance analysis"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 4048925549312111393, 15542194947650577050, 18446744073709551615, 18446744073709551615, 49, 69, 49, 69, 9, 11, true, "pre-processing stage", "pre-processing stage"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 15479850329146856745, 787461524154987429, 18446744073709551615, 18446744073709551615, 141, 166, 141, 166, 22, 24, true, "object-detection networks", "object-detection networks"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 4874473477449861741, 3504061852580538950, 18446744073709551615, 18446744073709551615, 206, 222, 206, 222, 32, 34, true, "confidence level", "confidence level"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 5568743441709168075, 13244961468904706800, 18446744073709551615, 18446744073709551615, 322, 337, 322, 337, 56, 58, true, "particular case", "particular case"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106464574171450434, 15318495777273702751, 18446744073709551615, 18446744073709551615, 107, 114, 107, 114, 17, 18, true, "metrics", "metrics"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 12178341415895638602, 6222934568051327791, 18446744073709551615, 18446744073709551615, 177, 180, 177, 180, 26, 27, true, "set", "set"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 329104159325617355, 15838640579331060931, 18446744073709551615, 18446744073709551615, 193, 198, 193, 198, 29, 30, true, "boxes", "boxes"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 329104159325617355, 15838640579331035020, 18446744073709551615, 18446744073709551615, 262, 267, 262, 267, 43, 44, true, "boxes", "boxes"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 389609625696024605, 3998089761623990856, 18446744073709551615, 18446744073709551615, 291, 295, 291, 295, 48, 49, true, "cell", "cell"], ["term", "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 329104161624445793, 1096780638347487949, 18446744073709551615, 18446744073709551615, 298, 303, 298, 303, 50, 51, true, "label", "label"], ["verb", "compound-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 6181919773618307675, 13087072183397009947, 18446744073709551615, 18446744073709551615, 76, 85, 76, 85, 12, 14, true, "is needed", "is needed"], ["verb", "compound-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 3312537848285575572, 3682069485478563076, 18446744073709551615, 18446744073709551615, 115, 135, 115, 135, 18, 20, true, "described previously", "described previously"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 12178341415895617983, 6222927924466837926, 18446744073709551615, 18446744073709551615, 30, 33, 30, 33, 5, 6, true, "let", "let"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106342536055423396, 1623603363237275433, 18446744073709551615, 18446744073709551615, 37, 44, 37, 44, 7, 8, true, "outline", "outline"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 5947879507992892292, 3137884750946432419, 18446744073709551615, 18446744073709551615, 93, 102, 93, 102, 15, 16, true, "computing", "computing"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106476016678293182, 8897474810961070939, 18446744073709551615, 18446744073709551615, 167, 174, 167, 174, 24, 25, true, "predict", "predict"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 14652253380850532610, 15688350870772298580, 18446744073709551615, 18446744073709551615, 184, 192, 184, 192, 28, 29, true, "bounding", "bounding"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 12178341415895516060, 6222929879151021867, 18446744073709551615, 18446744073709551615, 243, 246, 243, 246, 40, 41, true, "use", "use"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 14652253380850532610, 15688350870772294533, 18446744073709551615, 18446744073709551615, 253, 261, 253, 261, 42, 43, true, "bounding", "bounding"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 5950066721891255692, 2770587476745436308, 18446744073709551615, 18446744073709551615, 271, 280, 271, 280, 45, 46, true, "associate", "associate"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 15441160910541486535, 15053982258803746941, 18446744073709551615, 18446744073709551615, 311, 313, 311, 313, 53, 54, true, "is", "is"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 6180152660545840784, 16460990176239850859, 18446744073709551615, 18446744073709551615, 365, 374, 365, 374, 65, 66, true, "depending", "depending"], ["verb", "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106342531491540207, 1091864981917538389, 18446744073709551615, 18446744073709551615, 391, 398, 391, 398, 69, 70, true, "overlap", "overlap"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106351438779293396, 7036921387199751321, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "For the", "For the"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 16381206569837301772, 829894264837423586, 18446744073709551615, 18446744073709551615, 86, 92, 86, 92, 14, 15, true, "before", "before"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 15441160910541485670, 15053982237527373603, 18446744073709551615, 18446744073709551615, 181, 183, 181, 183, 27, 28, true, "of", "of"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 16381206557726458966, 4275353707798328089, 18446744073709551615, 18446744073709551615, 199, 205, 199, 205, 30, 32, true, "with a", "with a"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106397860038858133, 2367955007216749470, 18446744073709551615, 18446744073709551615, 223, 230, 223, 230, 34, 35, true, "between", "between"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 3534222425899983491, 17228377976260951108, 18446744073709551615, 18446744073709551615, 281, 290, 281, 290, 46, 48, true, "with each", "with each"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106398107541152403, 9925559447860985794, 18446744073709551615, 18446744073709551615, 314, 321, 314, 321, 54, 56, true, "in this", "in this"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 15441160910541485678, 15053982241221563648, 18446744073709551615, 18446744073709551615, 375, 377, 375, 377, 66, 67, true, "on", "on"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 8106477878453677833, 420597325771448632, 18446744073709551615, 18446744073709551615, 378, 385, 378, 385, 67, 68, true, "whether", "whether"], ["conn", "single-conn", 14663676516964431047, "TEXT", "#/texts/52", 1.0, 15441160910541485865, 15053982239329549650, 18446744073709551615, 18446744073709551615, 268, 270, 268, 270, 44, 45, true, "to", "to"], ["numval", "ival", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 17767354399704235162, 15759397524433803932, 18446744073709551615, 18446744073709551615, 6, 7, 6, 7, 1, 2, true, "2", "2"], ["sentence", "", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 13412490586202463721, 17653988074073433733, 18446744073709551615, 18446744073709551615, 0, 95, 0, 95, 0, 17, true, "Table 2: Performance results for the template specific model of the Physical Review B journals.", "Table 2: Performance results for the template specific model of the Physical Review B journals."], ["sentence", "", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 2713668199866952841, 4447940936101437620, 18446744073709551615, 18446744073709551615, 96, 202, 96, 202, 17, 34, true, "The confusion matrix highlights the huge imbalance between the number of text cells with different labels.", "The confusion matrix highlights the huge imbalance between the number of text cells with different labels."], ["sentence", "", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 12325075441819606052, 4798224535047183092, 18446744073709551615, 18446744073709551615, 203, 310, 203, 310, 34, 53, true, "The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", "The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types."], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 8087581502811400566, 7573439973442034769, 18446744073709551615, 18446744073709551615, 9, 28, 9, 28, 3, 5, true, "Performance results", "Performance results"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 13356790934987174038, 18420992769499992239, 18446744073709551615, 18446744073709551615, 37, 60, 37, 60, 7, 10, true, "template specific model", "template specific model"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 9872729223299515659, 7908640068811257205, 18446744073709551615, 18446744073709551615, 68, 94, 68, 94, 12, 16, true, "Physical Review B journals", "Physical Review B journals"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 5497358094214601811, 7433163521566214246, 18446744073709551615, 18446744073709551615, 100, 116, 100, 116, 18, 20, true, "confusion matrix", "confusion matrix"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 1488936167715046380, 16637143750883657942, 18446744073709551615, 18446744073709551615, 132, 146, 132, 146, 22, 24, true, "huge imbalance", "huge imbalance"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 5748925367544727060, 15357132638157717228, 18446744073709551615, 18446744073709551615, 169, 179, 169, 179, 28, 30, true, "text cells", "text cells"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 220880076010336098, 14991640362132342656, 18446744073709551615, 18446744073709551615, 185, 201, 185, 201, 31, 33, true, "different labels", "different labels"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 4360412890788129778, 6086964040649348468, 18446744073709551615, 18446744073709551615, 216, 232, 216, 232, 37, 39, true, "ensemble machine", "ensemble machine"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 9628232334734286437, 15559530413649010038, 18446744073709551615, 18446744073709551615, 275, 288, 275, 288, 46, 48, true, "high accuracy", "high accuracy"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 5579859536360440221, 12384760726355576022, 18446744073709551615, 18446744073709551615, 298, 309, 298, 309, 50, 52, true, "label types", "label types"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 16381206574973295053, 15664074499384566316, 18446744073709551615, 18446744073709551615, 159, 165, 159, 165, 26, 27, true, "number", "number"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 329104159157898666, 7979932887321468479, 18446744073709551615, 18446744073709551615, 207, 212, 207, 212, 35, 36, true, "usage", "usage"], ["term", "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 8106464574531629743, 13092511743146000891, 18446744073709551615, 18446744073709551615, 242, 249, 242, 249, 40, 41, true, "methods", "methods"], ["verb", "compound-verb", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 12736124800502880399, 3048726189598552717, 18446744073709551615, 18446744073709551615, 250, 267, 250, 267, 41, 44, true, "allows to achieve", "allows to achieve"], ["verb", "single-verb", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 15927123199600624159, 11830974991863511971, 18446744073709551615, 18446744073709551615, 117, 127, 117, 127, 20, 21, true, "highlights", "highlights"], ["verb", "single-verb", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 14639581097006750428, 17977442740486581742, 18446744073709551615, 18446744073709551615, 233, 241, 233, 241, 39, 40, true, "learning", "learning"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 8106397727991264470, 13939727220022896426, 18446744073709551615, 18446744073709551615, 29, 36, 29, 36, 5, 7, true, "for the", "for the"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 16381206565712212855, 15527423972997370423, 18446744073709551615, 18446744073709551615, 61, 67, 61, 67, 10, 12, true, "of the", "of the"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 2011002864325523456, 16665978214615422828, 18446744073709551615, 18446744073709551615, 147, 158, 147, 158, 24, 26, true, "between the", "between the"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 15441160910541485670, 10632466984953712528, 18446744073709551615, 18446744073709551615, 166, 168, 166, 168, 27, 28, true, "of", "of"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 389609625618037948, 18050712937266565062, 18446744073709551615, 18446744073709551615, 180, 184, 180, 184, 30, 31, true, "with", "with"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 15441160910541485670, 10632466984953723750, 18446744073709551615, 18446744073709551615, 213, 215, 213, 215, 36, 37, true, "of", "of"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 14814149446809805987, 2376885852812773633, 18446744073709551615, 18446744073709551615, 289, 297, 289, 297, 48, 50, true, "over all", "over all"], ["conn", "single-conn", 4577067829072175096, "TEXT", "#/texts/53", 1.0, 15441160910541485865, 10632466981388317765, 18446744073709551615, 18446744073709551615, 257, 259, 257, 259, 42, 43, true, "to", "to"], ["numval", "ival", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 17767354399704235160, 13994996428325642210, 18446744073709551615, 18446744073709551615, 443, 444, 443, 444, 78, 79, true, "0", "0"], ["numval", "ival", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 17767354399704235157, 13994996428928834278, 18446744073709551615, 18446744073709551615, 446, 447, 446, 447, 80, 81, true, "5", "5"], ["parenthesis", "round brackets", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 5763721985249138201, 11333613653010201493, 18446744073709551615, 18446744073709551615, 726, 746, 726, 746, 129, 135, true, "(made with a camera)", "(made with a camera)"], ["expression", "word-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 2772095701715059387, 18429532044600751065, 18446744073709551615, 18446744073709551615, 99, 109, 99, 109, 16, 17, true, "dual-class", "dual-class"], ["expression", "word-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 329104162326555074, 15570664097727008132, 18446744073709551615, 18446744073709551615, 460, 465, 460, 465, 84, 85, true, "R-CNN", "R-CNN"], ["expression", "word-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 329104162326555074, 15570664097727047898, 18446744073709551615, 18446744073709551615, 815, 820, 815, 820, 145, 146, true, "R-CNN", "R-CNN"], ["expression", "wtoken-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206533950151485, 198566132787583629, 18446744073709551615, 18446744073709551615, 278, 284, 278, 284, 47, 48, true, "YOLOv2", "YOLOv2"], ["expression", "wtoken-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541480158, 10477275210029982213, 18446744073709551615, 18446744073709551615, 400, 402, 400, 402, 69, 70, true, "F1", "F1"], ["expression", "wtoken-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 329104147618556708, 15461264859114081015, 18446744073709551615, 18446744073709551615, 412, 417, 412, 417, 72, 73, true, "98.7%", "98.7%"], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 784428348664963687, 2735229758044296436, 18446744073709551615, 18446744073709551615, 33, 133, 33, 133, 6, 20, true, "The corresponding recall and precision are then computed for this dual-class classification problem.", "The corresponding recall and precision are then computed for this dual-class classification problem."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 3927917834152176938, 12569591881522562313, 18446744073709551615, 18446744073709551615, 134, 273, 134, 273, 20, 46, true, "In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence.", "In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 3956872905292683881, 2752157999599851583, 18446744073709551615, 18446744073709551615, 274, 445, 274, 445, 46, 80, true, "For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0.", "For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 17055744903410885404, 12761534484507818149, 18446744073709551615, 18446744073709551615, 449, 556, 449, 556, 82, 101, true, "The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers.", "The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14420414998277701657, 3037581738866623003, 18446744073709551615, 18446744073709551615, 557, 667, 557, 667, 101, 119, true, "We believe this originates from the selective search algorithm which is used to determine regions of interest.", "We believe this originates from the selective search algorithm which is used to determine regions of interest."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14678097696923692160, 11491609575789433741, 18446744073709551615, 18446744073709551615, 668, 773, 668, 773, 119, 139, true, "The images we feed it are not typical photographic images (made with a camera) but layout visualisations.", "The images we feed it are not typical photographic images (made with a camera) but layout visualisations."], ["sentence", "", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 1336288703622935510, 15435580690586079242, 18446744073709551615, 18446744073709551615, 774, 867, 774, 867, 139, 156, true, "The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", "The selective search algorithm in Faster R-CNN might not be optimal for such type of objects."], ["term", "enum-term-mark-2", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 11037453576911667853, 12443097430245333421, 18446744073709551615, 18446744073709551615, 51, 71, 51, 71, 8, 11, true, "recall and precision", "recall and precision"], ["term", "enum-term-mark-2", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 767578358531619449, 1472685584560725507, 18446744073709551615, 18446744073709551615, 204, 224, 204, 224, 35, 38, true, "precision and recall", "precision and recall"], ["term", "enum-term-mark-2", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 767578358531619449, 1472685584560746355, 18446744073709551615, 18446744073709551615, 527, 547, 527, 547, 96, 99, true, "precision and recall", "precision and recall"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 7379815267840909102, 11228728212639806867, 18446744073709551615, 18446744073709551615, 19, 31, 19, 31, 3, 5, true, "bounding box", "bounding box"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 7737036869804521677, 431221867393766623, 18446744073709551615, 18446744073709551615, 37, 57, 37, 57, 7, 9, true, "corresponding recall", "corresponding recall"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 11075783049363921732, 14381818982688268241, 18446744073709551615, 18446744073709551615, 99, 132, 99, 132, 16, 19, true, "dual-class classification problem", "dual-class classification problem"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8581372359543855162, 10333944193716453687, 18446744073709551615, 18446744073709551615, 151, 166, 151, 166, 25, 27, true, "fair comparison", "fair comparison"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16904814960714419182, 7305130667909903014, 18446744073709551615, 18446744073709551615, 218, 232, 218, 232, 37, 39, true, "recall metrics", "recall metrics"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 5859613489047657680, 4575208165015881094, 18446744073709551615, 18446744073709551615, 392, 408, 392, 408, 68, 71, true, "maximum F1 score", "maximum F1 score"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 4874473477449861741, 7312361899298084317, 18446744073709551615, 18446744073709551615, 423, 439, 423, 439, 75, 77, true, "confidence level", "confidence level"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6927970521128218953, 6482828839300817669, 18446744073709551615, 18446744073709551615, 453, 472, 453, 472, 83, 86, true, "Faster R-CNN method", "Faster R-CNN method"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16904814894749305757, 5737021334745277149, 18446744073709551615, 18446744073709551615, 541, 555, 541, 555, 98, 100, true, "recall numbers", "recall numbers"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 4349380732135272089, 16458298459980248480, 18446744073709551615, 18446744073709551615, 593, 619, 593, 619, 107, 110, true, "selective search algorithm", "selective search algorithm"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 2351536754407393176, 12969141846351017301, 18446744073709551615, 18446744073709551615, 698, 725, 698, 725, 126, 129, true, "typical photographic images", "typical photographic images"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 18245848170103364623, 3851473044777784430, 18446744073709551615, 18446744073709551615, 751, 772, 751, 772, 136, 138, true, "layout visualisations", "layout visualisations"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 4349380732135272089, 16458298459980260537, 18446744073709551615, 18446744073709551615, 778, 804, 778, 804, 140, 143, true, "selective search algorithm", "selective search algorithm"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 5327781098613689502, 14889487484335627658, 18446744073709551615, 18446744073709551615, 808, 820, 808, 820, 144, 146, true, "Faster R-CNN", "Faster R-CNN"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6165459236568103333, 2812369711373771464, 18446744073709551615, 18446744073709551615, 846, 855, 846, 855, 151, 153, true, "such type", "such type"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6184954595655792282, 18387321712019319773, 18446744073709551615, 18446744073709551615, 62, 71, 62, 71, 10, 11, true, "precision", "precision"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 329104161571401725, 15575423851065642052, 18446744073709551615, 18446744073709551615, 137, 142, 137, 142, 21, 22, true, "order", "order"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14814151113413570861, 12729204908894192489, 18446744073709551615, 18446744073709551615, 178, 186, 178, 186, 30, 31, true, "networks", "networks"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6184954595655792282, 18387321712019245881, 18446744073709551615, 18446744073709551615, 204, 213, 204, 213, 35, 36, true, "precision", "precision"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206521526353544, 16408450721845756506, 18446744073709551615, 18446744073709551615, 238, 244, 238, 244, 40, 41, true, "regard", "regard"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 2702871111219879214, 2512541272008941381, 18446744073709551615, 18446744073709551615, 262, 272, 262, 272, 44, 45, true, "confidence", "confidence"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206533950151485, 198566132787583629, 18446744073709551615, 18446744073709551615, 278, 284, 278, 284, 47, 48, true, "YOLOv2", "YOLOv2"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206521531485437, 16408606466535231414, 18446744073709551615, 18446744073709551615, 305, 311, 305, 311, 52, 53, true, "recall", "recall"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6184954595655792282, 18387321712019270016, 18446744073709551615, 18446744073709551615, 330, 339, 330, 339, 57, 58, true, "precision", "precision"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 2702871111219879214, 2512541272008894019, 18446744073709551615, 18446744073709551615, 355, 365, 355, 365, 62, 63, true, "confidence", "confidence"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6184954595655792282, 18387321712019273929, 18446744073709551615, 18446744073709551615, 527, 536, 527, 536, 96, 97, true, "precision", "precision"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8106478448964548679, 12701825139671272799, 18446744073709551615, 18446744073709551615, 647, 654, 647, 654, 115, 116, true, "regions", "regions"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14637953883246475850, 7956817731702541219, 18446744073709551615, 18446744073709551615, 658, 666, 658, 666, 117, 118, true, "interest", "interest"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206560620045048, 3784940468244328560, 18446744073709551615, 18446744073709551615, 672, 678, 672, 678, 120, 121, true, "images", "images"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206563351041630, 1952046848832586628, 18446744073709551615, 18446744073709551615, 739, 745, 739, 745, 133, 134, true, "camera", "camera"], ["term", "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8106342034010873556, 18238380662499221230, 18446744073709551615, 18446744073709551615, 859, 866, 859, 866, 154, 155, true, "objects", "objects"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 11891944663675020942, 13358251629780069780, 18446744073709551615, 18446744073709551615, 72, 89, 72, 89, 11, 14, true, "are then computed", "are then computed"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6183880245133195430, 11375315636474919011, 18446744073709551615, 18446744073709551615, 312, 321, 312, 321, 53, 55, true, "goes down", "goes down"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 2694830089385977061, 235012322887490211, 18446744073709551615, 18446744073709551615, 366, 378, 366, 378, 63, 65, true, "is increased", "is increased"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 7743689594175537908, 4826765463732452457, 18446744073709551615, 18446744073709551615, 473, 502, 473, 502, 86, 91, true, "is also performing quite well", "is also performing quite well"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14568989124066371477, 1068965357575472568, 18446744073709551615, 18446744073709551615, 508, 520, 508, 520, 93, 95, true, "has slightly", "has slightly"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16534452113033443144, 7065494418204761025, 18446744073709551615, 18446744073709551615, 626, 646, 626, 646, 111, 115, true, "is used to determine", "is used to determine"], ["verb", "compound-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8106397797831668975, 18220343756781523026, 18446744073709551615, 18446744073709551615, 690, 697, 690, 697, 124, 126, true, "are not", "are not"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6184954633443293966, 15964917443528191555, 18446744073709551615, 18446744073709551615, 9, 18, 9, 18, 2, 3, true, "predicted", "predicted"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541486853, 10477289391110259759, 18446744073709551615, 18446744073709551615, 146, 148, 146, 148, 23, 24, true, "do", "do"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14814150880980441564, 5851167619774412175, 18446744073709551615, 18446744073709551615, 191, 199, 191, 199, 33, 34, true, "optimise", "optimise"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6184954633443293966, 15964917443528178420, 18446744073709551615, 18446744073709551615, 252, 261, 252, 261, 43, 44, true, "predicted", "predicted"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8106342033696543838, 18232753974273180210, 18446744073709551615, 18446744073709551615, 288, 295, 288, 295, 49, 50, true, "observe", "observe"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 389609625699055541, 1239396878369861980, 18446744073709551615, 18446744073709551615, 340, 344, 340, 344, 58, 59, true, "goes", "goes"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 6168826060228989821, 9992741985777267919, 18446744073709551615, 18446744073709551615, 380, 389, 380, 389, 66, 67, true, "obtaining", "obtaining"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 8106397860663428876, 16964248131253901291, 18446744073709551615, 18446744073709551615, 560, 567, 560, 567, 102, 103, true, "believe", "believe"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 13983620007877845674, 12955352785275452378, 18446744073709551615, 18446744073709551615, 573, 583, 573, 583, 104, 105, true, "originates", "originates"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 389609625697838276, 1239402610955961201, 18446744073709551615, 18446744073709551615, 682, 686, 682, 686, 122, 123, true, "feed", "feed"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 389609625618411791, 1242783662433971802, 18446744073709551615, 18446744073709551615, 727, 731, 727, 731, 130, 131, true, "made", "made"], ["verb", "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541487001, 10477275230049640367, 18446744073709551615, 18446744073709551615, 831, 833, 831, 833, 148, 149, true, "be", "be"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16553501753141503400, 15045481503517904124, 18446744073709551615, 18446744073709551615, 834, 845, 834, 845, 149, 151, true, "optimal for", "optimal for"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14638857868319795209, 11777518988570895518, 18446744073709551615, 18446744073709551615, 0, 8, 0, 8, 0, 2, true, "with the", "with the"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14637917333165224513, 10908983268505451281, 18446744073709551615, 18446744073709551615, 90, 98, 90, 98, 14, 16, true, "for this", "for this"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541480354, 10477275240531848205, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 20, 21, true, "In", "In"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206565712212855, 1966173897978141572, 18446744073709551615, 18446744073709551615, 167, 173, 167, 173, 27, 29, true, "of the", "of the"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 389609625618037948, 1242787593333487218, 18446744073709551615, 18446744073709551615, 233, 237, 233, 237, 39, 40, true, "with", "with"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 12178341415896108722, 156309885604541418, 18446744073709551615, 18446744073709551615, 274, 277, 274, 277, 46, 47, true, "For", "For"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14634130761162415388, 14288776936577427060, 18446744073709551615, 18446744073709551615, 296, 304, 296, 304, 50, 52, true, "that the", "that the"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206568455155979, 1869095877123778211, 18446744073709551615, 18446744073709551615, 348, 354, 348, 354, 60, 62, true, "as the", "as the"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485670, 10477275256518274646, 18446744073709551615, 18446744073709551615, 409, 411, 409, 411, 71, 72, true, "of", "of"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 389609625700792947, 1238530397841875604, 18446744073709551615, 18446744073709551615, 418, 422, 418, 422, 73, 75, true, "at a", "at a"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485670, 10477275256518295884, 18446744073709551615, 18446744073709551615, 440, 442, 440, 442, 77, 78, true, "of", "of"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 14637917359887717745, 1544745809668392834, 18446744073709551615, 18446744073709551615, 584, 592, 584, 592, 105, 107, true, "from the", "from the"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485670, 10477275256518301113, 18446744073709551615, 18446744073709551615, 655, 657, 655, 657, 116, 117, true, "of", "of"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206557726458966, 3788832551851477825, 18446744073709551615, 18446744073709551615, 732, 738, 732, 738, 131, 133, true, "with a", "with a"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541486538, 10477275205185242704, 18446744073709551615, 18446744073709551615, 805, 807, 805, 807, 143, 144, true, "in", "in"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485670, 10477275256518310244, 18446744073709551615, 18446744073709551615, 856, 858, 856, 858, 153, 154, true, "of", "of"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485865, 10477275215095288698, 18446744073709551615, 18446744073709551615, 143, 145, 143, 145, 22, 23, true, "to", "to"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 16381206519425733256, 370344314517327407, 18446744073709551615, 18446744073709551615, 245, 251, 245, 251, 41, 43, true, "to the", "to the"], ["conn", "single-conn", 2569392033451362672, "TEXT", "#/texts/54", 1.0, 15441160910541485865, 10477275215095322459, 18446744073709551615, 18446744073709551615, 634, 636, 634, 636, 113, 114, true, "to", "to"], ["expression", "wtoken-concatenation", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 329104147725158908, 18028372742913290156, 18446744073709551615, 18446744073709551615, 0, 5, 0, 5, 0, 1, true, "3.4.3", "3.4.3"], ["sentence", "", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 7718133462399744108, 17823198661305637266, 18446744073709551615, 18446744073709551615, 0, 31, 0, 31, 0, 5, true, "3.4.3 Template specific Models.", "3.4.3 Template specific Models."], ["sentence", "", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 10092485441396158590, 1921679794908306598, 18446744073709551615, 18446744073709551615, 32, 159, 32, 159, 5, 27, true, "The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template.", "The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template."], ["sentence", "", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15812734743858168044, 5104988671183900609, 18446744073709551615, 18446744073709551615, 160, 272, 160, 272, 27, 47, true, "This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance.", "This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance."], ["sentence", "", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 551135567978634707, 9805137836117614428, 18446744073709551615, 18446744073709551615, 273, 460, 273, 460, 47, 78, true, "Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", "Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality."], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 11907907877741579530, 940094317087021995, 18446744073709551615, 18446744073709551615, 6, 30, 6, 30, 1, 4, true, "Template specific Models", "Template specific Models"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 3663813169945470735, 17139564151051767194, 18446744073709551615, 18446744073709551615, 44, 68, 44, 68, 8, 11, true, "template specific models", "template specific models"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 16960645913427248555, 7662141651479474713, 18446744073709551615, 18446744073709551615, 91, 109, 91, 109, 16, 18, true, "extraction quality", "extraction quality"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 10137510760641589283, 15174113578041628274, 18446744073709551615, 18446744073709551615, 141, 158, 141, 158, 24, 26, true, "specific template", "specific template"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 7342862043108457350, 10866470711373289678, 18446744073709551615, 18446744073709551615, 181, 202, 181, 202, 31, 34, true, "many technical fields", "many technical fields"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 3376407656379762908, 17651500245932752692, 18446744073709551615, 18446744073709551615, 251, 271, 251, 271, 44, 46, true, "paramount importance", "paramount importance"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 879437392081459464, 10698589901478685905, 18446744073709551615, 18446744073709551615, 286, 310, 286, 310, 49, 52, true, "many technical documents", "many technical documents"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15130402050161305835, 1457144697725364176, 18446744073709551615, 18446744073709551615, 316, 330, 316, 330, 54, 56, true, "specific field", "specific field"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 5723400002059657755, 8384905200420629131, 18446744073709551615, 18446744073709551615, 353, 369, 353, 369, 60, 62, true, "certain template", "certain template"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 16960645913427248555, 7662141651479431440, 18446744073709551615, 18446744073709551615, 441, 459, 441, 459, 75, 77, true, "extraction quality", "extraction quality"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625699055241, 14883359024073212478, 18446744073709551615, 18446744073709551615, 36, 40, 36, 40, 6, 7, true, "goal", "goal"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 329104161610777240, 15370809836743986311, 18446744073709551615, 18446744073709551615, 130, 135, 130, 135, 21, 22, true, "model", "model"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 14650440612701450082, 10632661340355574917, 18446744073709551615, 18446744073709551615, 214, 222, 214, 222, 37, 38, true, "accuracy", "accuracy"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625696431489, 14876459829455684771, 18446744073709551615, 18446744073709551615, 240, 244, 240, 244, 41, 42, true, "data", "data"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 329104161787480235, 15382185116652927163, 18446744073709551615, 18446744073709551615, 389, 394, 389, 394, 66, 67, true, "sense", "sense"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 5946904284821171904, 7436968498862967568, 18446744073709551615, 18446744073709551615, 403, 412, 403, 412, 69, 70, true, "advantage", "advantage"], ["term", "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 14634130803848280536, 13102933406250746055, 18446744073709551615, 18446744073709551615, 421, 429, 421, 429, 72, 73, true, "template", "template"], ["verb", "compound-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 6623118764989562485, 9686528964214635468, 18446744073709551615, 18446744073709551615, 69, 81, 69, 81, 11, 14, true, "is to obtain", "is to obtain"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15180748647375949898, 15041926949817059678, 18446744073709551615, 18446744073709551615, 113, 125, 113, 125, 19, 20, true, "specializing", "specializing"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541486535, 1662040640859036333, 18446744073709551615, 18446744073709551615, 165, 167, 165, 167, 28, 29, true, "is", "is"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 6168374324562720592, 8408511475472730744, 18446744073709551615, 18446744073709551615, 230, 239, 230, 239, 40, 41, true, "extracted", "extracted"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541486535, 1662040640859038873, 18446744073709551615, 18446744073709551615, 245, 247, 245, 247, 42, 43, true, "is", "is"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 16381206574684919940, 8690278604869594595, 18446744073709551615, 18446744073709551615, 341, 347, 341, 347, 57, 58, true, "appear", "appear"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 329104161505838030, 15370325700124998836, 18446744073709551615, 18446744073709551615, 383, 388, 383, 388, 65, 66, true, "makes", "makes"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625631208371, 14878114134196888026, 18446744073709551615, 18446744073709551615, 398, 402, 398, 402, 68, 69, true, "take", "take"], ["verb", "single-verb", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 8106398106568099440, 4690670493670021785, 18446744073709551615, 18446744073709551615, 433, 440, 433, 440, 74, 75, true, "improve", "improve"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 3701312585595488544, 14499465500010376427, 18446744073709551615, 18446744073709551615, 168, 180, 168, 180, 29, 31, true, "necessary in", "necessary in"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541485670, 1662040798765251967, 18446744073709551615, 18446744073709551615, 41, 43, 41, 43, 7, 8, true, "of", "of"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541486989, 1662040951000079940, 18446744073709551615, 18446744073709551615, 110, 112, 110, 112, 18, 19, true, "by", "by"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625618762887, 14878547061345061059, 18446744073709551615, 18446744073709551615, 136, 140, 136, 140, 22, 24, true, "on a", "on a"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 16381206565712212855, 5026312373792128532, 18446744073709551615, 18446744073709551615, 223, 229, 223, 229, 38, 40, true, "of the", "of the"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541485670, 1662040798765106998, 18446744073709551615, 18446744073709551615, 248, 250, 248, 250, 43, 44, true, "of", "of"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625698530964, 14883385687690770855, 18446744073709551615, 18446744073709551615, 311, 315, 311, 315, 52, 54, true, "in a", "in a"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 389609625698530964, 14883385687690756753, 18446744073709551615, 18446744073709551615, 348, 352, 348, 352, 58, 60, true, "in a", "in a"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 8106342927224204628, 15389357728224894046, 18446744073709551615, 18446744073709551615, 413, 420, 413, 420, 70, 72, true, "of this", "of this"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541485865, 1662040545925493605, 18446744073709551615, 18446744073709551615, 72, 74, 72, 74, 12, 13, true, "to", "to"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541485865, 1662040545925472456, 18446744073709551615, 18446744073709551615, 395, 397, 395, 397, 67, 68, true, "to", "to"], ["conn", "single-conn", 14539041145469267811, "TEXT", "#/texts/55", 1.0, 15441160910541485865, 1662040545925478963, 18446744073709551615, 18446744073709551615, 430, 432, 430, 432, 73, 74, true, "to", "to"], ["numval", "irng", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 8104408072639178553, 3436406430005144206, 18446744073709551615, 18446744073709551615, 251, 258, 251, 258, 47, 48, true, "100-400", "100-400"], ["expression", "word-concatenation", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 6179391750322252074, 7609163653836261740, 18446744073709551615, 18446744073709551615, 309, 318, 309, 318, 58, 59, true, "man-hours", "man-hours"], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15260123598736784563, 9203716626740230654, 18446744073709551615, 18446744073709551615, 0, 96, 0, 96, 0, 17, true, "For an algorithm to fit in the interactive platform design we identified a few key requirements.", "For an algorithm to fit in the interactive platform design we identified a few key requirements."], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 4991934942360344417, 464502954292559498, 18446744073709551615, 18446744073709551615, 97, 187, 97, 187, 17, 36, true, "First, it is crucial that the model can generate good results with a limited set of pages.", "First, it is crucial that the model can generate good results with a limited set of pages."], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5804013038180128821, 5640425092716649521, 18446744073709551615, 18446744073709551615, 188, 334, 188, 334, 36, 62, true, "In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation.", "In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation."], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 4985111903766472827, 3150108135738516677, 18446744073709551615, 18446744073709551615, 335, 406, 335, 406, 62, 75, true, "Second it must be robust against extreme imbalance of the labeled data.", "Second it must be robust against extreme imbalance of the labeled data."], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 785870778923519952, 737901236245424740, 18446744073709551615, 18446744073709551615, 407, 510, 407, 510, 75, 97, true, "It is clear that cells of the label Title will be much more uncommon than cells with the label of Text.", "It is clear that cells of the label Title will be much more uncommon than cells with the label of Text."], ["sentence", "", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 9332774394694438134, 16216949282398930168, 18446744073709551615, 18446744073709551615, 511, 635, 511, 635, 97, 120, true, "Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", "Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process."], ["term", "enum-term-mark-2", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 4382912746554659998, 10486697078971674734, 18446744073709551615, 18446744073709551615, 553, 576, 553, 576, 107, 110, true, "training and predicting", "training and predicting"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15648340612932904440, 3282829189538707082, 18446744073709551615, 18446744073709551615, 31, 58, 31, 58, 7, 10, true, "interactive platform design", "interactive platform design"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 6908529689048994003, 5714340384362777333, 18446744073709551615, 18446744073709551615, 75, 95, 75, 95, 13, 16, true, "few key requirements", "few key requirements"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 8407082861571023662, 15351385334075847588, 18446744073709551615, 18446744073709551615, 146, 158, 146, 158, 27, 29, true, "good results", "good results"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5934032006560084170, 4928254777620989731, 18446744073709551615, 18446744073709551615, 166, 177, 166, 177, 31, 33, true, "limited set", "limited set"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 4147505635383066832, 5727214689898978610, 18446744073709551615, 18446744073709551615, 259, 274, 259, 274, 48, 50, true, "annotated pages", "annotated pages"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 18385357359584472461, 13895653974217727096, 18446744073709551615, 18446744073709551615, 368, 385, 368, 385, 68, 70, true, "extreme imbalance", "extreme imbalance"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5579859539081108650, 13588256740689789301, 18446744073709551615, 18446744073709551615, 437, 448, 437, 448, 82, 84, true, "label Title", "label Title"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 11179896262039516860, 11055979193001627571, 18446744073709551615, 18446744073709551615, 604, 634, 604, 634, 116, 119, true, "interactive annotation process", "interactive annotation process"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5946733998943492893, 12428281114523894179, 18446744073709551615, 18446744073709551615, 7, 16, 7, 16, 2, 3, true, "algorithm", "algorithm"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161610777240, 12260173115935047807, 18446744073709551615, 18446744073709551615, 127, 132, 127, 132, 24, 25, true, "model", "model"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161667992688, 12263531108286392881, 18446744073709551615, 18446744073709551615, 181, 186, 181, 186, 34, 35, true, "pages", "pages"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14814125472896938138, 16750744787410262504, 18446744073709551615, 18446744073709551615, 191, 199, 191, 199, 37, 38, true, "practice", "practice"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5946733998943492893, 12428281114523913343, 18446744073709551615, 18446744073709551615, 215, 224, 215, 224, 41, 42, true, "algorithm", "algorithm"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 5364746105625482840, 9265279288259411951, 18446744073709551615, 18446744073709551615, 283, 293, 283, 293, 53, 54, true, "equivalent", "equivalent"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206562413425049, 9436735506305030424, 18446744073709551615, 18446744073709551615, 299, 305, 299, 305, 56, 57, true, "couple", "couple"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 6179391750322252074, 7609163653836261740, 18446744073709551615, 18446744073709551615, 309, 318, 309, 318, 58, 59, true, "man-hours", "man-hours"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15359807916847495711, 1899733546879222276, 18446744073709551615, 18446744073709551615, 323, 333, 323, 333, 60, 61, true, "annotation", "annotation"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 389609625696431489, 14766578679289820558, 18446744073709551615, 18446744073709551615, 401, 405, 401, 405, 73, 74, true, "data", "data"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161531686411, 12388798607045074404, 18446744073709551615, 18446744073709551615, 424, 429, 424, 429, 79, 80, true, "cells", "cells"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161531686411, 12388798607045077590, 18446744073709551615, 18446744073709551615, 481, 486, 481, 486, 90, 91, true, "cells", "cells"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161624445793, 12260240665376036393, 18446744073709551615, 18446744073709551615, 496, 501, 496, 501, 93, 94, true, "label", "label"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 389609625541629035, 11509492960090225407, 18446744073709551615, 18446744073709551615, 505, 509, 505, 509, 95, 96, true, "Text", "Text"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161610777240, 12260173115935041441, 18446744073709551615, 18446744073709551615, 521, 526, 521, 526, 100, 101, true, "model", "model"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14634153919632515335, 18085216687124440147, 18446744073709551615, 18446744073709551615, 553, 561, 553, 561, 107, 108, true, "training", "training"], ["term", "single-term", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14103651237077222912, 68076985749864369, 18446744073709551615, 18446744073709551615, 566, 576, 566, 576, 109, 110, true, "predicting", "predicting"], ["verb", "compound-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 17533333750889623004, 9472769641599292929, 18446744073709551615, 18446744073709551615, 225, 246, 225, 246, 42, 46, true, "needs to perform well", "needs to perform well"], ["verb", "compound-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 11061360903444416284, 2129300334891083426, 18446744073709551615, 18446744073709551615, 449, 461, 449, 461, 84, 87, true, "will be much", "will be much"], ["verb", "compound-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 8076044115168679328, 14919913684743101171, 18446744073709551615, 18446744073709551615, 527, 543, 527, 543, 101, 105, true, "needs to be very", "needs to be very"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 12178341415895625823, 9936689573502689067, 18446744073709551615, 18446744073709551615, 20, 23, 20, 23, 4, 5, true, "fit", "fit"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15995920061809434499, 9928580205597280350, 18446744073709551615, 18446744073709551615, 62, 72, 62, 72, 11, 12, true, "identified", "identified"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541486535, 1453464821964574132, 18446744073709551615, 18446744073709551615, 107, 109, 107, 109, 20, 21, true, "is", "is"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 4017818373869155501, 4278801556520084204, 18446744073709551615, 18446744073709551615, 133, 145, 133, 145, 25, 27, true, "can generate", "can generate"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161618623456, 12265975418452186520, 18446744073709551615, 18446744073709551615, 205, 210, 205, 210, 39, 40, true, "means", "means"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206514640520764, 9808439134705831038, 18446744073709551615, 18446744073709551615, 335, 341, 335, 341, 62, 63, true, "Second", "Second"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 8106464533804387051, 8890708598446613982, 18446744073709551615, 18446744073709551615, 345, 352, 345, 352, 64, 66, true, "must be", "must be"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 8106342931007190203, 9172963578761412207, 18446744073709551615, 18446744073709551615, 393, 400, 393, 400, 72, 73, true, "labeled", "labeled"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541486535, 1453464821964494202, 18446744073709551615, 18446744073709551615, 410, 412, 410, 412, 76, 77, true, "is", "is"], ["verb", "single-verb", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 11040131848055511293, 9945998267769661183, 18446744073709551615, 18446744073709551615, 587, 599, 587, 599, 113, 115, true, "will support", "will support"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14228775800344759211, 1968197584824290171, 18446744073709551615, 18446744073709551615, 110, 122, 110, 122, 21, 23, true, "crucial that", "crucial that"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 1993790582685692910, 4432318201339565934, 18446744073709551615, 18446744073709551615, 353, 367, 353, 367, 66, 68, true, "robust against", "robust against"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 2617690495147367356, 14858078827112379350, 18446744073709551615, 18446744073709551615, 413, 423, 413, 423, 77, 79, true, "clear that", "clear that"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 11114365829612930466, 10473638374653243311, 18446744073709551615, 18446744073709551615, 467, 480, 467, 480, 88, 90, true, "uncommon than", "uncommon than"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14637920980696500808, 1921583426992803260, 18446744073709551615, 18446744073709551615, 544, 552, 544, 552, 105, 107, true, "quick in", "quick in"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206535218531925, 10766932565669116762, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 2, true, "For an", "For an"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206560518651853, 18004306290400611053, 18446744073709551615, 18446744073709551615, 24, 30, 24, 30, 5, 7, true, "in the", "in the"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206557726458966, 18019159896119976996, 18446744073709551615, 18446744073709551615, 159, 165, 159, 165, 29, 31, true, "with a", "with a"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485670, 1453464820859683358, 18446744073709551615, 18446744073709551615, 178, 180, 178, 180, 33, 34, true, "of", "of"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541480354, 1453453982064787832, 18446744073709551615, 18446744073709551615, 188, 190, 188, 190, 36, 37, true, "In", "In"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 12178341415895625940, 9936689583318236091, 18446744073709551615, 18446744073709551615, 247, 250, 247, 250, 46, 47, true, "for", "for"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 389609625620237736, 11496284453519477340, 18446744073709551615, 18446744073709551615, 294, 298, 294, 298, 54, 56, true, "of a", "of a"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485670, 1453464820859611646, 18446744073709551615, 18446744073709551615, 306, 308, 306, 308, 57, 58, true, "of", "of"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 12178341415895625940, 9936689583318206889, 18446744073709551615, 18446744073709551615, 319, 322, 319, 322, 59, 60, true, "for", "for"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206565712212855, 9482496689021090789, 18446744073709551615, 18446744073709551615, 386, 392, 386, 392, 70, 72, true, "of the", "of the"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 16381206565712212855, 9482496689021060650, 18446744073709551615, 18446744073709551615, 430, 436, 430, 436, 80, 82, true, "of the", "of the"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 14638857868319795209, 2580294575252347269, 18446744073709551615, 18446744073709551615, 487, 495, 487, 495, 91, 93, true, "with the", "with the"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485670, 1453464820859607533, 18446744073709551615, 18446744073709551615, 502, 504, 502, 504, 94, 95, true, "of", "of"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 329104161786618045, 12383315557724444592, 18446744073709551615, 18446744073709551615, 578, 583, 578, 583, 111, 112, true, "since", "since"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485865, 1453464826493133085, 18446744073709551615, 18446744073709551615, 17, 19, 17, 19, 3, 4, true, "to", "to"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485865, 1453464826493115912, 18446744073709551615, 18446744073709551615, 231, 233, 231, 233, 43, 44, true, "to", "to"], ["conn", "single-conn", 8607014065143641201, "TEXT", "#/texts/56", 1.0, 15441160910541485865, 1453464826493232036, 18446744073709551615, 18446744073709551615, 533, 535, 533, 535, 102, 103, true, "to", "to"], ["parenthesis", "reference", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 12178341415895551595, 3578610866005582188, 18446744073709551615, 18446744073709551615, 42, 45, 42, 45, 8, 9, true, "[2]", "[2]"], ["expression", "wtoken-concatenation", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 12178341415895551595, 3578610866005582188, 18446744073709551615, 18446744073709551615, 42, 45, 42, 45, 8, 9, true, "[2]", "[2]"], ["sentence", "", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 1321865598114444635, 2215181109629009596, 18446744073709551615, 18446744073709551615, 0, 107, 0, 107, 0, 19, true, "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models.", "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models."], ["sentence", "", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 5933656628218586373, 4615859734668438555, 18446744073709551615, 18446744073709551615, 108, 243, 108, 243, 19, 42, true, "Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data.", "Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data."], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 8479084307598384759, 13789581635532023596, 18446744073709551615, 18446744073709551615, 28, 41, 28, 41, 6, 8, true, "random forest", "random forest"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 3663813169945470735, 13771905067446488114, 18446744073709551615, 18446744073709551615, 82, 106, 82, 106, 15, 18, true, "template specific models", "template specific models"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 6407272496581372949, 9529106438636169186, 18446744073709551615, 18446744073709551615, 108, 132, 108, 132, 19, 22, true, "Random forest algorithms", "Random forest algorithms"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 363090472507169169, 8561180021799884651, 18446744073709551615, 18446744073709551615, 183, 199, 183, 199, 32, 34, true, "accurate results", "accurate results"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 4517874168209370779, 3521538572593201674, 18446744073709551615, 18446744073709551615, 227, 242, 227, 242, 39, 41, true, "structured data", "structured data"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 8106478449187889361, 13919224543462497012, 18446744073709551615, 18446744073709551615, 10, 17, 10, 17, 2, 3, true, "reasons", "reasons"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 8106464587473865376, 16361697669749387702, 18446744073709551615, 18446744073709551615, 51, 58, 51, 58, 11, 12, true, "machine", "machine"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 5946733998943492893, 15258259268393418233, 18446744073709551615, 18446744073709551615, 68, 77, 68, 77, 13, 14, true, "algorithm", "algorithm"], ["term", "single-term", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 389609625695123443, 6542771302846269141, 18446744073709551615, 18446744073709551615, 251, 255, 251, 255, 44, 45, true, "case", "case"], ["verb", "compound-verb", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 3672237414008980378, 8168753950103855601, 18446744073709551615, 18446744073709551615, 133, 161, 133, 161, 22, 28, true, "are known to be trained fast", "are known to be trained fast"], ["verb", "compound-verb", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 15705911622867996458, 18088967880959376572, 18446744073709551615, 18446744073709551615, 166, 182, 166, 182, 29, 32, true, "can produce very", "can produce very"], ["verb", "single-verb", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 329104161556620669, 14781203896201770352, 18446744073709551615, 18446744073709551615, 22, 27, 22, 27, 5, 6, true, "chose", "chose"], ["verb", "single-verb", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 14639581097006750428, 4008136393337002779, 18446744073709551615, 18446744073709551615, 59, 67, 59, 67, 12, 13, true, "learning", "learning"], ["conn", "single-conn", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 6560705639796409909, 14533538587347702670, 18446744073709551615, 18446744073709551615, 0, 9, 0, 9, 0, 2, true, "For these", "For these"], ["conn", "single-conn", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 389609625700764258, 6136167252535921243, 18446744073709551615, 18446744073709551615, 46, 50, 46, 50, 9, 11, true, "as a", "as a"], ["conn", "single-conn", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 12178341415895625940, 3578619344503340053, 18446744073709551615, 18446744073709551615, 78, 81, 78, 81, 14, 15, true, "for", "for"], ["conn", "single-conn", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 15441160910541485678, 2424995179865918878, 18446744073709551615, 18446744073709551615, 200, 202, 200, 202, 34, 35, true, "on", "on"], ["conn", "single-conn", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 15441160910541480354, 2424967642558730888, 18446744073709551615, 18446744073709551615, 244, 246, 244, 246, 42, 43, true, "In", "In"], ["conn", "single-conn", 1994904537764312371, "TEXT", "#/texts/57", 1.0, 15441160910541485865, 2424995192349443979, 18446744073709551615, 18446744073709551615, 143, 145, 143, 145, 24, 25, true, "to", "to"], ["sentence", "", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 3849526231748074223, 318347409270939671, 18446744073709551615, 18446744073709551615, 55, 207, 55, 207, 9, 35, true, "Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements.", "Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements."], ["sentence", "", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 5442113846327811609, 17895930310474857340, 18446744073709551615, 18446744073709551615, 208, 346, 208, 346, 35, 58, true, "As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", "As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised."], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 3327207230779122172, 14880303134672427170, 18446744073709551615, 18446744073709551615, 5, 25, 5, 25, 1, 3, true, "structure originates", "structure originates"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 8479084307598384759, 16847024660628644782, 18446744073709551615, 18446744073709551615, 68, 81, 68, 81, 11, 13, true, "random forest", "random forest"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 17604104298028087389, 5936050629867305383, 18446744073709551615, 18446744073709551615, 88, 103, 88, 103, 15, 17, true, "ensemble method", "ensemble method"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 2221030665390994181, 12248418922188426900, 18446744073709551615, 18446744073709551615, 136, 157, 136, 157, 24, 26, true, "distribution function", "distribution function"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 79538879438919706, 10884144148399035709, 18446744073709551615, 18446744073709551615, 183, 206, 183, 206, 32, 34, true, "individual dataelements", "individual dataelements"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 12791568251594841134, 3705185927483894330, 18446744073709551615, 18446744073709551615, 306, 328, 306, 328, 53, 55, true, "distribution functions", "distribution functions"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 16381206562412792821, 5747237974239301187, 18446744073709551615, 18446744073709551615, 29, 35, 29, 35, 4, 5, true, "course", "course"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 14634130803848280536, 6539927821134743025, 18446744073709551615, 18446744073709551615, 45, 53, 45, 53, 7, 8, true, "template", "template"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 14637918593917529467, 18355240018153108157, 18446744073709551615, 18446744073709551615, 165, 173, 165, 173, 28, 29, true, "features", "features"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 2343822922798056892, 6095175646980864634, 18446744073709551615, 18446744073709551615, 213, 224, 213, 224, 37, 38, true, "consequence", "consequence"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 6187814126721711351, 3003359478748945666, 18446744073709551615, 18446744073709551615, 265, 274, 265, 274, 45, 46, true, "imbalance", "imbalance"], ["term", "single-term", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 389609625696431489, 2843435508709525326, 18446744073709551615, 18446744073709551615, 290, 294, 290, 294, 49, 50, true, "data", "data"], ["verb", "compound-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 11444323110081493576, 8704817656229962117, 18446744073709551615, 18446744073709551615, 231, 244, 231, 244, 40, 42, true, "are typically", "are typically"], ["verb", "compound-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 1544956657377891563, 14235903438134168102, 18446744073709551615, 18446744073709551615, 329, 345, 329, 345, 55, 57, true, "are renormalised", "are renormalised"], ["verb", "single-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 15441160910541486535, 2182812634861815046, 18446744073709551615, 18446744073709551615, 82, 84, 82, 84, 13, 14, true, "is", "is"], ["verb", "single-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 8106464574621932200, 11319020816616427866, 18446744073709551615, 18446744073709551615, 105, 112, 105, 112, 18, 19, true, "meaning", "meaning"], ["verb", "single-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 329104161602730070, 3875356443080677100, 18446744073709551615, 18446744073709551615, 123, 128, 123, 128, 21, 22, true, "learn", "learn"], ["verb", "single-verb", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 8106342931007190203, 4209020374123555528, 18446744073709551615, 18446744073709551615, 282, 289, 282, 289, 48, 49, true, "labeled", "labeled"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 1993790582685692910, 7138562820453798245, 18446744073709551615, 18446744073709551615, 250, 264, 250, 264, 43, 45, true, "robust against", "robust against"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 15441160910541485670, 2182812832524328350, 18446744073709551615, 18446744073709551615, 26, 28, 26, 28, 3, 4, true, "of", "of"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 14637917359887717745, 4260221633890204909, 18446744073709551615, 18446744073709551615, 36, 44, 36, 44, 5, 7, true, "from the", "from the"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 389609625631229034, 2814456960913688391, 18446744073709551615, 18446744073709551615, 113, 117, 113, 117, 19, 20, true, "that", "that"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 16381206566339127348, 5718424238614799049, 18446744073709551615, 18446744073709551615, 129, 135, 129, 135, 22, 24, true, "on the", "on the"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 16381206565712212855, 5705860195075376256, 18446744073709551615, 18446744073709551615, 158, 164, 158, 164, 26, 28, true, "of the", "of the"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 389609625539850184, 2844626104089028763, 18446744073709551615, 18446744073709551615, 208, 212, 208, 212, 35, 37, true, "As a", "As a"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 16381206565712212855, 5705860195075407861, 18446744073709551615, 18446744073709551615, 275, 281, 275, 281, 46, 48, true, "of the", "of the"], ["conn", "single-conn", 7742256726079628058, "TEXT", "#/texts/58", 1.0, 6168057894310307081, 5201584897100688456, 18446744073709551615, 18446744073709551615, 296, 305, 296, 305, 51, 53, true, "since the", "since the"], ["parenthesis", "round brackets", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 18214073896143357061, 14194881149350742521, 18446744073709551615, 18446744073709551615, 361, 386, 361, 386, 66, 74, true, "(normal, italic, or bold)", "(normal, italic, or bold)"], ["sentence", "", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14954795956210947038, 8148012250980112641, 18446744073709551615, 18446744073709551615, 0, 122, 0, 122, 0, 23, true, "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties.", "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties."], ["sentence", "", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 1490071144008831449, 94755760377075300, 18446744073709551615, 18446744073709551615, 123, 289, 123, 289, 23, 56, true, "For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells.", "For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells."], ["sentence", "", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 11740301738178623667, 17550032900348134352, 18446744073709551615, 18446744073709551615, 290, 451, 290, 451, 56, 86, true, "Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters.", "Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters."], ["sentence", "", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 11705059311263698902, 5031191200048059765, 18446744073709551615, 18446744073709551615, 452, 689, 452, 689, 86, 122, true, "We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell.", "We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell."], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 7925527528304634469, 3204166286438856601, 18446744073709551615, 18446744073709551615, 4, 24, 4, 24, 1, 4, true, "random forest method", "random forest method"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 6742946212951218383, 17384086898033499364, 18446744073709551615, 18446744073709551615, 72, 86, 72, 86, 15, 17, true, "feature vector", "feature vector"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 6742946212951218383, 17384086898033529847, 18446744073709551615, 18446744073709551615, 140, 154, 140, 154, 27, 29, true, "feature vector", "feature vector"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 18169256436041200544, 976921865353389358, 18446744073709551615, 18446744073709551615, 183, 194, 183, 194, 33, 35, true, "page number", "page number"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 3503955255877193443, 7762227561921017193, 18446744073709551615, 18446744073709551615, 212, 221, 212, 221, 40, 42, true, "text cell", "text cell"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 273962029668891890, 13794602264048178557, 18446744073709551615, 18446744073709551615, 311, 334, 311, 334, 59, 61, true, "geometrical information", "geometrical information"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 5748925367277359212, 2937463379481336632, 18446744073709551615, 18446744073709551615, 350, 360, 350, 360, 64, 66, true, "text style", "text style"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8543478576579995429, 5058478595230737997, 18446744073709551615, 18446744073709551615, 396, 411, 396, 411, 76, 78, true, "text statistics", "text statistics"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 3609517846327801127, 8654423261512076457, 18446744073709551615, 18446744073709551615, 432, 450, 432, 450, 83, 85, true, "numeric characters", "numeric characters"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 2494731039743157046, 13423733116746670576, 18446744073709551615, 18446744073709551615, 503, 524, 503, 524, 94, 96, true, "subsequent iterations", "subsequent iterations"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 5742902273463386997, 4995768139577900128, 18446744073709551615, 18446744073709551615, 530, 557, 530, 557, 97, 101, true, "other random forest methods", "other random forest methods"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 15287023664061002798, 10516263643469914706, 18446744073709551615, 18446744073709551615, 579, 601, 579, 601, 106, 109, true, "enlarged feature space", "enlarged feature space"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 5679217233560191195, 14779368924185208427, 18446744073709551615, 18446744073709551615, 676, 688, 676, 688, 119, 121, true, "current cell", "current cell"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625696024605, 9253399486045178962, 18446744073709551615, 18446744073709551615, 44, 48, 44, 48, 8, 9, true, "cell", "cell"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625632301461, 9250410801167727201, 18446744073709551615, 18446744073709551615, 56, 60, 56, 60, 11, 12, true, "page", "page"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14088628410271132453, 13936532626365380428, 18446744073709551615, 18446744073709551615, 111, 121, 111, 121, 21, 22, true, "properties", "properties"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106397496085150773, 3178546719087309481, 18446744073709551615, 18446744073709551615, 127, 134, 127, 134, 24, 25, true, "example", "example"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14388065630035882329, 7808684742840080268, 18446744073709551615, 18446744073709551615, 164, 175, 164, 175, 30, 31, true, "information", "information"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625741058932, 9253648583843157057, 18446744073709551615, 18446744073709551615, 200, 204, 200, 204, 37, 38, true, "size", "size"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14814126611988074969, 9583758094086294160, 18446744073709551615, 18446744073709551615, 227, 235, 227, 235, 44, 45, true, "position", "position"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14652260393507470214, 16689376904140573845, 18446744073709551615, 18446744073709551615, 252, 260, 252, 260, 50, 51, true, "distance", "distance"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 329104161531686411, 6302060538749910757, 18446744073709551615, 18446744073709551615, 283, 288, 283, 288, 54, 55, true, "cells", "cells"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14637917407223052431, 12528486671610885230, 18446744073709551615, 18446744073709551615, 420, 428, 420, 428, 81, 82, true, "fraction", "fraction"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106478445190161533, 16793107486304810346, 18446744073709551615, 18446744073709551615, 481, 488, 481, 488, 91, 92, true, "results", "results"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206590740615814, 12391907012289685737, 18446744073709551615, 18446744073709551615, 637, 643, 637, 643, 113, 114, true, "labels", "labels"], ["term", "single-term", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 6287765270733427081, 3171255276160848390, 18446744073709551615, 18446744073709551615, 651, 664, 651, 664, 116, 117, true, "neighbourhood", "neighbourhood"], ["verb", "compound-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 17539759121844497705, 10937437759161684631, 18446744073709551615, 18446744073709551615, 25, 38, 25, 38, 4, 7, true, "is applied to", "is applied to"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 329104159219515955, 5948276622309129008, 18446744073709551615, 18446744073709551615, 61, 66, 61, 66, 12, 13, true, "based", "based"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 9321541094732601010, 1440697959529725234, 18446744073709551615, 18446744073709551615, 87, 99, 87, 99, 17, 18, true, "representing", "representing"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14652282307552191074, 8945592419970363175, 18446744073709551615, 18446744073709551615, 155, 163, 155, 163, 29, 30, true, "contains", "contains"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 819608854126323397, 13629426058076355470, 18446744073709551615, 18446744073709551615, 270, 282, 270, 282, 53, 54, true, "neighbouring", "neighbouring"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625632445688, 9250410036172686662, 18446744073709551615, 18446744073709551615, 306, 310, 306, 310, 58, 59, true, "pure", "pure"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106398345764800179, 11692781860860359624, 18446744073709551615, 18446744073709551615, 338, 345, 338, 345, 62, 63, true, "include", "include"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106398106568099440, 984708905024410978, 18446744073709551615, 18446744073709551615, 460, 467, 460, 467, 88, 89, true, "improve", "improve"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14814126654807168093, 9749940858670701320, 18446744073709551615, 18446744073709551615, 472, 480, 472, 480, 90, 91, true, "obtained", "obtained"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 13928988056851964444, 2695484601897645328, 18446744073709551615, 18446744073709551615, 492, 502, 492, 502, 93, 94, true, "performing", "performing"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106342542940968443, 11775695301087073544, 18446744073709551615, 18446744073709551615, 565, 572, 565, 572, 103, 104, true, "operate", "operate"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 6182652448619835769, 17369145016370015604, 18446744073709551615, 18446744073709551615, 602, 611, 602, 611, 109, 110, true, "including", "including"], ["verb", "single-verb", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 6184954633443293966, 11039243749541224362, 18446744073709551615, 18446744073709551615, 627, 636, 627, 636, 112, 113, true, "predicted", "predicted"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206565712212855, 12155489767757724071, 18446744073709551615, 18446744073709551615, 49, 55, 49, 55, 9, 11, true, "of the", "of the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625618762887, 9250465343613782814, 18446744073709551615, 18446744073709551615, 67, 71, 67, 71, 13, 15, true, "on a", "on a"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206569373536144, 16646083731239545289, 18446744073709551615, 18446744073709551615, 100, 106, 100, 106, 18, 20, true, "all of", "all of"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 12178341415896108722, 6936029374485055754, 18446744073709551615, 18446744073709551615, 123, 126, 123, 126, 23, 24, true, "For", "For"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206568455155979, 16651545914296571149, 18446744073709551615, 18446744073709551615, 176, 182, 176, 182, 31, 33, true, "as the", "as the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206565712212855, 12155489767757717168, 18446744073709551615, 18446744073709551615, 205, 211, 205, 211, 38, 40, true, "of the", "of the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206568455155979, 16651545914296575484, 18446744073709551615, 18446744073709551615, 245, 251, 245, 251, 48, 50, true, "as the", "as the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 14637917359887717745, 6508784519982619302, 18446744073709551615, 18446744073709551615, 261, 269, 261, 269, 51, 53, true, "from the", "from the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206568455155979, 16651545914296653810, 18446744073709551615, 18446744073709551615, 413, 419, 413, 419, 79, 81, true, "as the", "as the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 15441160910541485670, 2853352555985182029, 18446744073709551615, 18446744073709551615, 429, 431, 429, 431, 82, 83, true, "of", "of"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 15441160910541486989, 2853352954143451202, 18446744073709551615, 18446744073709551615, 489, 491, 489, 491, 92, 93, true, "by", "by"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 389609625618037948, 9250626276987800313, 18446744073709551615, 18446744073709551615, 525, 529, 525, 529, 96, 97, true, "with", "with"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 329104161572724641, 6270983890392409045, 18446744073709551615, 18446744073709551615, 573, 578, 573, 578, 104, 106, true, "on an", "on an"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 16381206565712212855, 12155489767757875831, 18446744073709551615, 18446744073709551615, 644, 650, 644, 650, 114, 116, true, "of the", "of the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 15388840276945242407, 564998623247738541, 18446744073709551615, 18446744073709551615, 665, 675, 665, 675, 117, 119, true, "around the", "around the"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 8106351192289801590, 15045507645878621842, 18446744073709551615, 18446744073709551615, 36, 43, 36, 43, 6, 8, true, "to each", "to each"], ["conn", "single-conn", 8810233123818174294, "TEXT", "#/texts/59", 1.0, 15441160910541485865, 2853352555402880464, 18446744073709551615, 18446744073709551615, 303, 305, 303, 305, 57, 58, true, "to", "to"], ["sentence", "", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 6373769167897665877, 13936013081570568770, 18446744073709551615, 18446744073709551615, 0, 84, 0, 84, 0, 15, true, "It is important to realize that almost all of these features are purely geometrical.", "It is important to realize that almost all of these features are purely geometrical."], ["sentence", "", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 13043292274507419925, 9292264259899558631, 18446744073709551615, 18446744073709551615, 85, 198, 85, 198, 15, 34, true, "This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents.", "This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents."], ["term", "single-term", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 4047408680480058129, 791889066255431706, 18446744073709551615, 18446744073709551615, 121, 133, 121, 133, 22, 24, true, "same machine", "same machine"], ["term", "single-term", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 1482873404926828774, 10226222577235274804, 18446744073709551615, 18446744073709551615, 171, 197, 171, 197, 30, 33, true, "programmatic PDF documents", "programmatic PDF documents"], ["term", "single-term", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 14637918593917529467, 8112106004831142076, 18446744073709551615, 18446744073709551615, 52, 60, 52, 60, 10, 11, true, "features", "features"], ["term", "single-term", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 8106464574531629743, 10962337505744550461, 18446744073709551615, 18446744073709551615, 143, 150, 143, 150, 25, 26, true, "methods", "methods"], ["verb", "compound-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 15388942013532414882, 7899202709814799236, 18446744073709551615, 18446744073709551615, 61, 71, 61, 71, 11, 13, true, "are purely", "are purely"], ["verb", "compound-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 3371194906505970753, 16420304959018069903, 18446744073709551615, 18446744073709551615, 103, 116, 103, 116, 19, 21, true, "apply exactly", "apply exactly"], ["verb", "single-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 15441160910541486535, 11324533273861188960, 18446744073709551615, 18446744073709551615, 3, 5, 3, 5, 1, 2, true, "is", "is"], ["verb", "single-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 8106478449187824165, 11112254232972886588, 18446744073709551615, 18446744073709551615, 19, 26, 19, 26, 4, 5, true, "realize", "realize"], ["verb", "single-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 16381206569317834029, 9402022932348997822, 18446744073709551615, 18446744073709551615, 90, 96, 90, 96, 16, 17, true, "allows", "allows"], ["verb", "single-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 14639581097006750428, 9268359803668835067, 18446744073709551615, 18446744073709551615, 134, 142, 134, 142, 24, 25, true, "learning", "learning"], ["verb", "single-verb", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 8106478648743879659, 9110333708159415479, 18446744073709551615, 18446744073709551615, 159, 166, 159, 166, 28, 29, true, "scanned", "scanned"], ["conn", "single-conn", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 389609625631229034, 15586907952937505973, 18446744073709551615, 18446744073709551615, 27, 31, 27, 31, 5, 6, true, "that", "that"], ["conn", "single-conn", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 7969468038485950075, 10183770825831524298, 18446744073709551615, 18446744073709551615, 39, 51, 39, 51, 7, 10, true, "all of these", "all of these"], ["conn", "single-conn", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 8106342614190349012, 9383049340028722314, 18446744073709551615, 18446744073709551615, 151, 158, 151, 158, 26, 28, true, "on both", "on both"], ["conn", "single-conn", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 15441160910541485865, 11324533292921284119, 18446744073709551615, 18446744073709551615, 16, 18, 16, 18, 3, 4, true, "to", "to"], ["conn", "single-conn", 16446711449286912460, "TEXT", "#/texts/60", 1.0, 15441160910541485865, 11324533292921287046, 18446744073709551615, 18446744073709551615, 100, 102, 100, 102, 18, 19, true, "to", "to"], ["numval", "ival", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17767354399704235162, 5756427125969625434, 18446744073709551615, 18446744073709551615, 9, 10, 9, 10, 2, 3, true, "2", "2"], ["numval", "ival", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 12178341415896426714, 2843618427898669081, 18446744073709551615, 18446744073709551615, 145, 148, 145, 148, 25, 26, true, "100", "100"], ["numval", "ival", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 12178341415896306457, 2843616608679996346, 18446744073709551615, 18446744073709551615, 182, 185, 182, 185, 30, 31, true, "400", "400"], ["numval", "ival", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17767354399704235158, 5756427126305291768, 18446744073709551615, 18446744073709551615, 205, 206, 205, 206, 35, 36, true, "6", "6"], ["numval", "ival", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17767354399704235162, 5756427125969610002, 18446744073709551615, 18446744073709551615, 231, 232, 231, 232, 40, 41, true, "2", "2"], ["expression", "word-concatenation", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17169426656242239826, 9947433032897910154, 18446744073709551615, 18446744073709551615, 149, 160, 149, 160, 26, 27, true, "open-access", "open-access"], ["expression", "wtoken-concatenation", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206535642250146, 7407819848928651078, 18446744073709551615, 18446744073709551615, 117, 125, 117, 125, 20, 21, true, "B^{12}", "B$^{12}$"], ["expression", "wtoken-concatenation", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 12178341415896195376, 2843610961077514238, 18446744073709551615, 18446744073709551615, 458, 461, 458, 461, 81, 82, true, "99%", "99%"], ["sentence", "", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16985873230757285690, 4799033660507198173, 18446744073709551615, 18446744073709551615, 0, 126, 0, 126, 0, 22, true, "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$.", "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$."], ["sentence", "", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 9489295008065044229, 10701823520823813640, 18446744073709551615, 18446744073709551615, 127, 223, 127, 223, 22, 39, true, "We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels.", "We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels."], ["sentence", "", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 9055398559525017790, 8115071261852986143, 18446744073709551615, 18446744073709551615, 224, 369, 224, 369, 39, 65, true, "Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label.", "Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label."], ["sentence", "", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 9522204688480473546, 14969526070984716859, 18446744073709551615, 18446744073709551615, 370, 462, 370, 462, 65, 83, true, "We observe that the recall and precision numbers are excellent, with most of them above 99%.", "We observe that the recall and precision numbers are excellent, with most of them above 99%."], ["sentence", "", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 1074921700746209986, 10442039656480201719, 18446744073709551615, 18446744073709551615, 463, 558, 463, 558, 83, 100, true, "This is not surprising, since we are building models that specialise for a particular template.", "This is not surprising, since we are building models that specialise for a particular template."], ["term", "enum-term-mark-2", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 11037453576911667853, 1588379900983990272, 18446744073709551615, 18446744073709551615, 325, 345, 325, 345, 57, 60, true, "recall and precision", "recall and precision"], ["term", "enum-term-mark-2", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 11037453576911667853, 1588379900983994608, 18446744073709551615, 18446744073709551615, 390, 410, 390, 410, 69, 72, true, "recall and precision", "recall and precision"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 7309351122725453953, 337257349231081751, 18446744073709551615, 18446744073709551615, 30, 49, 30, 49, 7, 9, true, "performance results", "performance results"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16684308181841452846, 17463669784208672593, 18446744073709551615, 18446744073709551615, 70, 99, 70, 99, 14, 17, true, "particular scientific journal", "particular scientific journal"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16636937637740271145, 5405861143207605185, 18446744073709551615, 18446744073709551615, 101, 125, 101, 125, 18, 21, true, "Physical Review B^{12}", "Physical Review B$^{12}$"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 3970679774622312652, 3451308206383338191, 18446744073709551615, 18446744073709551615, 149, 167, 149, 167, 26, 28, true, "open-access papers", "open-access papers"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17144395416522725511, 2403796765467765884, 18446744073709551615, 18446744073709551615, 207, 222, 207, 222, 36, 38, true, "semantic labels", "semantic labels"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 5497358094214601811, 15347237438195368414, 18446744073709551615, 18446744073709551615, 243, 259, 243, 259, 43, 45, true, "confusion matrix", "confusion matrix"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 17430011809584307966, 717352244476807441, 18446744073709551615, 18446744073709551615, 317, 331, 317, 331, 56, 58, true, "derived recall", "derived recall"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 13620323371457554126, 402277245215938637, 18446744073709551615, 18446744073709551615, 336, 353, 336, 353, 59, 61, true, "precision metrics", "precision metrics"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 13620323047369748942, 9949010308293253292, 18446744073709551615, 18446744073709551615, 401, 418, 401, 418, 71, 73, true, "precision numbers", "precision numbers"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 6423268802182337214, 8832367853516231633, 18446744073709551615, 18446744073709551615, 538, 557, 538, 557, 97, 99, true, "particular template", "particular template"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206567230470443, 6809318760293640371, 18446744073709551615, 18446744073709551615, 57, 63, 57, 63, 11, 12, true, "models", "models"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161667992688, 12417150832156993513, 18446744073709551615, 18446744073709551615, 186, 191, 186, 191, 31, 32, true, "pages", "pages"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206490439671949, 6475152685903048525, 18446744073709551615, 18446744073709551615, 224, 230, 224, 230, 39, 40, true, "Tables", "Tables"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161786359270, 12407069593524672865, 18446744073709551615, 18446744073709551615, 233, 238, 233, 238, 41, 42, true, "shows", "shows"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206590740615814, 3989685327840662497, 18446744073709551615, 18446744073709551615, 295, 301, 295, 301, 51, 52, true, "labels", "labels"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161624445793, 12403043577860159502, 18446744073709551615, 18446744073709551615, 363, 368, 363, 368, 63, 64, true, "label", "label"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206521531485437, 5908878450625800036, 18446744073709551615, 18446744073709551615, 390, 396, 390, 396, 69, 70, true, "recall", "recall"], ["term", "single-term", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206567230470443, 6809318760293617748, 18446744073709551615, 18446744073709551615, 509, 515, 509, 515, 92, 93, true, "models", "models"], ["verb", "compound-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206478391039341, 6631544301857728246, 18446744073709551615, 18446744073709551615, 468, 474, 468, 474, 84, 86, true, "is not", "is not"], ["verb", "compound-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 12677514500900765355, 13122361205873164518, 18446744073709551615, 18446744073709551615, 496, 508, 496, 508, 90, 92, true, "are building", "are building"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 2512422596140069222, 889649029264302336, 18446744073709551615, 18446744073709551615, 15, 25, 15, 25, 5, 6, true, "illustrate", "illustrate"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161556620669, 12424395445817823224, 18446744073709551615, 18446744073709551615, 139, 144, 139, 144, 24, 25, true, "chose", "chose"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 5946726816546568286, 13670733632578799336, 18446744073709551615, 18446744073709551615, 172, 181, 172, 181, 29, 30, true, "annotated", "annotated"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 6184954633443293966, 6350969766554330414, 18446744073709551615, 18446744073709551615, 285, 294, 285, 294, 50, 51, true, "predicted", "predicted"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 8106342033696543838, 3620688114961400856, 18446744073709551615, 18446744073709551615, 373, 380, 373, 380, 66, 67, true, "observe", "observe"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 12178341415895564896, 2843664315664361569, 18446744073709551615, 18446744073709551615, 419, 422, 419, 422, 73, 74, true, "are", "are"], ["verb", "single-verb", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 14105928813319051554, 14261583300981947149, 18446744073709551615, 18446744073709551615, 521, 531, 521, 531, 94, 95, true, "specialise", "specialise"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 15441160910541480354, 13052544442623061343, 18446744073709551615, 18446744073709551615, 0, 2, 0, 2, 0, 1, true, "In", "In"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206565712212855, 6848890042535190429, 18446744073709551615, 18446744073709551615, 50, 56, 50, 56, 9, 11, true, "of the", "of the"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161711024499, 12417932501529865029, 18446744073709551615, 18446744073709551615, 64, 69, 64, 69, 12, 14, true, "for a", "for a"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 15441160910541485670, 13052544660078805351, 18446744073709551615, 18446744073709551615, 192, 194, 192, 194, 32, 33, true, "of", "of"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 389609625618037948, 2187892054443163370, 18446744073709551615, 18446744073709551615, 200, 204, 200, 204, 34, 35, true, "with", "with"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 2011002864325523456, 4381318699110906503, 18446744073709551615, 18446744073709551615, 260, 271, 260, 271, 45, 47, true, "between the", "between the"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 16381206568455155979, 7803994972436827427, 18446744073709551615, 18446744073709551615, 310, 316, 310, 316, 54, 56, true, "as the", "as the"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 14637917333167503367, 3527732148100938158, 18446744073709551615, 18446744073709551615, 354, 362, 354, 362, 61, 63, true, "for each", "for each"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 14634130761162415388, 6412837279061811049, 18446744073709551615, 18446744073709551615, 381, 389, 381, 389, 67, 69, true, "that the", "that the"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 389609625618037948, 2187892054439452539, 18446744073709551615, 18446744073709551615, 434, 438, 434, 438, 76, 77, true, "with", "with"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 15441160910541485670, 13052544660078698144, 18446744073709551615, 18446744073709551615, 444, 446, 444, 446, 78, 79, true, "of", "of"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104159273688418, 11826923278090038152, 18446744073709551615, 18446744073709551615, 452, 457, 452, 457, 80, 81, true, "above", "above"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161786618045, 12406057958801194093, 18446744073709551615, 18446744073709551615, 487, 492, 487, 492, 88, 89, true, "since", "since"], ["conn", "single-conn", 9558434107504657973, "TEXT", "#/texts/61", 1.0, 329104161711024499, 12417932501529436504, 18446744073709551615, 18446744073709551615, 532, 537, 532, 537, 95, 97, true, "for a", "for a"], ["numval", "ival", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 17767354399704235163, 3904423922360684838, 18446744073709551615, 18446744073709551615, 139, 140, 139, 140, 25, 26, true, "3", "3"], ["expression", "word-concatenation", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16381206521481257058, 14297813622105226054, 18446744073709551615, 18446744073709551615, 250, 256, 250, 256, 49, 50, true, "re-use", "re-use"], ["expression", "word-concatenation", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 6307689511527468252, 1214499694592820290, 18446744073709551615, 18446744073709551615, 266, 282, 266, 282, 52, 53, true, "machine-learning", "machine-learning"], ["sentence", "", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 13329249844625854201, 11647195949968361009, 18446744073709551615, 18446744073709551615, 0, 201, 0, 201, 0, 38, true, "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on.", "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on."], ["sentence", "", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14866244958236799246, 1582241290890638298, 18446744073709551615, 18446744073709551615, 202, 390, 202, 390, 38, 71, true, "The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform.", "The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform."], ["sentence", "", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 7056640689505297457, 15898107189486909355, 18446744073709551615, 18446744073709551615, 391, 497, 391, 497, 71, 93, true, "We do not need to define rules and heuristics or update code in order to deal with new types of documents.", "We do not need to define rules and heuristics or update code in order to deal with new types of documents."], ["sentence", "", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 10417108364281472724, 12592678685937578318, 18446744073709551615, 18446744073709551615, 498, 531, 498, 531, 93, 101, true, "We only need to gather more data.", "We only need to gather more data."], ["term", "enum-term-mark-3", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14516628572457889571, 3134557747501443101, 18446744073709551615, 18446744073709551615, 416, 436, 416, 436, 77, 80, true, "rules and heuristics", "rules and heuristics"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14797292965502676681, 16865966694447962424, 18446744073709551615, 18446744073709551615, 14, 31, 14, 31, 3, 6, true, "same ML algorithm", "same ML algorithm"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14139930886739072217, 17176244257802466831, 18446744073709551615, 18446744073709551615, 63, 91, 63, 91, 12, 15, true, "different document templates", "different document templates"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 5449238095598290102, 787778668063090810, 18446744073709551615, 18446744073709551615, 170, 188, 170, 188, 32, 34, true, "different datasets", "different datasets"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 3259939013257966773, 11632558026542658713, 18446744073709551615, 18446744073709551615, 261, 292, 261, 292, 51, 54, true, "same machine-learning algorithm", "same machine-learning algorithm"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 220880082258103430, 3861022815522000004, 18446744073709551615, 18446744073709551615, 305, 321, 305, 321, 56, 58, true, "different models", "different models"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 6774260833957264902, 18407524815623595945, 18446744073709551615, 18446744073709551615, 440, 451, 440, 451, 81, 83, true, "update code", "update code"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 6172031744431621751, 3978488892788269949, 18446744073709551615, 18446744073709551615, 474, 483, 474, 483, 88, 90, true, "new types", "new types"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 8106352625329644634, 17757554404763683613, 18446744073709551615, 18446744073709551615, 116, 123, 116, 123, 21, 22, true, "numbers", "numbers"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16381206590630461421, 3492035996256028050, 18446744073709551615, 18446744073709551615, 206, 212, 206, 212, 39, 40, true, "latter", "latter"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104161594617373, 7515263185749906232, 18446744073709551615, 18446744073709551615, 220, 225, 220, 225, 42, 43, true, "power", "power"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14814125365076808131, 3729402058695751624, 18446744073709551615, 18446744073709551615, 233, 241, 233, 241, 45, 46, true, "platform", "platform"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 389609625696431489, 7894812289861145312, 18446744073709551615, 18446744073709551615, 342, 346, 342, 346, 62, 63, true, "data", "data"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15359807916847495711, 4163871681010445755, 18446744073709551615, 18446744073709551615, 363, 373, 363, 373, 66, 67, true, "annotation", "annotation"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14814125365076808131, 3729402058695741659, 18446744073709551615, 18446744073709551615, 381, 389, 381, 389, 69, 70, true, "platform", "platform"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104161825278214, 8012832663492087338, 18446744073709551615, 18446744073709551615, 416, 421, 416, 421, 77, 78, true, "rules", "rules"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15990705612308896517, 3857150851002098021, 18446744073709551615, 18446744073709551615, 426, 436, 426, 436, 79, 80, true, "heuristics", "heuristics"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104161571401725, 7516826412201686197, 18446744073709551615, 18446744073709551615, 455, 460, 455, 460, 84, 85, true, "order", "order"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 6167933651658664291, 13301884288216877389, 18446744073709551615, 18446744073709551615, 487, 496, 487, 496, 91, 92, true, "documents", "documents"], ["term", "single-term", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 389609625696431489, 7894812289861139762, 18446744073709551615, 18446744073709551615, 526, 530, 526, 530, 99, 100, true, "data", "data"], ["verb", "compound-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14747077774534403201, 7105556452324242867, 18446744073709551615, 18446744073709551615, 32, 59, 32, 59, 6, 11, true, "proves to perform very well", "proves to perform very well"], ["verb", "compound-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14239158811794973922, 10315187407549061159, 18446744073709551615, 18446744073709551615, 394, 415, 394, 415, 72, 77, true, "do not need to define", "do not need to define"], ["verb", "compound-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16971139356283959223, 11139264805031781559, 18446744073709551615, 18446744073709551615, 506, 520, 506, 520, 95, 98, true, "need to gather", "need to gather"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541486535, 15153334710149597966, 18446744073709551615, 18446744073709551615, 96, 98, 96, 98, 17, 18, true, "is", "is"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104161786359265, 8061425452439289620, 18446744073709551615, 18446744073709551615, 124, 129, 124, 129, 22, 23, true, "shown", "shown"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 6171748239210728040, 9366341145459489170, 18446744073709551615, 18446744073709551615, 152, 161, 152, 161, 29, 30, true, "providing", "providing"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104159241569908, 7610535763181481526, 18446744073709551615, 18446744073709551615, 192, 197, 192, 197, 35, 36, true, "train", "train"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541486535, 15153334710149605791, 18446744073709551615, 18446744073709551615, 213, 215, 213, 215, 40, 41, true, "is", "is"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 2873469186180050816, 16698193277509785614, 18446744073709551615, 18446744073709551615, 246, 256, 246, 256, 48, 50, true, "can re-use", "can re-use"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14639576584100401389, 15397542964574277795, 18446744073709551615, 18446744073709551615, 296, 304, 296, 304, 55, 56, true, "generate", "generate"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 329104159219515955, 7608650457402937501, 18446744073709551615, 18446744073709551615, 329, 334, 329, 334, 59, 60, true, "based", "based"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 14639712996089786853, 8663919117501187161, 18446744073709551615, 18446744073709551615, 347, 355, 347, 355, 63, 64, true, "gathered", "gathered"], ["verb", "single-verb", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 389609625696287852, 7895900577300378447, 18446744073709551615, 18446744073709551615, 464, 468, 464, 468, 86, 87, true, "deal", "deal"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 3610960565523737115, 13561712172663392615, 18446744073709551615, 18446744073709551615, 99, 111, 99, 111, 18, 20, true, "evident from", "evident from"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485678, 15153334713242377590, 18446744073709551615, 18446744073709551615, 60, 62, 60, 62, 11, 12, true, "on", "on"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541487053, 15153334931560349328, 18446744073709551615, 18446744073709551615, 93, 95, 93, 95, 16, 17, true, "as", "as"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541486538, 15153334711652936703, 18446744073709551615, 18446744073709551615, 130, 132, 130, 132, 23, 24, true, "in", "in"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541486989, 15153334930388051925, 18446744073709551615, 18446744073709551615, 149, 151, 149, 151, 28, 29, true, "by", "by"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 389609625618037948, 7897898173905938343, 18446744073709551615, 18446744073709551615, 165, 169, 165, 169, 31, 32, true, "with", "with"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485670, 15153334714140199582, 18446744073709551615, 18446744073709551615, 226, 228, 226, 228, 43, 44, true, "of", "of"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16381206566339127348, 14669148860732540835, 18446744073709551615, 18446744073709551615, 335, 341, 335, 341, 60, 62, true, "on the", "on the"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16381206574363061705, 2766189990559691740, 18446744073709551615, 18446744073709551615, 356, 362, 356, 362, 64, 66, true, "by the", "by the"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 16381206566339127348, 14669148860732601697, 18446744073709551615, 18446744073709551615, 374, 380, 374, 380, 67, 69, true, "on the", "on the"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541486538, 15153334711652948392, 18446744073709551615, 18446744073709551615, 452, 454, 452, 454, 83, 84, true, "in", "in"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 389609625618037948, 7897898173905972427, 18446744073709551615, 18446744073709551615, 469, 473, 469, 473, 87, 88, true, "with", "with"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485670, 15153334714140216286, 18446744073709551615, 18446744073709551615, 484, 486, 484, 486, 90, 91, true, "of", "of"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432013192, 18446744073709551615, 18446744073709551615, 39, 41, 39, 41, 7, 8, true, "to", "to"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432022097, 18446744073709551615, 18446744073709551615, 189, 191, 189, 191, 34, 35, true, "to", "to"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432033275, 18446744073709551615, 18446744073709551615, 293, 295, 293, 295, 54, 55, true, "to", "to"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432040505, 18446744073709551615, 18446744073709551615, 406, 408, 406, 408, 75, 76, true, "to", "to"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432019581, 18446744073709551615, 18446744073709551615, 461, 463, 461, 463, 85, 86, true, "to", "to"], ["conn", "single-conn", 18349896906192842040, "TEXT", "#/texts/62", 1.0, 15441160910541485865, 15153334765432014978, 18446744073709551615, 18446744073709551615, 511, 513, 511, 513, 96, 97, true, "to", "to"], ["numval", "fval", 10082834006373808153, "TEXT", "#/texts/63", 1.0, 12178341415896435186, 12354888335591318213, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "3.5", "3.5"], ["parenthesis", "round brackets", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 1949057018516412029, 4731319093472217309, 18446744073709551615, 18446744073709551615, 114, 127, 114, 127, 23, 27, true, "(e.g. tables)", "(e.g. tables)"], ["expression", "common", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 15441160910541487324, 7536681162710661076, 18446744073709551615, 18446744073709551615, 115, 119, 115, 119, 24, 25, true, "eg", "e.g."], ["sentence", "", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 5437557385871984946, 3509044475987188634, 18446744073709551615, 18446744073709551615, 0, 187, 0, 187, 0, 37, true, "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics.", "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics."], ["term", "enum-term-mark-4", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 11674491770136657522, 974643053390547455, 18446744073709551615, 18446744073709551615, 54, 65, 54, 65, 11, 14, true, "JSON or XML", "JSON or XML"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 3435211303988053560, 11739782976917057217, 18446744073709551615, 18446744073709551615, 30, 50, 30, 50, 7, 10, true, "structured data file", "structured data file"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 15966067594173682327, 4212498403530896421, 18446744073709551615, 18446744073709551615, 62, 72, 62, 72, 13, 15, true, "XML format", "XML format"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 6167884912352185035, 12959445755126332733, 18446744073709551615, 18446744073709551615, 115, 126, 115, 126, 24, 26, true, "eg tables", "e.g. tables"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 11738704476441755021, 12646170625007623252, 18446744073709551615, 18446744073709551615, 137, 154, 137, 154, 29, 31, true, "original document", "original document"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 8089093614662532807, 16534350903274590994, 18446744073709551615, 18446744073709551615, 170, 186, 170, 186, 34, 36, true, "layout semantics", "layout semantics"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 3435211303988053560, 11739782976916952568, 18446744073709551615, 18446744073709551615, 193, 213, 193, 213, 38, 41, true, "structured data file", "structured data file"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 18223109722832599454, 8892683979055541833, 18446744073709551615, 18446744073709551615, 266, 277, 266, 277, 50, 52, true, "parsed file", "parsed file"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 5947879501615734370, 16723148886486918663, 18446744073709551615, 18446744073709551615, 8, 17, 8, 17, 2, 3, true, "component", "component"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 389609625541450799, 17523067649600904691, 18446744073709551615, 18446744073709551615, 54, 58, 54, 58, 11, 12, true, "JSON", "JSON"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 389609625631325904, 17516519717884761463, 18446744073709551615, 18446744073709551615, 97, 101, 97, 101, 20, 21, true, "text", "text"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 8106342034010873556, 2969114644928140757, 18446744073709551615, 18446744073709551615, 106, 113, 106, 113, 22, 23, true, "objects", "objects"], ["term", "single-term", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 329104161531686411, 1734187893073550921, 18446744073709551615, 18446744073709551615, 251, 256, 251, 256, 47, 48, true, "cells", "cells"], ["verb", "compound-verb", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 13995199617429053628, 17399804092489638859, 18446744073709551615, 18446744073709551615, 214, 228, 214, 228, 41, 43, true, "is constructed", "is constructed"], ["verb", "single-verb", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 329104159303279946, 1921849107251509286, 18446744073709551615, 18446744073709551615, 22, 27, 22, 27, 5, 6, true, "build", "build"], ["verb", "single-verb", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 14652282307552191074, 8217532366089348940, 18446744073709551615, 18446744073709551615, 80, 88, 80, 88, 17, 18, true, "contains", "contains"], ["verb", "single-verb", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 6168253838748177623, 17899337151654565740, 18446744073709551615, 18446744073709551615, 156, 165, 156, 165, 32, 33, true, "retaining", "retaining"], ["verb", "single-verb", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 5615554093848987331, 567272589042250740, 18446744073709551615, 18446744073709551615, 232, 242, 232, 242, 44, 45, true, "assembling", "assembling"], ["conn", "single-conn", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 8106396862006371970, 13010212846614719649, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "In this", "In this"], ["conn", "single-conn", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 15441160910541486538, 7536681282669100229, 18446744073709551615, 18446744073709551615, 51, 53, 51, 53, 10, 11, true, "in", "in"], ["conn", "single-conn", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 14637917359887717745, 313033658418924384, 18446744073709551615, 18446744073709551615, 128, 136, 128, 136, 27, 29, true, "from the", "from the"], ["conn", "single-conn", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 15441160910541486989, 7536681162200555270, 18446744073709551615, 18446744073709551615, 229, 231, 229, 231, 43, 44, true, "by", "by"], ["conn", "single-conn", 15253541252152665681, "TEXT", "#/texts/64", 1.0, 14637917359887717745, 313033658418932224, 18446744073709551615, 18446744073709551615, 257, 265, 257, 265, 48, 50, true, "from the", "from the"], ["numval", "ival", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 17767354399704235161, 5588649276010093912, 18446744073709551615, 18446744073709551615, 8, 9, 8, 9, 1, 2, true, "1", "1"], ["sentence", "", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 4079383948124449940, 3119139511864959531, 18446744073709551615, 18446744073709551615, 0, 104, 0, 104, 0, 19, true, "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper.", "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper."], ["term", "single-term", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 11674491770136880709, 5217965483515470753, 18446744073709551615, 18446744073709551615, 28, 39, 28, 39, 6, 8, true, "JSON output", "JSON output"], ["term", "single-term", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 12638008641667971393, 2522219315756212794, 18446744073709551615, 18446744073709551615, 47, 72, 47, 72, 10, 13, true, "Corpus Conversion Service", "Corpus Conversion Service"], ["term", "single-term", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 2703018679320364082, 2865230855669483164, 18446744073709551615, 18446744073709551615, 79, 89, 79, 89, 14, 15, true, "conversion", "conversion"], ["term", "single-term", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 329104161668023890, 9676341964876116743, 18446744073709551615, 18446744073709551615, 98, 103, 98, 103, 17, 18, true, "paper", "paper"], ["verb", "single-verb", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 8106471806274607440, 321004264845765781, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 1, true, "Listing", "Listing"], ["conn", "single-conn", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 14637917359887717745, 10704083857127895113, 18446744073709551615, 18446744073709551615, 19, 27, 19, 27, 4, 6, true, "from the", "from the"], ["conn", "single-conn", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 16381206565712212855, 12911931045271253420, 18446744073709551615, 18446744073709551615, 40, 46, 40, 46, 8, 10, true, "of the", "of the"], ["conn", "single-conn", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 329104159268432372, 9679527653046328826, 18446744073709551615, 18446744073709551615, 73, 78, 73, 78, 13, 14, true, "after", "after"], ["conn", "single-conn", 3904142170608486950, "TEXT", "#/texts/65", 1.0, 8106342927224204628, 535620869506699266, 18446744073709551615, 18446744073709551615, 90, 97, 90, 97, 15, 17, true, "of this", "of this"], ["reference", "author", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 1571808557594152175, 14918332605162209401, 18446744073709551615, 18446744073709551615, 302, 315, 298, 311, 72, 74, true, "Michele Dolfi", "Michele Dolfi"], ["reference", "author", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 9737597816447750448, 9883948295774882902, 18446744073709551615, 18446744073709551615, 317, 331, 313, 327, 75, 77, true, "Christoph Auer", "Christoph Auer"], ["reference", "author", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 10999349626623612055, 12887976605120007677, 18446744073709551615, 18446744073709551615, 333, 345, 329, 341, 78, 80, true, "Costas Bekas", "Costas Bekas"], ["reference", "container-title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 12178341415896423241, 31391931283795274, 18446744073709551615, 18446744073709551615, 669, 672, 663, 666, 192, 193, true, "...", "..."], ["reference", "doi", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 17767354399704339002, 18295136162745407537, 18446744073709551615, 18446744073709551615, 349, 352, 345, 346, 82, 83, true, "\u2192", "\u2192"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 9199864015056444061, 9438312166855235376, 18446744073709551615, 18446744073709551615, 3, 17, 3, 17, 2, 5, true, "description ':", "description ':"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 13493359061554120914, 13299266397273620987, 18446744073709551615, 18446744073709551615, 20, 114, 20, 114, 6, 24, true, "'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale", "'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 9861845143209339960, 199840760810459937, 18446744073709551615, 18446744073709551615, 125, 204, 123, 202, 30, 46, true, "abstract ': 'Over the past few decades, the amount of scientific articles [...]", "abstract ': 'Over the past few decades, the amount of scientific articles [...]"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 12622897476193339100, 2200514302757334256, 18446744073709551615, 18446744073709551615, 214, 255, 210, 251, 51, 58, true, "affiliations ': 'IBM Research Rueschlikon", "affiliations ': 'IBM Research Rueschlikon"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 11373032339986566896, 498694585844234180, 18446744073709551615, 18446744073709551615, 273, 300, 269, 296, 63, 71, true, "authors ': 'Peter W J Staar", "authors ': 'Peter W J Staar"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 10805145766135589992, 17856382738633318412, 18446744073709551615, 18446744073709551615, 357, 369, 351, 363, 86, 89, true, "main-text ':", "main-text ':"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106396936981307458, 561260012869256244, 18446744073709551615, 18446744073709551615, 386, 393, 380, 387, 98, 101, true, "bbox ':", "bbox ':"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 17382646052484108955, 4493112625750578679, 18446744073709551615, 18446744073709551615, 395, 428, 389, 422, 102, 109, true, "52.304, 509.750, 168.099, 523.980", "52.304, 509.750, 168.099, 523.980"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 389609625632301461, 11826274251713036201, 18446744073709551615, 18446744073709551615, 432, 436, 426, 430, 112, 113, true, "page", "page"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 16094569755191349317, 16867272237372953839, 18446744073709551615, 18446744073709551615, 447, 472, 441, 466, 120, 125, true, "type ': 'subtitle-level-1", "type ': 'subtitle-level-1"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 3111807266741094717, 15278207992089561669, 18446744073709551615, 18446744073709551615, 477, 500, 471, 494, 128, 134, true, "text ': '1 INTRODUCTION", "text ': '1 INTRODUCTION"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 8106396936981307458, 561260012869264469, 18446744073709551615, 18446744073709551615, 521, 528, 515, 522, 145, 148, true, "bbox ':", "bbox ':"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 1814749400405805993, 510617230931178539, 18446744073709551615, 18446744073709551615, 530, 563, 524, 557, 149, 156, true, "52.304, 337.678, 286.067, 380.475", "52.304, 337.678, 286.067, 380.475"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 389609625632301461, 11826274251713027840, 18446744073709551615, 18446744073709551615, 567, 571, 561, 565, 159, 160, true, "page", "page"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 1634892833417271407, 13158357529214346435, 18446744073709551615, 18446744073709551615, 582, 600, 576, 594, 167, 172, true, "type ': 'paragraph", "type ': 'paragraph"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 14229845479552842969, 3618192917108068284, 18446744073709551615, 18446744073709551615, 605, 663, 599, 657, 175, 188, true, "text ': 'It is estimated that [...] put these into context", "text ': 'It is estimated that [...] put these into context"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 11268829192391503205, 15565340236233210764, 18446744073709551615, 18446744073709551615, 676, 697, 670, 691, 196, 200, true, "tables ': [{...},...]", "tables ': [{...},...]"], ["reference", "title", 6410818076508661508, "TEXT", "#/texts/66", 1.0, 3991782103663973419, 11178135656452670260, 18446744073709551615, 18446744073709551615, 700, 721, 694, 715, 202, 206, true, "images ': [{...},...]", "images ': [{...},...]"], ["parenthesis", "round brackets", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 11589998698201685701, 2108045663283293889, 18446744073709551615, 18446744073709551615, 47, 67, 47, 67, 6, 12, true, "(or human-annotated)", "(or human-annotated)"], ["sentence", "", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 3678194766815209883, 5187453010508481258, 18446744073709551615, 18446744073709551615, 92, 162, 92, 162, 16, 30, true, "It should be noted that no machine learning is used in this component.", "It should be noted that no machine learning is used in this component."], ["sentence", "", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 6629453114376390697, 8094247588633397965, 18446744073709551615, 18446744073709551615, 163, 226, 163, 226, 30, 40, true, "It is purely rule based and therefore completely deterministic.", "It is purely rule based and therefore completely deterministic."], ["term", "single-term", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 7093698410186659732, 515265999464842782, 18446744073709551615, 18446744073709551615, 68, 90, 68, 90, 12, 15, true, "layout semantic labels", "layout semantic labels"], ["term", "single-term", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 16568806906391567217, 17793791609084484746, 18446744073709551615, 18446744073709551615, 119, 135, 119, 135, 22, 24, true, "machine learning", "machine learning"], ["term", "single-term", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 2989796650905950968, 9358591355722397397, 18446744073709551615, 18446744073709551615, 3, 14, 3, 14, 1, 2, true, "combination", "combination"], ["term", "single-term", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 5947879501615734370, 3445417967466009645, 18446744073709551615, 18446744073709551615, 152, 161, 152, 161, 28, 29, true, "component", "component"], ["term", "single-term", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 389609625633008101, 12949720961570224958, 18446744073709551615, 18446744073709551615, 176, 180, 176, 180, 33, 34, true, "rule", "rule"], ["verb", "compound-verb", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 14972061082429253578, 7992280381584678423, 18446744073709551615, 18446744073709551615, 26, 46, 26, 46, 4, 6, true, "associated predicted", "associated predicted"], ["verb", "compound-verb", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 10453859466047522884, 10239611338580811250, 18446744073709551615, 18446744073709551615, 95, 110, 95, 110, 17, 20, true, "should be noted", "should be noted"], ["verb", "compound-verb", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 8106398132977396513, 9427955354524457620, 18446744073709551615, 18446744073709551615, 136, 143, 136, 143, 24, 26, true, "is used", "is used"], ["verb", "compound-verb", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 6181919770894982462, 17141845908897276483, 18446744073709551615, 18446744073709551615, 166, 175, 166, 175, 31, 33, true, "is purely", "is purely"], ["verb", "single-verb", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 5946726816546568286, 14981204101613166078, 18446744073709551615, 18446744073709551615, 57, 66, 57, 66, 10, 11, true, "annotated", "annotated"], ["verb", "single-verb", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 329104159219515955, 12104426966588498612, 18446744073709551615, 18446744073709551615, 181, 186, 181, 186, 34, 35, true, "based", "based"], ["conn", "single-conn", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 15441160910541486538, 13969787273736284569, 18446744073709551615, 18446744073709551615, 0, 2, 0, 2, 0, 1, true, "in", "in"], ["conn", "single-conn", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 389609625618037948, 12948291122374484621, 18446744073709551615, 18446744073709551615, 15, 19, 15, 19, 2, 3, true, "with", "with"], ["conn", "single-conn", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 8106351186178321347, 18145291139271698703, 18446744073709551615, 18446744073709551615, 111, 118, 111, 118, 20, 22, true, "that no", "that no"], ["conn", "single-conn", 12813875992986832439, "TEXT", "#/texts/67", 1.0, 8106398107541152403, 17574223839716805875, 18446744073709551615, 18446744073709551615, 144, 151, 144, 151, 26, 28, true, "in this", "in this"], ["numval", "ival", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 17767354399704235161, 5543555095985442958, 18446744073709551615, 18446744073709551615, 528, 529, 528, 529, 97, 98, true, "1", "1"], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 10627520535034650380, 17531239345629359200, 18446744073709551615, 18446744073709551615, 0, 41, 0, 41, 0, 9, true, "The assembly phase is a two step process.", "The assembly phase is a two step process."], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16727770016948924314, 14992801873823104190, 18446744073709551615, 18446744073709551615, 42, 161, 42, 161, 9, 30, true, "First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order.", "First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order."], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14844075509771675718, 14837722875086120924, 18446744073709551615, 18446744073709551615, 162, 263, 162, 263, 30, 50, true, "Then, the text of all cells that have the same label is contracted into a temporary document objects.", "Then, the text of all cells that have the same label is contracted into a temporary document objects."], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 13321150786190303145, 15098908199975296360, 18446744073709551615, 18446744073709551615, 264, 386, 264, 386, 50, 72, true, "Third, we build the internal structure of the temporary document objects, based on the information provided by the models.", "Third, we build the internal structure of the temporary document objects, based on the information provided by the models."], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 13420423012039011390, 1170303508595995079, 18446744073709551615, 18446744073709551615, 387, 467, 387, 467, 72, 86, true, "The latter is only applicable for internally structured objects, such as tables.", "The latter is only applicable for internally structured objects, such as tables."], ["sentence", "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 12664594763449438938, 4431678268083815697, 18446744073709551615, 18446744073709551615, 468, 530, 468, 530, 86, 99, true, "An example of the generated JSON output is shown in Listing 1.", "An example of the generated JSON output is shown in Listing 1."], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14290303081280478932, 16534232039347859570, 18446744073709551615, 18446744073709551615, 4, 18, 4, 18, 1, 3, true, "assembly phase", "assembly phase"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 17347109100190648605, 3336910274907778664, 18446744073709551615, 18446744073709551615, 28, 40, 28, 40, 6, 8, true, "step process", "step process"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 2317020437411802284, 2943170210053648815, 18446744073709551615, 18446744073709551615, 97, 118, 97, 118, 19, 22, true, "layout semantic label", "layout semantic label"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 15944815540688621742, 12538918597954147758, 18446744073709551615, 18446744073709551615, 204, 214, 204, 214, 40, 42, true, "same label", "same label"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16002692145973620163, 4639543490213745049, 18446744073709551615, 18446744073709551615, 236, 262, 236, 262, 46, 49, true, "temporary document objects", "temporary document objects"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 10566132640081128, 4599927001618331381, 18446744073709551615, 18446744073709551615, 284, 302, 284, 302, 55, 57, true, "internal structure", "internal structure"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16002692145973620163, 4639543490213757407, 18446744073709551615, 18446744073709551615, 310, 336, 310, 336, 59, 62, true, "temporary document objects", "temporary document objects"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 7430002429723240008, 11164050789656870747, 18446744073709551615, 18446744073709551615, 486, 507, 486, 507, 90, 93, true, "generated JSON output", "generated JSON output"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106397416725855571, 6968734469406140499, 18446744073709551615, 18446744073709551615, 53, 60, 53, 60, 12, 13, true, "gathers", "gathers"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104161531686411, 14389223814653826808, 18446744073709551615, 18446744073709551615, 69, 74, 69, 74, 15, 16, true, "cells", "cells"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104161571401725, 14130794494432512208, 18446744073709551615, 18446744073709551615, 155, 160, 155, 160, 28, 29, true, "order", "order"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 389609625631325904, 11472131617453103029, 18446744073709551615, 18446744073709551615, 172, 176, 172, 176, 33, 34, true, "text", "text"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104161531686411, 14389223814653817543, 18446744073709551615, 18446744073709551615, 184, 189, 184, 189, 36, 37, true, "cells", "cells"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104161844229707, 14389961847775103244, 18446744073709551615, 18446744073709551615, 264, 269, 264, 269, 50, 51, true, "Third", "Third"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14388065630035882329, 10056850550847032004, 18446744073709551615, 18446744073709551615, 351, 362, 351, 362, 66, 67, true, "information", "information"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206567230470443, 12055872324404544162, 18446744073709551615, 18446744073709551615, 379, 385, 379, 385, 70, 71, true, "models", "models"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206590630461421, 13437572129232177666, 18446744073709551615, 18446744073709551615, 391, 397, 391, 397, 73, 74, true, "latter", "latter"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106342034010873556, 5767697418284233272, 18446744073709551615, 18446744073709551615, 443, 450, 443, 450, 80, 81, true, "objects", "objects"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206513098478539, 14656247513331790784, 18446744073709551615, 18446744073709551615, 460, 466, 460, 466, 84, 85, true, "tables", "tables"], ["term", "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106397496085150773, 7053203505372722327, 18446744073709551615, 18446744073709551615, 471, 478, 471, 478, 87, 88, true, "example", "example"], ["verb", "compound-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 17902514739826327922, 7529083656148566052, 18446744073709551615, 18446744073709551615, 134, 154, 134, 154, 25, 28, true, "according to reading", "according to reading"], ["verb", "compound-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 12000496086994902479, 13430903801362966440, 18446744073709551615, 18446744073709551615, 215, 228, 215, 228, 42, 44, true, "is contracted", "is contracted"], ["verb", "compound-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106398132970509785, 7717436398183375812, 18446744073709551615, 18446744073709551615, 398, 405, 398, 405, 74, 76, true, "is only", "is only"], ["verb", "compound-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14637951881518043285, 9028879385327672482, 18446744073709551615, 18446744073709551615, 508, 516, 508, 516, 93, 95, true, "is shown", "is shown"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 15441160910541486535, 10491326776662798407, 18446744073709551615, 18446744073709551615, 19, 21, 19, 21, 3, 4, true, "is", "is"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 5615021626537608757, 17464799388347342780, 18446744073709551615, 18446744073709551615, 86, 96, 86, 96, 18, 19, true, "associated", "associated"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104161786092648, 14156877157946491894, 18446744073709551615, 18446744073709551615, 123, 128, 123, 128, 23, 24, true, "sorts", "sorts"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 389609625695387621, 11482694222264222746, 18446744073709551615, 18446744073709551615, 195, 199, 195, 199, 38, 39, true, "have", "have"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104159303279946, 14234820330716235313, 18446744073709551615, 18446744073709551615, 274, 279, 274, 279, 53, 54, true, "build", "build"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 329104159219515955, 13988686724284707554, 18446744073709551615, 18446744073709551615, 338, 343, 338, 343, 63, 64, true, "based", "based"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14814125838089603136, 10841486009179486453, 18446744073709551615, 18446744073709551615, 363, 371, 363, 371, 67, 68, true, "provided", "provided"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 14120356269929906423, 17410768018743515205, 18446744073709551615, 18446744073709551615, 432, 442, 432, 442, 79, 80, true, "structured", "structured"], ["verb", "single-verb", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106471806274607440, 1670327284070813530, 18446744073709551615, 18446744073709551615, 520, 527, 520, 527, 96, 97, true, "Listing", "Listing"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 901249285509952446, 4327457466414367608, 18446744073709551615, 18446744073709551615, 406, 420, 406, 420, 76, 78, true, "applicable for", "applicable for"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 8106478685702231057, 8652417891385661854, 18446744073709551615, 18446744073709551615, 452, 459, 452, 459, 82, 84, true, "such as", "such as"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 389609625618037948, 11467344892940421528, 18446744073709551615, 18446744073709551615, 75, 79, 75, 79, 16, 17, true, "with", "with"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206565712007226, 11723414488362140611, 18446744073709551615, 18446744073709551615, 177, 183, 177, 183, 34, 36, true, "of all", "of all"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206560517276114, 11772523745347271510, 18446744073709551615, 18446744073709551615, 229, 235, 229, 235, 44, 46, true, "into a", "into a"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206565712212855, 11847078438284722432, 18446744073709551615, 18446744073709551615, 303, 309, 303, 309, 57, 59, true, "of the", "of the"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206566339127348, 11698215495646926996, 18446744073709551615, 18446744073709551615, 344, 350, 344, 350, 64, 66, true, "on the", "on the"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206574363061705, 17340129670882622867, 18446744073709551615, 18446744073709551615, 372, 378, 372, 378, 68, 70, true, "by the", "by the"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 16381206565712212855, 11847078438284787116, 18446744073709551615, 18446744073709551615, 479, 485, 479, 485, 88, 90, true, "of the", "of the"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 15441160910541486538, 10491326776470778829, 18446744073709551615, 18446744073709551615, 517, 519, 517, 519, 95, 96, true, "in", "in"], ["conn", "single-conn", 11030869010407626539, "TEXT", "#/texts/68", 1.0, 15441160910541485865, 10491326711005526490, 18446744073709551615, 18446744073709551615, 144, 146, 144, 146, 26, 27, true, "to", "to"], ["numval", "ival", 2142320548375900929, "TEXT", "#/texts/69", 1.0, 17767354399704235156, 16458659285473085163, 18446744073709551615, 18446744073709551615, 0, 1, 0, 1, 0, 1, true, "4", "4"], ["expression", "word-concatenation", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6285955549867796622, 12192460564545960229, 18446744073709551615, 18446744073709551615, 618, 634, 618, 634, 111, 112, true, "time-to-solution", "time-to-solution"], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 11044655914692672378, 2888733359687006370, 18446744073709551615, 18446744073709551615, 0, 123, 0, 123, 0, 22, true, "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated.", "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated."], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 9774189456888168740, 4152543508246757256, 18446744073709551615, 18446744073709551615, 124, 246, 124, 246, 22, 43, true, "Before discussing the technical details, we would like to point out our requirements for the architecture of the platform.", "Before discussing the technical details, we would like to point out our requirements for the architecture of the platform."], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 12407957798033762804, 13470604212648561724, 18446744073709551615, 18446744073709551615, 247, 293, 247, 293, 43, 51, true, "These requirements are all related to scaling.", "These requirements are all related to scaling."], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 4653964671317425985, 17216044985232325101, 18446744073709551615, 18446744073709551615, 294, 461, 294, 461, 51, 83, true, "Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources.", "Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources."], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 17228622883758304054, 15113971675963977401, 18446744073709551615, 18446744073709551615, 462, 680, 462, 680, 83, 121, true, "In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation.", "In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation."], ["sentence", "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6820290209528918513, 3633182920105543370, 18446744073709551615, 18446744073709551615, 681, 777, 681, 777, 121, 138, true, "It is clear that the architecture of such a service is heavily influenced by these requirements.", "It is clear that the architecture of such a service is heavily influenced by these requirements."], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 11289641670498948963, 4109634796027215399, 18446744073709551615, 18446744073709551615, 146, 163, 146, 163, 25, 27, true, "technical details", "technical details"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 4421383392096991748, 4820655472322214248, 18446744073709551615, 18446744073709551615, 443, 460, 443, 460, 80, 82, true, "compute resources", "compute resources"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16088126245064377604, 12842078242820415728, 18446744073709551615, 18446744073709551615, 465, 476, 465, 476, 84, 86, true, "other words", "other words"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 4421383392096991748, 4820655472321830361, 18446744073709551615, 18446744073709551615, 586, 603, 586, 603, 106, 108, true, "compute resources", "compute resources"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106478708629288965, 853306226471699405, 18446744073709551615, 18446744073709551615, 8, 15, 8, 15, 2, 3, true, "section", "section"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 990358581043194791, 2414189034056929402, 18446744073709551615, 18446744073709551615, 37, 50, 37, 50, 8, 9, true, "microservices", "microservices"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 2703018952916355661, 17317252314622786864, 18446744073709551615, 18446744073709551615, 66, 76, 66, 76, 13, 14, true, "components", "components"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14814125365076808131, 4170838424915628816, 18446744073709551615, 18446744073709551615, 84, 92, 84, 92, 16, 17, true, "platform", "platform"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 13240311013633905449, 11928407068432787608, 18446744073709551615, 18446744073709551615, 196, 208, 196, 208, 35, 36, true, "requirements", "requirements"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 11899564443746965611, 1669599917395635316, 18446744073709551615, 18446744073709551615, 217, 229, 217, 229, 38, 39, true, "architecture", "architecture"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14814125365076808131, 4170838424915634854, 18446744073709551615, 18446744073709551615, 237, 245, 237, 245, 41, 42, true, "platform", "platform"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 13240311013633905449, 11928407068432751416, 18446744073709551615, 18446744073709551615, 253, 265, 253, 265, 44, 45, true, "requirements", "requirements"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14814125365076808131, 4170838424915633248, 18446744073709551615, 18446744073709551615, 326, 334, 326, 334, 57, 58, true, "platform", "platform"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206574973295053, 579996873747921936, 18446744073709551615, 18446744073709551615, 353, 359, 353, 359, 62, 63, true, "number", "number"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6167933651658664291, 7440866408497827921, 18446744073709551615, 18446744073709551615, 363, 372, 363, 372, 64, 65, true, "documents", "documents"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206574973295053, 579996873747911416, 18446744073709551615, 18446744073709551615, 378, 384, 378, 384, 67, 68, true, "number", "number"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104159157820437, 15600004509778203866, 18446744073709551615, 18446744073709551615, 388, 393, 388, 393, 69, 70, true, "users", "users"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206574973295053, 579996873747912575, 18446744073709551615, 18446744073709551615, 421, 427, 421, 427, 76, 77, true, "number", "number"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104161517016668, 13957283097469922549, 18446744073709551615, 18446744073709551615, 431, 436, 431, 436, 78, 79, true, "cloud", "cloud"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106478708506632112, 1549478568074441550, 18446744073709551615, 18446744073709551615, 488, 495, 488, 495, 90, 91, true, "service", "service"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14638289822750178210, 16529051670404838156, 18446744073709551615, 18446744073709551615, 512, 520, 512, 520, 94, 95, true, "millions", "millions"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6167933651658664291, 7440866408497716574, 18446744073709551615, 18446744073709551615, 524, 533, 524, 533, 96, 97, true, "documents", "documents"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 3504070246238334482, 7971751554704088263, 18446744073709551615, 18446744073709551615, 553, 562, 553, 562, 100, 101, true, "thousands", "thousands"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104159157820437, 15600004509778174339, 18446744073709551615, 18446744073709551615, 566, 571, 566, 571, 102, 103, true, "users", "users"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6285955549867796622, 12192460564545960229, 18446744073709551615, 18446744073709551615, 618, 634, 618, 634, 111, 112, true, "time-to-solution", "time-to-solution"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104159219994925, 15605472043071850604, 18446744073709551615, 18446744073709551615, 656, 661, 656, 661, 116, 117, true, "times", "times"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6167836358624304835, 12533972813433648220, 18446744073709551615, 18446744073709551615, 670, 679, 670, 679, 119, 120, true, "operation", "operation"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 11899564443746965611, 1669599917395666812, 18446744073709551615, 18446744073709551615, 702, 714, 702, 714, 126, 127, true, "architecture", "architecture"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106478708506632112, 1549478568074460237, 18446744073709551615, 18446744073709551615, 725, 732, 725, 732, 130, 131, true, "service", "service"], ["term", "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 13240311013633905449, 11928407068432784250, 18446744073709551615, 18446744073709551615, 764, 776, 764, 776, 136, 137, true, "requirements", "requirements"], ["verb", "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 12669508327642496792, 11272358114773168348, 18446744073709551615, 18446744073709551615, 93, 105, 93, 105, 17, 19, true, "are deployed", "are deployed"], ["verb", "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 17737636265695672887, 8822130707725823076, 18446744073709551615, 18446744073709551615, 168, 187, 168, 187, 29, 33, true, "would like to point", "would like to point"], ["verb", "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 4717893903194484574, 13497868670598652853, 18446744073709551615, 18446744073709551615, 274, 292, 274, 292, 47, 50, true, "related to scaling", "related to scaling"], ["verb", "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 9576455331508001963, 2005151878314602116, 18446744073709551615, 18446744073709551615, 535, 552, 535, 552, 98, 100, true, "serve potentially", "serve potentially"], ["verb", "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6062403169006746003, 8883787506358796560, 18446744073709551615, 18446744073709551615, 733, 754, 733, 754, 131, 134, true, "is heavily influenced", "is heavily influenced"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14652261806242873016, 7890494648004461696, 18446744073709551615, 18446744073709551615, 20, 28, 20, 28, 5, 6, true, "describe", "describe"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 13632574162947055061, 147315883317329044, 18446744073709551615, 18446744073709551615, 110, 122, 110, 122, 20, 21, true, "orchestrated", "orchestrated"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 5314857828561765555, 11123792899717439144, 18446744073709551615, 18446744073709551615, 131, 141, 131, 141, 23, 24, true, "discussing", "discussing"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 12178341415895564896, 16193825294775180695, 18446744073709551615, 18446744073709551615, 266, 269, 266, 269, 45, 46, true, "are", "are"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8380894560351698162, 15803013507579142869, 18446744073709551615, 18446744073709551615, 311, 321, 311, 321, 54, 56, true, "would like", "would like"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104161785194305, 13942660614226268092, 18446744073709551615, 18446744073709551615, 338, 343, 338, 343, 59, 60, true, "scale", "scale"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104159219515955, 15594698497900091739, 18446744073709551615, 18446744073709551615, 437, 442, 437, 442, 79, 80, true, "based", "based"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 389609625633595931, 15688211806062539958, 18446744073709551615, 18446744073709551615, 481, 485, 481, 485, 88, 89, true, "want", "want"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 2873440693780286732, 10449764614793007239, 18446744073709551615, 18446744073709551615, 501, 511, 501, 511, 92, 94, true, "can ingest", "can ingest"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 329104161785194305, 13942660614225758865, 18446744073709551615, 18446744073709551615, 576, 581, 576, 581, 104, 105, true, "scale", "scale"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541486535, 2048505449065788699, 18446744073709551615, 18446744073709551615, 635, 637, 635, 637, 112, 113, true, "is", "is"], ["verb", "single-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541486535, 2048505449065787833, 18446744073709551615, 18446744073709551615, 684, 686, 684, 686, 122, 123, true, "is", "is"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 6165459236568015364, 497035845389833334, 18446744073709551615, 18446744073709551615, 604, 613, 604, 613, 108, 110, true, "such that", "such that"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16386233399945118620, 6139299107000348345, 18446744073709551615, 18446744073709551615, 638, 651, 638, 651, 113, 115, true, "reasonable at", "reasonable at"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 2617690495147367356, 5753489008096455564, 18446744073709551615, 18446744073709551615, 687, 697, 687, 697, 123, 125, true, "clear that", "clear that"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106396862006371970, 10149877881189646287, 18446744073709551615, 18446744073709551615, 0, 7, 0, 7, 0, 2, true, "In this", "In this"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106398107541243064, 4725564592462762947, 18446744073709551615, 18446744073709551615, 51, 58, 51, 58, 9, 11, true, "in each", "in each"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206565712212855, 16630894630023874072, 18446744073709551615, 18446744073709551615, 59, 65, 59, 65, 11, 13, true, "of the", "of the"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206565712212855, 16630894630023888451, 18446744073709551615, 18446744073709551615, 77, 83, 77, 83, 14, 16, true, "of the", "of the"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206535679983326, 14828520614292756444, 18446744073709551615, 18446744073709551615, 124, 130, 124, 130, 22, 23, true, "Before", "Before"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106397727991264470, 15908125160341103167, 18446744073709551615, 18446744073709551615, 209, 216, 209, 216, 36, 38, true, "for the", "for the"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 16381206565712212855, 16630894630015899877, 18446744073709551615, 18446744073709551615, 230, 236, 230, 236, 39, 41, true, "of the", "of the"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14638857868319795209, 4352025152199097228, 18446744073709551615, 18446744073709551615, 344, 352, 344, 352, 60, 62, true, "with the", "with the"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752346374, 18446744073709551615, 18446744073709551615, 360, 362, 360, 362, 63, 64, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752352577, 18446744073709551615, 18446744073709551615, 385, 387, 385, 387, 68, 69, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752367355, 18446744073709551615, 18446744073709551615, 428, 430, 428, 430, 77, 78, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541480354, 2048505281272838279, 18446744073709551615, 18446744073709551615, 462, 464, 462, 464, 83, 84, true, "In", "In"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752360531, 18446744073709551615, 18446744073709551615, 521, 523, 521, 523, 95, 96, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752440089, 18446744073709551615, 18446744073709551615, 563, 565, 563, 565, 101, 102, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 8106397728094825258, 15814643395009540075, 18446744073709551615, 18446744073709551615, 662, 669, 662, 669, 117, 119, true, "for any", "for any"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485670, 2048505603752438307, 18446744073709551615, 18446744073709551615, 715, 717, 715, 717, 127, 128, true, "of", "of"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 14652255025526908904, 16607703748518201877, 18446744073709551615, 18446744073709551615, 755, 763, 755, 763, 134, 136, true, "by these", "by these"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485865, 2048505449664077172, 18446744073709551615, 18446744073709551615, 179, 181, 179, 181, 31, 32, true, "to", "to"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485865, 2048505449663964299, 18446744073709551615, 18446744073709551615, 282, 284, 282, 284, 48, 49, true, "to", "to"], ["conn", "single-conn", 12747011194397783283, "TEXT", "#/texts/70", 1.0, 15441160910541485865, 2048505449663959877, 18446744073709551615, 18446744073709551615, 335, 337, 335, 337, 58, 59, true, "to", "to"], ["numval", "fval", 174789262945188010, "TEXT", "#/texts/71", 1.0, 12178341415896306585, 8581499132904184537, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "4.1", "4.1"], ["numval", "ival", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 17767354399704235161, 5235953771215622646, 18446744073709551615, 18446744073709551615, 10, 11, 10, 11, 2, 3, true, "1", "1"], ["numval", "ival", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 17767354399704235158, 5235953771432357895, 18446744073709551615, 18446744073709551615, 101, 102, 101, 102, 21, 22, true, "6", "6"], ["sentence", "", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 17007226042152908832, 18331404462945221276, 18446744073709551615, 18446744073709551615, 0, 90, 0, 90, 0, 19, true, "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents.", "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents."], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 16381206514091025767, 10265022769446664856, 18446744073709551615, 18446744073709551615, 3, 9, 3, 9, 1, 2, true, "Figure", "Figure"], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 8106396896178898697, 4219910857709835922, 18446744073709551615, 18446744073709551615, 29, 36, 29, 36, 8, 9, true, "diagram", "diagram"], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 14814125852840540191, 5403353526375880725, 18446744073709551615, 18446744073709551615, 44, 52, 44, 52, 11, 12, true, "pipeline", "pipeline"], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 14814125365076808131, 1502793658629529948, 18446744073709551615, 18446744073709551615, 60, 68, 60, 68, 14, 15, true, "platform", "platform"], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 6167933651658664291, 2252968926517446007, 18446744073709551615, 18446744073709551615, 80, 89, 80, 89, 17, 18, true, "documents", "documents"], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 16381206514091025767, 10265022769446670193, 18446744073709551615, 18446744073709551615, 94, 100, 94, 100, 20, 21, true, "Figure", "Figure"], ["term", "single-term", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 16381206578503830159, 17949034927811561938, 18446744073709551615, 18446744073709551615, 114, 120, 114, 120, 26, 27, true, "sketch", "sketch"], ["verb", "compound-verb", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 5518720687765131523, 9686268265492720351, 18446744073709551615, 18446744073709551615, 16, 26, 16, 26, 5, 7, true, "have shown", "have shown"], ["verb", "single-verb", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 8106476000254393164, 1942641588307467677, 18446744073709551615, 18446744073709551615, 72, 79, 72, 79, 16, 17, true, "process", "process"], ["verb", "single-verb", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 389609625741152123, 12332880687353575352, 18446744073709551615, 18446744073709551615, 107, 111, 107, 111, 24, 25, true, "show", "show"], ["conn", "single-conn", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 15441160910541480354, 17121927414994045497, 18446744073709551615, 18446744073709551615, 0, 2, 0, 2, 0, 1, true, "In", "In"], ["conn", "single-conn", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 15441160910541485670, 17121926226924974742, 18446744073709551615, 18446744073709551615, 37, 39, 37, 39, 9, 10, true, "of", "of"], ["conn", "single-conn", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 16381206566339127348, 9987704510349709695, 18446744073709551615, 18446744073709551615, 53, 59, 53, 59, 12, 14, true, "on the", "on the"], ["conn", "single-conn", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 15441160910541480354, 17121927414994051922, 18446744073709551615, 18446744073709551615, 91, 93, 91, 93, 19, 20, true, "In", "In"], ["conn", "single-conn", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 15441160910541485670, 17121926226924973211, 18446744073709551615, 18446744073709551615, 121, 123, 121, 123, 27, 28, true, "of", "of"], ["conn", "single-conn", 7228893318503650455, "TEXT", "#/texts/72", 1.0, 15441160910541485865, 17121926292459753487, 18446744073709551615, 18446744073709551615, 69, 71, 69, 71, 15, 16, true, "to", "to"], ["sentence", "", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 105368025718952442, 5450071664030950078, 18446744073709551615, 18446744073709551615, 14, 79, 14, 79, 2, 16, true, "As one can observe, we have grouped the service into four layers.", "As one can observe, we have grouped the service into four layers."], ["term", "single-term", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 11899564443746965611, 16284427380227364102, 18446744073709551615, 18446744073709551615, 0, 12, 0, 12, 0, 1, true, "architecture", "architecture"], ["term", "single-term", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 8106478708506632112, 6233289218919425562, 18446744073709551615, 18446744073709551615, 54, 61, 54, 61, 11, 12, true, "service", "service"], ["term", "single-term", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 16381206590620802860, 16233116481575014775, 18446744073709551615, 18446744073709551615, 72, 78, 72, 78, 14, 15, true, "layers", "layers"], ["term", "single-term", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 16381206590620802860, 16233116481574945048, 18446744073709551615, 18446744073709551615, 86, 92, 86, 92, 17, 18, true, "layers", "layers"], ["verb", "compound-verb", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 189925242426617641, 13895959288404047356, 18446744073709551615, 18446744073709551615, 37, 49, 37, 49, 8, 10, true, "have grouped", "have grouped"], ["verb", "single-verb", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 14892726175400695403, 16590583946158903014, 18446744073709551615, 18446744073709551615, 21, 32, 21, 32, 4, 6, true, "can observe", "can observe"], ["verb", "single-verb", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 12178341415895564896, 1634290879494977673, 18446744073709551615, 18446744073709551615, 93, 96, 93, 96, 18, 19, true, "are", "are"], ["conn", "single-conn", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 15441160910541480533, 4819755269055644271, 18446744073709551615, 18446744073709551615, 14, 16, 14, 16, 2, 3, true, "As", "As"], ["conn", "single-conn", 9230667184712205690, "TEXT", "#/texts/73", 1.0, 389609625698622943, 11283567657878655855, 18446744073709551615, 18446744073709551615, 62, 66, 62, 66, 12, 13, true, "into", "into"], ["numval", "ival", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 17767354399704235161, 17804011231002177177, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 15441160910541481977, 15610781920844557983, 18446744073709551615, 18446744073709551615, 275, 277, 275, 277, 46, 47, true, "13", "13"], ["parenthesis", "reference", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 12178341415896395122, 13204308870015609887, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 3, true, "(1)", "(1)"], ["expression", "word-concatenation", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952517228, 18446744073709551615, 18446744073709551615, 42, 50, 42, 50, 9, 10, true, "REST-API", "REST-API"], ["expression", "word-concatenation", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952601508, 18446744073709551615, 18446744073709551615, 138, 146, 138, 146, 27, 28, true, "REST-API", "REST-API"], ["expression", "word-concatenation", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 3753411203337468488, 5100377154689721404, 18446744073709551615, 18446744073709551615, 181, 193, 181, 193, 33, 34, true, "ground-truth", "ground-truth"], ["expression", "word-concatenation", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952522114, 18446744073709551615, 18446744073709551615, 209, 217, 209, 217, 37, 38, true, "REST-API", "REST-API"], ["sentence", "", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 5975418266041050143, 7900187415786058174, 18446744073709551615, 18446744073709551615, 4, 204, 4, 204, 3, 36, true, "An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering.", "An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering."], ["sentence", "", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 16292425914778879272, 3552138339604334986, 18446744073709551615, 18446744073709551615, 205, 307, 205, 307, 36, 53, true, "The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", "The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python."], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 8692614377683751894, 6543009394914129596, 18446744073709551615, 18446744073709551615, 7, 22, 7, 22, 4, 6, true, "interface layer", "interface layer"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 11968118699453218413, 16853549282351953165, 18446744073709551615, 18446744073709551615, 57, 70, 57, 70, 12, 14, true, "user frontend", "user frontend"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 11968118699453218413, 16853549282351954610, 18446744073709551615, 18446744073709551615, 76, 89, 76, 89, 16, 18, true, "user frontend", "user frontend"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 17244914598722202958, 15499641398155697595, 18446744073709551615, 18446744073709551615, 96, 117, 96, 117, 20, 22, true, "AngularJS application", "AngularJS application"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 1591019414094504294, 17465194081095954832, 18446744073709551615, 18446744073709551615, 181, 203, 181, 203, 33, 35, true, "ground-truth gathering", "ground-truth gathering"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 17622757252402159492, 18169862670430999681, 18446744073709551615, 18446744073709551615, 252, 274, 252, 274, 44, 46, true, "OpenAPI specifications", "OpenAPI specifications"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952517228, 18446744073709551615, 18446744073709551615, 42, 50, 42, 50, 9, 10, true, "REST-API", "REST-API"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 12178341415895527965, 13202575941196575545, 18446744073709551615, 18446744073709551615, 127, 130, 127, 130, 24, 25, true, "top", "top"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952601508, 18446744073709551615, 18446744073709551615, 138, 146, 138, 146, 27, 28, true, "REST-API", "REST-API"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 15359807916847569012, 12690297070768585539, 18446744073709551615, 18446744073709551615, 166, 176, 166, 176, 31, 32, true, "annotators", "annotators"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14652188385287077849, 15073411726952522114, 18446744073709551615, 18446744073709551615, 209, 217, 209, 217, 37, 38, true, "REST-API", "REST-API"], ["term", "single-term", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 16381206485156459004, 2190123068096885489, 18446744073709551615, 18446744073709551615, 300, 306, 300, 306, 51, 52, true, "Python", "Python"], ["verb", "compound-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 14637952033947516066, 1170598421847841274, 18446744073709551615, 18446744073709551615, 218, 226, 218, 226, 38, 40, true, "is built", "is built"], ["verb", "compound-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 116696039858106091, 15266057091493719963, 18446744073709551615, 18446744073709551615, 231, 247, 231, 247, 41, 43, true, "documented using", "documented using"], ["verb", "compound-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 37170045853396780, 10862211224580790545, 18446744073709551615, 18446744073709551615, 282, 296, 282, 296, 48, 50, true, "is implemented", "is implemented"], ["verb", "single-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 5584174880054122043, 13797832223961649041, 18446744073709551615, 18446744073709551615, 29, 39, 29, 39, 7, 8, true, "implements", "implements"], ["verb", "single-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 15441160910541486535, 15610783856184804840, 18446744073709551615, 18446744073709551615, 90, 92, 90, 92, 18, 19, true, "is", "is"], ["verb", "single-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 329104159303279946, 14770817403596920463, 18446744073709551615, 18446744073709551615, 118, 123, 118, 123, 22, 23, true, "build", "build"], ["verb", "single-verb", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 5584174880054122043, 13797832223961674239, 18446744073709551615, 18446744073709551615, 151, 161, 151, 161, 29, 30, true, "implements", "implements"], ["conn", "single-conn", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 15441160910541485678, 15610783856720662618, 18446744073709551615, 18446744073709551615, 124, 126, 124, 126, 23, 24, true, "on", "on"], ["conn", "single-conn", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 16381206565712212855, 3629200545768582333, 18446744073709551615, 18446744073709551615, 131, 137, 131, 137, 25, 27, true, "of the", "of the"], ["conn", "single-conn", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 12178341415895625940, 13202525365648469119, 18446744073709551615, 18446744073709551615, 177, 180, 177, 180, 32, 33, true, "for", "for"], ["conn", "single-conn", 17419815751432442882, "TEXT", "#/texts/74", 1.0, 15441160910541486538, 15610783856135866232, 18446744073709551615, 18446744073709551615, 297, 299, 297, 299, 50, 51, true, "in", "in"], ["numval", "ival", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 17767354399704235162, 766019618037252930, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "2", "2"], ["parenthesis", "reference", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 12178341415896395187, 17029329038495000300, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 3, true, "(2)", "(2)"], ["parenthesis", "round brackets", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 1812897535394120128, 3870874385890063204, 18446744073709551615, 18446744073709551615, 303, 497, 303, 497, 51, 87, true, "(e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images)", "(e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images)"], ["expression", "common", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541487324, 5910392785272575830, 18446744073709551615, 18446744073709551615, 304, 309, 304, 309, 52, 53, true, "eg", "e. g."], ["expression", "word-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 6187817560337829240, 10074786117573267255, 18446744073709551615, 18446744073709551615, 222, 231, 222, 231, 39, 40, true, "in-memory", "in-memory"], ["expression", "word-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 10210587797782980674, 269685690895573883, 18446744073709551615, 18446744073709551615, 653, 667, 653, 667, 114, 115, true, "fault-tolerant", "fault-tolerant"], ["expression", "wtoken-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15503455610017494293, 14071574465570134500, 18446744073709551615, 18446744073709551615, 175, 190, 175, 190, 31, 32, true, "RabbitMQ^{14}", "RabbitMQ$^{14}$"], ["expression", "wtoken-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 9275871508895795608, 13145605651607786139, 18446744073709551615, 18446744073709551615, 243, 255, 243, 255, 42, 43, true, "Redis^{15}", "Redis$^{15}$"], ["sentence", "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 4025108859080697854, 7684932098811697541, 18446744073709551615, 18446744073709551615, 4, 122, 4, 122, 3, 22, true, "An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result.", "An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result."], ["sentence", "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 10578140482875773017, 5422705622766817180, 18446744073709551615, 18446744073709551615, 123, 191, 123, 191, 22, 33, true, "The task scheduling is done with the Message Broker RabbitMQ$^{14}$.", "The task scheduling is done with the Message Broker RabbitMQ$^{14}$."], ["sentence", "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 13063193318125667342, 9479480819005320541, 18446744073709551615, 18446744073709551615, 192, 256, 192, 256, 33, 44, true, "The results are stored in the in-memory data store Redis$^{15}$.", "The results are stored in the in-memory data store Redis$^{15}$."], ["sentence", "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 1495400135141364806, 13254541674959843841, 18446744073709551615, 18446744073709551615, 257, 612, 257, 612, 44, 106, true, "In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully.", "In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully."], ["sentence", "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 1841397930198716309, 11891118918529386819, 18446744073709551615, 18446744073709551615, 613, 702, 613, 702, 106, 121, true, "This approach allows for a very robust, fault-tolerant service with very little downtime.", "This approach allows for a very robust, fault-tolerant service with very little downtime."], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 881931955171775830, 1850213153382221251, 18446744073709551615, 18446744073709551615, 7, 26, 7, 26, 4, 6, true, "orchestration layer", "orchestration layer"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16569031532297427649, 2288991119528845313, 18446744073709551615, 18446744073709551615, 88, 104, 88, 104, 16, 18, true, "execution status", "execution status"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 12318137194760091867, 7829156630170179123, 18446744073709551615, 18446744073709551615, 109, 121, 109, 121, 19, 21, true, "final result", "final result"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 6315348039533026141, 3213548333245122529, 18446744073709551615, 18446744073709551615, 127, 142, 127, 142, 23, 25, true, "task scheduling", "task scheduling"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8295209353697935236, 9538738240789817737, 18446744073709551615, 18446744073709551615, 160, 190, 160, 190, 29, 32, true, "Message Broker RabbitMQ^{14}", "Message Broker RabbitMQ$^{14}$"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 10696383395384997690, 12295999164377771107, 18446744073709551615, 18446744073709551615, 222, 255, 222, 255, 39, 43, true, "in-memory data store Redis^{15}", "in-memory data store Redis$^{15}$"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 1969668578613914549, 8249504550464603474, 18446744073709551615, 18446744073709551615, 277, 302, 277, 302, 48, 51, true, "certain consecutive tasks", "certain consecutive tasks"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 14650937348812924036, 6147429119200407258, 18446744073709551615, 18446744073709551615, 320, 328, 320, 328, 55, 57, true, "PDF page", "PDF page"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15096203362930329687, 5146287085934249298, 18446744073709551615, 18446744073709551615, 390, 411, 390, 411, 67, 70, true, "programmatic PDF page", "programmatic PDF page"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 9914165367421220601, 17615564355797482202, 18446744073709551615, 18446744073709551615, 446, 457, 446, 457, 77, 79, true, "OCR service", "OCR service"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 10873436773834842694, 9664366626516883504, 18446744073709551615, 18446744073709551615, 537, 553, 537, 553, 95, 97, true, "subsequent steps", "subsequent steps"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16455430351063957858, 8578040699139249120, 18446744073709551615, 18446744073709551615, 653, 675, 653, 675, 114, 116, true, "fault-tolerant service", "fault-tolerant service"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 3478107702264293237, 8540445734424787667, 18446744073709551615, 18446744073709551615, 686, 701, 686, 701, 118, 120, true, "little downtime", "little downtime"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104159214088329, 8194156825567116069, 18446744073709551615, 18446744073709551615, 46, 51, 46, 51, 9, 10, true, "tasks", "tasks"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 990358581043194791, 1684395431220408370, 18446744073709551615, 18446744073709551615, 60, 73, 60, 73, 12, 13, true, "microservices", "microservices"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206578935372333, 8703937210789941258, 18446744073709551615, 18446744073709551615, 75, 81, 75, 81, 14, 15, true, "stores", "stores"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106478445190161533, 10842974023255663515, 18446744073709551615, 18446744073709551615, 196, 203, 196, 203, 34, 35, true, "results", "results"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104161571401725, 8064733993734324746, 18446744073709551615, 18446744073709551615, 260, 265, 260, 265, 45, 46, true, "order", "order"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541487324, 5910392785272575830, 18446744073709551615, 18446744073709551615, 304, 309, 304, 309, 52, 53, true, "eg", "e. g."], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206560620045048, 3483212362973437467, 18446744073709551615, 18446744073709551615, 351, 357, 351, 357, 60, 61, true, "images", "images"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106479143794098783, 12365847001997486008, 18446744073709551615, 18446744073709551615, 375, 382, 375, 382, 64, 65, true, "parsing", "parsing"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206560620045048, 3483212362973413702, 18446744073709551615, 18446744073709551615, 427, 433, 427, 433, 73, 74, true, "images", "images"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104161531686411, 8019458249201971616, 18446744073709551615, 18446744073709551615, 473, 478, 473, 478, 82, 83, true, "cells", "cells"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206560620045048, 3483212362973442358, 18446744073709551615, 18446744073709551615, 490, 496, 490, 496, 85, 86, true, "images", "images"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104159214088329, 8194156825567072634, 18446744073709551615, 18446744073709551615, 520, 525, 520, 525, 91, 92, true, "tasks", "tasks"], ["term", "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 14650448032998792781, 16582753633148168921, 18446744073709551615, 18446744073709551615, 618, 626, 618, 626, 107, 108, true, "approach", "approach"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106398132958436429, 6971888441348882646, 18446744073709551615, 18446744073709551615, 143, 150, 143, 150, 25, 27, true, "is done", "is done"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15388942590337907789, 4280950829150221216, 18446744073709551615, 18446744073709551615, 204, 214, 204, 214, 35, 37, true, "are stored", "are stored"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 11296839647862937485, 4164176593204929584, 18446744073709551615, 18446744073709551615, 334, 350, 334, 350, 58, 60, true, "embedded scanned", "embedded scanned"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 13646629520376931899, 8519320198473003477, 18446744073709551615, 18446744073709551615, 358, 372, 358, 372, 61, 63, true, "requires first", "requires first"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 17982373942613951464, 3937427824351288252, 18446744073709551615, 18446744073709551615, 554, 571, 554, 571, 97, 100, true, "are only executed", "are only executed"], ["verb", "compound-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 12714940176042944879, 15936925534730964046, 18446744073709551615, 18446744073709551615, 588, 611, 588, 611, 103, 105, true, "terminated successfully", "terminated successfully"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 6168537129726002426, 152533883599703009, 18446744073709551615, 18446744073709551615, 32, 41, 32, 41, 7, 8, true, "schedules", "schedules"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106475907566715134, 4681167473274342910, 18446744073709551615, 18446744073709551615, 269, 276, 269, 276, 47, 48, true, "perform", "perform"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106479143794098783, 12365847001997489621, 18446744073709551615, 18446744073709551615, 310, 317, 310, 317, 53, 54, true, "parsing", "parsing"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106397496930289884, 8772117640065310876, 18446744073709551615, 18446744073709551615, 415, 422, 415, 422, 71, 72, true, "extract", "extract"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106397496930289884, 8772117640065258856, 18446744073709551615, 18446744073709551615, 461, 468, 461, 468, 80, 81, true, "extract", "extract"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104161556625920, 8062732787574674007, 18446744073709551615, 18446744073709551615, 514, 519, 514, 519, 90, 91, true, "chain", "chain"], ["verb", "single-verb", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206569317834029, 14824105829638492947, 18446744073709551615, 18446744073709551615, 627, 633, 627, 633, 108, 109, true, "allows", "allows"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 6165459236568015364, 2354429920637518397, 18446744073709551615, 18446744073709551615, 527, 536, 527, 536, 93, 95, true, "such that", "such that"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 8106397727991264470, 2795094083464015695, 18446744073709551615, 18446744073709551615, 52, 59, 52, 59, 10, 12, true, "for the", "for the"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 14638857868319795209, 456352319925648448, 18446744073709551615, 18446744073709551615, 151, 159, 151, 159, 27, 29, true, "with the", "with the"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206560518651853, 3478068517407956556, 18446744073709551615, 18446744073709551615, 215, 221, 215, 221, 37, 39, true, "in the", "in the"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541480354, 5910392698548049506, 18446744073709551615, 18446744073709551615, 257, 259, 257, 259, 44, 45, true, "In", "In"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 389609625618037948, 4452313474781359012, 18446744073709551615, 18446744073709551615, 329, 333, 329, 333, 57, 58, true, "with", "with"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206565712212855, 14812884387065852796, 18446744073709551615, 18446744073709551615, 383, 389, 383, 389, 65, 67, true, "of the", "of the"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16057368201763467386, 5837363433449722185, 18446744073709551615, 18446744073709551615, 479, 489, 479, 489, 83, 85, true, "from these", "from these"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 16381206478470086874, 3195302339494553733, 18446744073709551615, 18446744073709551615, 572, 578, 572, 578, 100, 102, true, "if the", "if the"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 329104161711024499, 8003597956646423596, 18446744073709551615, 18446744073709551615, 634, 639, 634, 639, 109, 111, true, "for a", "for a"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 389609625618037948, 4452313474781410633, 18446744073709551615, 18446744073709551615, 676, 680, 676, 680, 116, 117, true, "with", "with"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541485865, 5910392672594327793, 18446744073709551615, 18446744073709551615, 266, 268, 266, 268, 46, 47, true, "to", "to"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541485865, 5910392672594322553, 18446744073709551615, 18446744073709551615, 412, 414, 412, 414, 70, 71, true, "to", "to"], ["conn", "single-conn", 11194226403360998426, "TEXT", "#/texts/75", 1.0, 15441160910541485865, 5910392672594307233, 18446744073709551615, 18446744073709551615, 458, 460, 458, 460, 79, 80, true, "to", "to"], ["numval", "ival", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 17767354399704235163, 6623757277320803060, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "3", "3"], ["numval", "ival", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 17767354399704235163, 6623757277320810717, 18446744073709551615, 18446744073709551615, 74, 75, 74, 75, 13, 14, true, "3", "3"], ["parenthesis", "reference", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 12178341415896394992, 10915561974328134756, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 3, true, "(3)", "(3)"], ["parenthesis", "round brackets", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 9121140803212188746, 5062856742962380004, 18446744073709551615, 18446744073709551615, 148, 200, 148, 200, 26, 38, true, "(e.g. parsing, training, predictions, assembly, etc)", "(e.g. parsing, training, predictions, assembly, etc)"], ["expression", "common", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541487324, 13616139199714584790, 18446744073709551615, 18446744073709551615, 149, 153, 149, 153, 27, 28, true, "eg", "e.g."], ["expression", "wtoken-concatenation", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 10737622929664928958, 8651677615518322335, 18446744073709551615, 18446744073709551615, 332, 346, 332, 346, 61, 62, true, "library^{16}", "library$^{16}$"], ["sentence", "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 2864828402709972468, 11332229229505390779, 18446744073709551615, 18446744073709551615, 4, 201, 4, 201, 3, 39, true, "A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc).", "A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc)."], ["sentence", "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15435795280083407866, 4623434773980135134, 18446744073709551615, 18446744073709551615, 202, 347, 202, 347, 39, 63, true, "In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$.", "In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$."], ["sentence", "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 11964442506319969125, 6249550519038030170, 18446744073709551615, 18446744073709551615, 348, 503, 348, 503, 63, 90, true, "This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker.", "This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker."], ["sentence", "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 17777896679674985198, 11534624099623021000, 18446744073709551615, 18446744073709551615, 504, 579, 504, 579, 90, 106, true, "The workers are not only consumers of tasks, but may also produce new ones.", "The workers are not only consumers of tasks, but may also produce new ones."], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 5470814617574924291, 724407251113024, 18446744073709551615, 18446744073709551615, 6, 19, 6, 19, 4, 6, true, "compute layer", "compute layer"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 9909278470053653981, 12829744493301079322, 18446744073709551615, 18446744073709551615, 124, 147, 124, 147, 24, 26, true, "available microservices", "available microservices"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14058638345038458245, 12275078423904715575, 18446744073709551615, 18446744073709551615, 149, 161, 149, 161, 27, 29, true, "eg parsing", "e.g. parsing"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 4681591099663460584, 16278920567637920045, 18446744073709551615, 18446744073709551615, 304, 314, 304, 314, 56, 58, true, "task queue", "task queue"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 11198085877607539434, 9973077180559472567, 18446744073709551615, 18446744073709551615, 325, 346, 325, 346, 60, 62, true, "Celery library^{16}", "Celery library$^{16}$"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 4421383392096991748, 16024629256340455225, 18446744073709551615, 18446744073709551615, 388, 405, 388, 405, 70, 72, true, "compute resources", "compute resources"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8115411903316729668, 9549090779302063453, 18446744073709551615, 18446744073709551615, 524, 538, 524, 538, 94, 96, true, "only consumers", "only consumers"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14814151107139696752, 1255249427891598545, 18446744073709551615, 18446744073709551615, 570, 578, 570, 578, 103, 105, true, "new ones", "new ones"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 990358581043194791, 13405780939829855380, 18446744073709551615, 18446744073709551615, 40, 53, 40, 53, 9, 10, true, "microservices", "microservices"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106478708629288965, 12667054332205292279, 18446744073709551615, 18446744073709551615, 66, 73, 66, 73, 12, 13, true, "section", "section"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106478059506484182, 10697147794249982519, 18446744073709551615, 18446744073709551615, 89, 96, 89, 96, 18, 19, true, "workers", "workers"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104161624475862, 13848348646967812138, 18446744073709551615, 18446744073709551615, 105, 110, 105, 110, 21, 22, true, "layer", "layer"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14634153919632515335, 7524102672994522753, 18446744073709551615, 18446744073709551615, 163, 171, 163, 171, 30, 31, true, "training", "training"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15175963360124346573, 14989804171272821450, 18446744073709551615, 18446744073709551615, 173, 184, 173, 184, 32, 33, true, "predictions", "predictions"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14650448171968968290, 3790776944315438052, 18446744073709551615, 18446744073709551615, 186, 194, 186, 194, 34, 35, true, "assembly", "assembly"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104161571401725, 13849737740624165279, 18446744073709551615, 18446744073709551615, 205, 210, 205, 210, 40, 41, true, "order", "order"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206521526353544, 1782906868391855387, 18446744073709551615, 18446744073709551615, 225, 231, 225, 231, 44, 45, true, "regard", "regard"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 6168338487309432467, 5928008866800453885, 18446744073709551615, 18446744073709551615, 235, 244, 235, 244, 46, 47, true, "resources", "resources"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16682817150367627875, 17485526265701101, 18446744073709551615, 18446744073709551615, 272, 284, 272, 284, 52, 53, true, "microservice", "microservice"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206557159905849, 8006079737033218220, 18446744073709551615, 18446744073709551615, 418, 424, 418, 424, 75, 76, true, "worker", "worker"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106398485449787361, 5675061092301137187, 18446744073709551615, 18446744073709551615, 461, 468, 461, 468, 82, 83, true, "cluster", "cluster"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206570348587859, 15808885045293000288, 18446744073709551615, 18446744073709551615, 496, 502, 496, 502, 88, 89, true, "broker", "broker"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106478059506484182, 10697147794249956396, 18446744073709551615, 18446744073709551615, 508, 515, 508, 515, 91, 92, true, "workers", "workers"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104159214088329, 14097861728688353508, 18446744073709551615, 18446744073709551615, 542, 547, 542, 547, 97, 98, true, "tasks", "tasks"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 389609625695123443, 614461596643170667, 18446744073709551615, 18446744073709551615, 592, 596, 592, 596, 109, 110, true, "case", "case"], ["term", "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14634109266514673153, 16089458740036970126, 18446744073709551615, 18446744073709551615, 605, 613, 605, 613, 112, 113, true, "requests", "requests"], ["verb", "compound-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 13859584371553084961, 6953162611440438890, 18446744073709551615, 18446744073709551615, 249, 266, 249, 266, 49, 51, true, "have encapsulated", "have encapsulated"], ["verb", "compound-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 288538720869017437, 4206979805055504968, 18446744073709551615, 18446744073709551615, 425, 453, 425, 453, 76, 80, true, "can be spawned automatically", "can be spawned automatically"], ["verb", "compound-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106397797831668975, 4782227961575271919, 18446744073709551615, 18446744073709551615, 516, 523, 516, 523, 92, 94, true, "are not", "are not"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 5584174880054122043, 15438871383215853010, 18446744073709551615, 18446744073709551615, 25, 35, 25, 35, 7, 8, true, "implements", "implements"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14652261813489544447, 11247279661113316629, 18446744073709551615, 18446744073709551615, 54, 62, 54, 62, 10, 11, true, "detailed", "detailed"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14652255854767583909, 12382046103724054031, 18446744073709551615, 18446744073709551615, 111, 119, 111, 119, 22, 23, true, "executes", "executes"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104161785194305, 13850093838201494630, 18446744073709551615, 18446744073709551615, 214, 219, 214, 219, 42, 43, true, "scale", "scale"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 1477344672819384985, 8283526875963376019, 18446744073709551615, 18446744073709551615, 292, 303, 292, 303, 55, 56, true, "distributed", "distributed"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104159157798023, 14298779374945162593, 18446744073709551615, 18446744073709551615, 315, 320, 315, 320, 58, 59, true, "using", "using"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206569317834029, 15096333879001227968, 18446744073709551615, 18446744073709551615, 353, 359, 353, 359, 64, 65, true, "allows", "allows"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 329104161785194305, 13850093838201414935, 18446744073709551615, 18446744073709551615, 378, 383, 378, 383, 68, 69, true, "scale", "scale"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14634109580260092070, 816619907238816136, 18446744073709551615, 18446744073709551615, 473, 481, 473, 481, 84, 85, true, "register", "register"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106476000256008955, 10662702206647144879, 18446744073709551615, 18446744073709551615, 562, 569, 562, 569, 102, 103, true, "produce", "produce"], ["verb", "single-verb", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541486535, 13616139232205762064, 18446744073709551615, 18446744073709551615, 585, 587, 585, 587, 107, 108, true, "is", "is"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541486538, 13616139232389586146, 18446744073709551615, 18446744073709551615, 63, 65, 63, 65, 11, 12, true, "in", "in"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 3612640462697257855, 15059124849281620447, 18446744073709551615, 18446744073709551615, 77, 88, 77, 88, 15, 18, true, "Each of the", "Each of the"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106398107541152403, 3040495791226350629, 18446744073709551615, 18446744073709551615, 97, 104, 97, 104, 19, 21, true, "in this", "in this"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541480354, 13616133383081621648, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 39, 40, true, "In", "In"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 389609625618037948, 638487817302062508, 18446744073709551615, 18446744073709551615, 220, 224, 220, 224, 43, 44, true, "with", "with"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206560517276114, 15829604646240034102, 18446744073709551615, 18446744073709551615, 285, 291, 285, 291, 53, 55, true, "into a", "into a"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 14091433066300748251, 5574629252352928036, 18446744073709551615, 18446744073709551615, 407, 417, 407, 417, 73, 75, true, "since each", "since each"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206566339127348, 15152243859443904216, 18446744073709551615, 18446744073709551615, 454, 460, 454, 460, 80, 82, true, "on the", "on the"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541485670, 13616139037782441930, 18446744073709551615, 18446744073709551615, 539, 541, 539, 541, 96, 97, true, "of", "of"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 8106397727991264470, 7248749092306664141, 18446744073709551615, 18446744073709551615, 597, 604, 597, 604, 110, 112, true, "for the", "for the"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541485865, 13616139237691872940, 18446744073709551615, 18446744073709551615, 211, 213, 211, 213, 41, 42, true, "to", "to"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541485865, 13616139237691874671, 18446744073709551615, 18446744073709551615, 232, 234, 232, 234, 45, 46, true, "to", "to"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 15441160910541485865, 13616139237691883462, 18446744073709551615, 18446744073709551615, 363, 365, 363, 365, 66, 67, true, "to", "to"], ["conn", "single-conn", 9005324696118733701, "TEXT", "#/texts/76", 1.0, 16381206519425733256, 1744518844831028097, 18446744073709551615, 18446744073709551615, 489, 495, 489, 495, 86, 88, true, "to the", "to the"], ["parenthesis", "round brackets", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 15451245949012109980, 860212891132498684, 18446744073709551615, 18446744073709551615, 105, 118, 105, 118, 16, 20, true, "(or document)", "(or document)"], ["expression", "word-concatenation", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 5470814635586025487, 3023904040799855893, 18446744073709551615, 18446744073709551615, 68, 81, 68, 81, 11, 12, true, "compute-heavy", "compute-heavy"], ["sentence", "", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 8943027620035512136, 10854655135118326590, 18446744073709551615, 18446744073709551615, 31, 125, 31, 125, 6, 22, true, "Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", "Whenever possible we parallelise the compute-heavy operations at the page (or document) level."], ["term", "single-term", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 7803735128811820247, 6440046018987122155, 18446744073709551615, 18446744073709551615, 17, 29, 17, 29, 3, 5, true, "whole corpus", "whole corpus"], ["term", "single-term", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 13988986336887005746, 1446674937315880970, 18446744073709551615, 18446744073709551615, 68, 92, 68, 92, 11, 13, true, "compute-heavy operations", "compute-heavy operations"], ["term", "single-term", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 389609625632301461, 15632188389001375550, 18446744073709551615, 18446744073709551615, 100, 104, 100, 104, 15, 16, true, "page", "page"], ["term", "single-term", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 14650401089286948001, 1809325515137941529, 18446744073709551615, 18446744073709551615, 109, 117, 109, 117, 18, 19, true, "document", "document"], ["term", "single-term", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 329104161602483077, 5312276037637913177, 18446744073709551615, 18446744073709551615, 119, 124, 119, 124, 20, 21, true, "level", "level"], ["verb", "single-verb", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 6167836358624303500, 7818132889864191879, 18446744073709551615, 18446744073709551615, 0, 9, 0, 9, 0, 1, true, "operating", "operating"], ["verb", "single-verb", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 18223316012831076072, 4378757623349607195, 18446744073709551615, 18446744073709551615, 52, 63, 52, 63, 9, 10, true, "parallelise", "parallelise"], ["conn", "single-conn", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 16381206566339127348, 1222781431356980611, 18446744073709551615, 18446744073709551615, 10, 16, 10, 16, 1, 3, true, "on the", "on the"], ["conn", "single-conn", 8082547756621048511, "TEXT", "#/texts/77", 1.0, 16381206568372064271, 9744783902447945030, 18446744073709551615, 18446744073709551615, 93, 99, 93, 99, 13, 15, true, "at the", "at the"], ["numval", "ival", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 17767354399704235156, 7397297711065841756, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "4", "4"], ["parenthesis", "reference", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415896395057, 17882276138977820280, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 3, true, "(4)", "(4)"], ["parenthesis", "round brackets", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 7182556421351177654, 17408013221930808816, 18446744073709551615, 18446744073709551615, 207, 256, 207, 256, 38, 50, true, "(e. g. the parsed PDF pages, trained models, etc)", "(e. g. the parsed PDF pages, trained models, etc)"], ["parenthesis", "round brackets", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8366781084765568282, 12073061348432203105, 18446744073709551615, 18446744073709551615, 541, 576, 541, 576, 103, 112, true, "(in our case we use MongoDB$^{17}$)", "(in our case we use MongoDB$^{17}$)"], ["expression", "common", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541487324, 14307783780196245817, 18446744073709551615, 18446744073709551615, 208, 213, 208, 213, 39, 40, true, "eg", "e. g."], ["expression", "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249425734, 18446744073709551615, 18446744073709551615, 147, 159, 147, 159, 30, 31, true, "object-store", "object-store"], ["expression", "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249446260, 18446744073709551615, 18446744073709551615, 333, 345, 333, 345, 64, 65, true, "object-store", "object-store"], ["expression", "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249438801, 18446744073709551615, 18446744073709551615, 351, 363, 351, 363, 67, 68, true, "object-store", "object-store"], ["expression", "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 7393395602818997382, 2445570796174034956, 18446744073709551615, 18446744073709551615, 620, 632, 620, 632, 122, 123, true, "access-layer", "access-layer"], ["expression", "latex", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 329104159258632281, 15385845941904838235, 18446744073709551615, 18446744073709551615, 568, 575, 568, 575, 110, 111, true, "^{17}", "$^{17}$"], ["sentence", "", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 17454558322938465001, 17890104706267422072, 18446744073709551615, 18446744073709551615, 4, 346, 4, 346, 3, 66, true, "A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store.", "A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store."], ["sentence", "", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5622932258824479139, 10516707730337709393, 18446744073709551615, 18446744073709551615, 347, 451, 347, 451, 66, 84, true, "The object-store allows us to easily scale the storage with regard to the number of processed documents.", "The object-store allows us to easily scale the storage with regard to the number of processed documents."], ["sentence", "", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8816129983583997199, 15303863633151315650, 18446744073709551615, 18446744073709551615, 452, 633, 452, 633, 84, 124, true, "However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", "However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer."], ["term", "enum-term-mark-2", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 9482072146671435678, 4852192659277363715, 18446744073709551615, 18446744073709551615, 598, 613, 598, 613, 117, 120, true, "storage and act", "storage and act"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 13382702060117711634, 866988340100178668, 18446744073709551615, 18446744073709551615, 6, 19, 6, 19, 4, 6, true, "storage layer", "storage layer"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 13382702060117711634, 866988340100185003, 18446744073709551615, 18446744073709551615, 97, 110, 97, 110, 20, 22, true, "storage layer", "storage layer"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 2903324788977241891, 13900858649375924507, 18446744073709551615, 18446744073709551615, 225, 234, 225, 234, 42, 44, true, "PDF pages", "PDF pages"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 9804322186740216471, 16267225850960798578, 18446744073709551615, 18446744073709551615, 236, 250, 236, 250, 45, 47, true, "trained models", "trained models"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 1944794866286482065, 4090230587991277542, 18446744073709551615, 18446744073709551615, 263, 287, 263, 287, 52, 55, true, "queryable NoSQL database", "queryable NoSQL database"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16772942504422841315, 12153285632385192646, 18446744073709551615, 18446744073709551615, 526, 540, 526, 540, 101, 103, true, "NoSQL database", "NoSQL database"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206578935372333, 11959326053889459522, 18446744073709551615, 18446744073709551615, 25, 31, 25, 31, 7, 8, true, "stores", "stores"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 6167933651658664291, 16849655876428761988, 18446744073709551615, 18446744073709551615, 36, 45, 36, 45, 9, 10, true, "documents", "documents"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8106478445190161533, 10724655109163266628, 18446744073709551615, 18446744073709551615, 61, 68, 61, 68, 14, 15, true, "results", "results"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 990358581043194791, 9157342138188045037, 18446744073709551615, 18446744073709551615, 78, 91, 78, 91, 17, 18, true, "microservices", "microservices"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 14635102416861801722, 13921851080814198183, 18446744073709551615, 18446744073709551615, 134, 142, 134, 142, 27, 28, true, "services", "services"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249425734, 18446744073709551615, 18446744073709551615, 147, 159, 147, 159, 30, 31, true, "object-store", "object-store"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206578935372333, 11959326053892662509, 18446744073709551615, 18446744073709551615, 165, 171, 165, 171, 32, 33, true, "stores", "stores"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 6167933651658664291, 16849655876428781193, 18446744073709551615, 18446744073709551615, 176, 185, 176, 185, 34, 35, true, "documents", "documents"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206578939110576, 11959779027228109556, 18446744073709551615, 18446744073709551615, 200, 206, 200, 206, 37, 38, true, "stages", "stages"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 14638347573453462708, 11572179125786979791, 18446744073709551615, 18446744073709551615, 304, 312, 304, 312, 58, 59, true, "metadata", "metadata"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 389609625697824016, 10086809110828577778, 18446744073709551615, 18446744073709551615, 321, 325, 321, 325, 61, 62, true, "file", "file"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249446260, 18446744073709551615, 18446744073709551615, 333, 345, 333, 345, 64, 65, true, "object-store", "object-store"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 5910674217167684246, 3596748896249438801, 18446744073709551615, 18446744073709551615, 351, 363, 351, 363, 67, 68, true, "object-store", "object-store"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8106478700889254291, 5556909971365278069, 18446744073709551615, 18446744073709551615, 394, 401, 394, 401, 74, 75, true, "storage", "storage"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206521526353544, 15388633276980566672, 18446744073709551615, 18446744073709551615, 407, 413, 407, 413, 76, 77, true, "regard", "regard"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206574973295053, 13874751204703215888, 18446744073709551615, 18446744073709551615, 421, 427, 421, 427, 79, 80, true, "number", "number"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 6167933651658664291, 16849655876428863113, 18446744073709551615, 18446744073709551615, 441, 450, 441, 450, 82, 83, true, "documents", "documents"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 389609625695123443, 10086936390401945955, 18446744073709551615, 18446744073709551615, 549, 553, 549, 553, 106, 107, true, "case", "case"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8106471292843117687, 16633247643408803384, 18446744073709551615, 18446744073709551615, 561, 568, 561, 568, 109, 110, true, "MongoDB", "MongoDB"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415895527965, 17882005209631256296, 18446744073709551615, 18446744073709551615, 580, 583, 580, 583, 113, 114, true, "top", "top"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8106478700889254291, 5556909971365264072, 18446744073709551615, 18446744073709551615, 598, 605, 598, 605, 117, 118, true, "storage", "storage"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415895571467, 17882001345722866996, 18446744073709551615, 18446744073709551615, 610, 613, 610, 613, 119, 120, true, "act", "act"], ["term", "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 7393395602818997382, 2445570796174034956, 18446744073709551615, 18446744073709551615, 620, 632, 620, 632, 122, 123, true, "access-layer", "access-layer"], ["verb", "compound-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 9001167546411496730, 5107946166734090998, 18446744073709551615, 18446744073709551615, 111, 122, 111, 122, 22, 24, true, "is composed", "is composed"], ["verb", "compound-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 4557608131655756693, 10894415445021592676, 18446744073709551615, 18446744073709551615, 464, 502, 464, 502, 87, 94, true, "is not build to be queried efficiently", "is not build to be queried efficiently"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 6171728176299542016, 16532079825940175611, 18446744073709551615, 18446744073709551615, 190, 199, 190, 199, 36, 37, true, "processed", "processed"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206517379850387, 15480389052081010479, 18446744073709551615, 18446744073709551615, 218, 224, 218, 224, 41, 42, true, "parsed", "parsed"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206578935372333, 11959326053892654989, 18446744073709551615, 18446744073709551615, 293, 299, 293, 299, 56, 57, true, "stores", "stores"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206569317834029, 11936595447040454128, 18446744073709551615, 18446744073709551615, 364, 370, 364, 370, 68, 69, true, "allows", "allows"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 329104161785194305, 774090374362380612, 18446744073709551615, 18446744073709551615, 384, 389, 384, 389, 72, 73, true, "scale", "scale"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 6171728176299542016, 16532079825940159645, 18446744073709551615, 18446744073709551615, 431, 440, 431, 440, 81, 82, true, "processed", "processed"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541486535, 14307783832258935505, 18446744073709551615, 18446744073709551615, 510, 512, 510, 512, 96, 97, true, "is", "is"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415895640485, 17882295449136083937, 18446744073709551615, 18446744073709551615, 520, 523, 520, 523, 99, 100, true, "put", "put"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415895516060, 17882004701528519561, 18446744073709551615, 18446744073709551615, 557, 560, 557, 560, 108, 109, true, "use", "use"], ["verb", "single-verb", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206594265787492, 12314552731610003625, 18446744073709551615, 18446744073709551615, 587, 593, 587, 593, 115, 116, true, "manage", "manage"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 389609625631229034, 10076688135462708121, 18446744073709551615, 18446744073709551615, 20, 24, 20, 24, 6, 7, true, "that", "that"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206568455155979, 11882547791157225115, 18446744073709551615, 18446744073709551615, 54, 60, 54, 60, 12, 14, true, "as the", "as the"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 14637917359887717745, 15358780278905995948, 18446744073709551615, 18446744073709551615, 69, 77, 69, 77, 15, 17, true, "from the", "from the"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 12178341415895623120, 17882259379782471845, 18446744073709551615, 18446744073709551615, 123, 126, 123, 126, 24, 25, true, "out", "out"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485670, 14307783790885956650, 18446744073709551615, 18446744073709551615, 127, 129, 127, 129, 25, 26, true, "of", "of"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 389609625631229034, 10076688135462665568, 18446744073709551615, 18446744073709551615, 160, 164, 160, 164, 31, 32, true, "that", "that"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206564601699726, 11976296382474591180, 18446744073709551615, 18446744073709551615, 208, 217, 208, 217, 39, 41, true, "eg the", "e. g. the"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 8106342927225405366, 3870861981915582763, 18446744073709551615, 18446744073709551615, 313, 320, 313, 320, 59, 61, true, "of each", "of each"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206560518651853, 14827520425559992801, 18446744073709551615, 18446744073709551615, 326, 332, 326, 332, 62, 64, true, "in the", "in the"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 389609625618037948, 10078261007606591819, 18446744073709551615, 18446744073709551615, 402, 406, 402, 406, 75, 76, true, "with", "with"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485670, 14307783790886352415, 18446744073709551615, 18446744073709551615, 428, 430, 428, 430, 80, 81, true, "of", "of"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541486538, 14307783830786016801, 18446744073709551615, 18446744073709551615, 542, 544, 542, 544, 104, 105, true, "in", "in"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485678, 14307783792085522482, 18446744073709551615, 18446744073709551615, 577, 579, 577, 579, 112, 113, true, "on", "on"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 329104159171729452, 15392320057005504366, 18446744073709551615, 18446744073709551615, 614, 619, 614, 619, 120, 122, true, "as an", "as an"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485865, 14307783789814212187, 18446744073709551615, 18446744073709551615, 374, 376, 374, 376, 70, 71, true, "to", "to"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 16381206519425733256, 15635996210936257264, 18446744073709551615, 18446744073709551615, 414, 420, 414, 420, 77, 79, true, "to the", "to the"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485865, 14307783789814206195, 18446744073709551615, 18446744073709551615, 477, 479, 477, 479, 90, 91, true, "to", "to"], ["conn", "single-conn", 7791113385466815951, "TEXT", "#/texts/78", 1.0, 15441160910541485865, 14307783789814213462, 18446744073709551615, 18446744073709551615, 584, 586, 584, 586, 114, 115, true, "to", "to"], ["expression", "common", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541486545, 3028421707917465902, 18446744073709551615, 18446744073709551615, 69, 73, 69, 73, 13, 14, true, "ie", "i.e."], ["expression", "common", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 12178341415895450733, 6765936968254123994, 18446744073709551615, 18446744073709551615, 561, 565, 561, 565, 98, 99, true, "etc", "etc."], ["expression", "apostrophe", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 389609625696231302, 874520044884738072, 18446744073709551615, 18446744073709551615, 79, 84, 79, 84, 15, 16, true, "dont", "don't"], ["expression", "word-concatenation", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 5044385734724420019, 15039915568025682583, 18446744073709551615, 18446744073709551615, 207, 223, 207, 223, 40, 41, true, "state-of-the-art", "state-of-the-art"], ["expression", "word-concatenation", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15169931585135175826, 5000232017329418031, 18446744073709551615, 18446744073709551615, 296, 307, 296, 307, 57, 58, true, "cloud-based", "cloud-based"], ["expression", "word-concatenation", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 17036338369050073511, 11876619048247685695, 18446744073709551615, 18446744073709551615, 517, 529, 517, 529, 92, 93, true, "data-at-rest", "data-at-rest"], ["sentence", "", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 6380046225039059930, 1331102669406003609, 18446744073709551615, 18446744073709551615, 0, 125, 0, 125, 0, 26, true, "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it.", "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it."], ["sentence", "", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 17685124856943080749, 17041292620595508182, 18446744073709551615, 18446744073709551615, 126, 287, 126, 287, 26, 55, true, "This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ.", "This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ."], ["term", "enum-term-mark-4", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 6417746280621449074, 4697720481231698323, 18446744073709551615, 18446744073709551615, 259, 286, 259, 286, 49, 54, true, "MongoDB, Redis and RabbitMQ", "MongoDB, Redis and RabbitMQ"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 5470814617574924291, 6460878720216756235, 18446744073709551615, 18446744073709551615, 40, 53, 40, 53, 8, 10, true, "compute layer", "compute layer"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 1269674564249719737, 10143729425890314060, 18446744073709551615, 18446744073709551615, 154, 174, 154, 174, 32, 34, true, "additional stability", "additional stability"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 1142162931543826722, 12325457556378121938, 18446744073709551615, 18446744073709551615, 179, 199, 179, 199, 35, 38, true, "data safety concerns", "data safety concerns"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 18398403256162540896, 17634138153411653985, 18446744073709551615, 18446744073709551615, 207, 229, 207, 229, 40, 42, true, "state-of-the-art tools", "state-of-the-art tools"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 12206009578906402256, 16259073236375936761, 18446744073709551615, 18446744073709551615, 296, 316, 296, 316, 57, 59, true, "cloud-based platform", "cloud-based platform"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 5741362909955913015, 11730433319097946883, 18446744073709551615, 18446744073709551615, 348, 363, 348, 363, 65, 67, true, "software assets", "software assets"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 3042489792150624438, 10666821026275440814, 18446744073709551615, 18446744073709551615, 388, 403, 388, 403, 72, 74, true, "main deployment", "main deployment"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 12042865047034155865, 13701796757400581729, 18446744073709551615, 18446744073709551615, 424, 452, 424, 452, 79, 82, true, "specialised vendors services", "specialised vendors services"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 3253150599721255436, 3115708939969640776, 18446744073709551615, 18446744073709551615, 480, 508, 480, 508, 87, 90, true, "latest industry requirements", "latest industry requirements"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 12361947180622931307, 6107159509510404051, 18446744073709551615, 18446744073709551615, 517, 540, 517, 540, 92, 94, true, "data-at-rest encryption", "data-at-rest encryption"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 13310381990336316505, 1214145885617522065, 18446744073709551615, 18446744073709551615, 542, 559, 542, 559, 95, 97, true, "high availability", "high availability"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206568241679420, 4060891951880802543, 18446744073709551615, 18446744073709551615, 3, 9, 3, 9, 1, 2, true, "design", "design"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 990358581043194791, 13959011607976637903, 18446744073709551615, 18446744073709551615, 19, 32, 19, 32, 5, 6, true, "microservices", "microservices"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 389609625696431489, 874321965181516179, 18446744073709551615, 18446744073709551615, 96, 100, 96, 100, 18, 19, true, "data", "data"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 8106471292843117687, 12637598480251227160, 18446744073709551615, 18446744073709551615, 259, 266, 259, 266, 49, 50, true, "MongoDB", "MongoDB"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 329104162172852560, 4837073618500726237, 18446744073709551615, 18446744073709551615, 268, 273, 268, 273, 51, 52, true, "Redis", "Redis"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 14650252519075350211, 3711332974386502064, 18446744073709551615, 18446744073709551615, 278, 286, 278, 286, 53, 54, true, "RabbitMQ", "RabbitMQ"], ["term", "single-term", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 14635106751859230946, 3279977075117953545, 18446744073709551615, 18446744073709551615, 322, 330, 322, 330, 61, 62, true, "solution", "solution"], ["verb", "compound-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 2116256630331469530, 14901345236443982027, 18446744073709551615, 18446744073709551615, 238, 249, 238, 249, 44, 46, true, "have chosen", "have chosen"], ["verb", "compound-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 17302021957935714782, 6387910212371789810, 18446744073709551615, 18446744073709551615, 367, 378, 367, 378, 68, 70, true, "be detached", "be detached"], ["verb", "compound-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 5947520785570047620, 9780743873997466930, 18446744073709551615, 18446744073709551615, 411, 420, 411, 420, 76, 78, true, "be served", "be served"], ["verb", "compound-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 13934224220978156333, 2821183657219865328, 18446744073709551615, 18446744073709551615, 459, 475, 459, 475, 83, 86, true, "are certified to", "are certified to"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 12178341415895564896, 6765880715380529817, 18446744073709551615, 18446744073709551615, 54, 57, 54, 57, 10, 11, true, "are", "are"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541486545, 3028421707917465902, 18446744073709551615, 18446744073709551615, 69, 73, 69, 73, 13, 14, true, "ie", "i.e."], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206594265787492, 6901918250154643292, 18446744073709551615, 18446744073709551615, 85, 91, 85, 91, 16, 17, true, "manage", "manage"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 8106342542940968443, 7576392723763470277, 18446744073709551615, 18446744073709551615, 111, 118, 111, 118, 22, 23, true, "operate", "operate"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206569317834029, 4041469951276441022, 18446744073709551615, 18446744073709551615, 131, 137, 131, 137, 27, 28, true, "allows", "allows"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 329104159241711190, 4780637315619849705, 18446744073709551615, 18446744073709551615, 144, 149, 144, 149, 30, 31, true, "trust", "trust"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 329104162060230051, 16723674555525513210, 18446744073709551615, 18446744073709551615, 288, 293, 288, 293, 55, 56, true, "Being", "Being"], ["verb", "single-verb", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206569317834029, 4041469951276436768, 18446744073709551615, 18446744073709551615, 331, 337, 331, 337, 62, 63, true, "allows", "allows"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 8106478685702231057, 6290141210502214270, 18446744073709551615, 18446744073709551615, 251, 258, 251, 258, 47, 49, true, "such as", "such as"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 8106478685702231057, 6290141210502197413, 18446744073709551615, 18446744073709551615, 509, 516, 509, 516, 90, 92, true, "such as", "such as"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541480853, 3028421248297502894, 18446744073709551615, 18446744073709551615, 0, 2, 0, 2, 0, 1, true, "By", "By"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206560518651853, 5658730714977917939, 18446744073709551615, 18446744073709551615, 33, 39, 33, 39, 6, 8, true, "in the", "in the"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541485678, 3028421580747422282, 18446744073709551615, 18446744073709551615, 119, 121, 119, 121, 23, 24, true, "on", "on"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 389609625631229034, 1006213453265744340, 18446744073709551615, 18446744073709551615, 230, 234, 230, 234, 42, 43, true, "that", "that"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 6187534615926030665, 2333941156720616523, 18446744073709551615, 18446744073709551615, 338, 347, 338, 347, 63, 65, true, "for these", "for these"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 14637917359887717745, 7863405527781876570, 18446744073709551615, 18446744073709551615, 379, 387, 379, 387, 70, 72, true, "from the", "from the"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541486989, 3028421693438408488, 18446744073709551615, 18446744073709551615, 421, 423, 421, 423, 78, 79, true, "by", "by"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541485865, 3028421580029648804, 18446744073709551615, 18446744073709551615, 141, 143, 141, 143, 29, 30, true, "to", "to"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206519425733256, 14507313859404429169, 18446744073709551615, 18446744073709551615, 200, 206, 200, 206, 38, 40, true, "to the", "to the"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541485865, 3028421580029650856, 18446744073709551615, 18446744073709551615, 364, 366, 364, 366, 67, 68, true, "to", "to"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 15441160910541485865, 3028421580029649693, 18446744073709551615, 18446744073709551615, 408, 410, 408, 410, 75, 76, true, "to", "to"], ["conn", "single-conn", 2845012065511066307, "TEXT", "#/texts/79", 1.0, 16381206519425733256, 14507313859404314997, 18446744073709551615, 18446744073709551615, 473, 479, 473, 479, 85, 87, true, "to the", "to the"], ["numval", "ival", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 17767354399704235158, 2662324577726766030, 18446744073709551615, 18446744073709551615, 132, 133, 132, 133, 25, 26, true, "6", "6"], ["parenthesis", "round brackets", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14654063839594813536, 538351782817809335, 18446744073709551615, 18446744073709551615, 126, 134, 126, 134, 22, 27, true, "(Fig. 6)", "(Fig. 6)"], ["expression", "common", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541487324, 13392634893759554933, 18446744073709551615, 18446744073709551615, 302, 307, 302, 307, 55, 56, true, "eg", "e. g."], ["expression", "apostrophe", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 329104162099298038, 4763741573118152283, 18446744073709551615, 18446744073709551615, 432, 438, 432, 438, 74, 75, true, "didnt", "didn't"], ["expression", "word-concatenation", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 12953096966692611490, 12141023217086338960, 18446744073709551615, 18446744073709551615, 416, 431, 416, 431, 73, 74, true, "result-backends", "result-backends"], ["expression", "word-concatenation", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15312996304332666827, 6035818222047083309, 18446744073709551615, 18446744073709551615, 449, 462, 449, 462, 77, 78, true, "auto-cleaning", "auto-cleaning"], ["sentence", "", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 530368145582943314, 13295761460699684392, 18446744073709551615, 18446744073709551615, 0, 109, 0, 109, 0, 19, true, "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform.", "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform."], ["sentence", "", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 13690589465324431830, 729303492509750058, 18446744073709551615, 18446744073709551615, 110, 243, 110, 243, 19, 46, true, "From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services.", "From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services."], ["sentence", "", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 3799550530227447837, 10235095163850190658, 18446744073709551615, 18446744073709551615, 244, 397, 244, 397, 46, 70, true, "During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks.", "During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks."], ["term", "enum-term-mark-2", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 17670119798254759554, 11044523659752658873, 18446744073709551615, 18446744073709551615, 362, 384, 362, 384, 65, 68, true, "performance or scaling", "performance or scaling"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14228775800347505852, 1270934594966554784, 18446744073709551615, 18446744073709551615, 40, 52, 40, 52, 8, 10, true, "crucial role", "crucial role"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 7308464677014704448, 5082168475013008068, 18446744073709551615, 18446744073709551615, 71, 91, 71, 91, 13, 15, true, "scaling requirements", "scaling requirements"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 5470814617574924291, 14078314495213552738, 18446744073709551615, 18446744073709551615, 157, 170, 157, 170, 33, 35, true, "compute layer", "compute layer"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 2732848371272418679, 13963794921672736533, 18446744073709551615, 18446744073709551615, 177, 196, 177, 196, 37, 39, true, "considerable amount", "considerable amount"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 9137804915913150128, 4468637458257639239, 18446744073709551615, 18446744073709551615, 225, 242, 225, 242, 43, 45, true, "external services", "external services"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 2183649553633451561, 2020007629353495081, 18446744073709551615, 18446744073709551615, 280, 296, 280, 296, 51, 53, true, "multiple options", "multiple options"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 8453966769027728994, 16465619217972769818, 18446744073709551615, 18446744073709551615, 351, 373, 351, 373, 64, 66, true, "inadequate performance", "inadequate performance"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 5368659910297958112, 6849806055120467904, 18446744073709551615, 18446744073709551615, 377, 396, 377, 396, 67, 69, true, "scaling bottlenecks", "scaling bottlenecks"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15755629669707778659, 16473859664425870651, 18446744073709551615, 18446744073709551615, 402, 438, 402, 438, 71, 75, true, "example other result-backends didnt", "example other result-backends didn't"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 17990474265978324021, 4558094210649300135, 18446744073709551615, 18446744073709551615, 449, 476, 449, 476, 77, 79, true, "auto-cleaning functionality", "auto-cleaning functionality"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 17678246672778617788, 10086356137613940726, 18446744073709551615, 18446744073709551615, 519, 534, 519, 534, 88, 90, true, "custom solution", "custom solution"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 9431890275510275757, 3557840365711048098, 18446744073709551615, 18446744073709551615, 558, 572, 558, 572, 94, 96, true, "object storage", "object storage"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 3103749419018501051, 16238812461129121885, 18446744073709551615, 18446744073709551615, 587, 602, 587, 602, 99, 101, true, "other solutions", "other solutions"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 16381206532620919857, 14992785282357168667, 18446744073709551615, 18446744073709551615, 4, 10, 4, 10, 1, 2, true, "choice", "choice"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14635102416861801722, 6944138304193469372, 18446744073709551615, 18446744073709551615, 18, 26, 18, 26, 4, 5, true, "services", "services"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14814125365076808131, 259608735306547128, 18446744073709551615, 18446744073709551615, 100, 108, 100, 108, 17, 18, true, "platform", "platform"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 16381206578503830159, 5707938631766375304, 18446744073709551615, 18446744073709551615, 119, 125, 119, 125, 21, 22, true, "sketch", "sketch"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 12178341415896108354, 15797295600457378135, 18446744073709551615, 18446744073709551615, 127, 130, 127, 130, 23, 24, true, "Fig", "Fig"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 10844940863803374990, 9655000055558243874, 18446744073709551615, 18446744073709551615, 200, 213, 200, 213, 40, 41, true, "communication", "communication"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 1525875096007260836, 12799154604316470877, 18446744073709551615, 18446744073709551615, 255, 266, 255, 266, 48, 49, true, "development", "development"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14635102416861801722, 6944138304193064763, 18446744073709551615, 18446744073709551615, 331, 339, 331, 339, 61, 62, true, "services", "services"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 329104162172852560, 4801873179480423203, 18446744073709551615, 18446744073709551615, 488, 493, 488, 493, 81, 82, true, "Redis", "Redis"], ["term", "single-term", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 8106471292843117687, 4264742177449418379, 18446744073709551615, 18446744073709551615, 542, 549, 542, 549, 91, 92, true, "MongoDB", "MongoDB"], ["verb", "compound-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 13963960872604267983, 9741187444941953278, 18446744073709551615, 18446744073709551615, 27, 37, 27, 37, 5, 7, true, "plays also", "plays also"], ["verb", "compound-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 3304740881499173399, 16233133484570305536, 18446744073709551615, 18446744073709551615, 311, 325, 311, 325, 57, 60, true, "had to replace", "had to replace"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15360283586477443351, 2607233536085319416, 18446744073709551615, 18446744073709551615, 56, 66, 56, 66, 11, 12, true, "addressing", "addressing"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541486535, 13392635867599855620, 18446744073709551615, 18446744073709551615, 139, 141, 139, 141, 29, 30, true, "is", "is"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 12178341415895601584, 15797287691917345479, 18446744073709551615, 18446744073709551615, 171, 174, 171, 174, 35, 36, true, "has", "has"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 6172092587891830137, 6944235926622048042, 18446744073709551615, 18446744073709551615, 270, 279, 270, 279, 50, 51, true, "evaluated", "evaluated"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541487324, 13392634893759554933, 18446744073709551615, 18446744073709551615, 302, 307, 302, 307, 55, 56, true, "eg", "e. g."], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 329104161714029917, 4812815912515288423, 18446744073709551615, 18446744073709551615, 439, 444, 439, 444, 75, 76, true, "offer", "offer"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 8106342926111180787, 3313437457918926336, 18446744073709551615, 18446744073709551615, 477, 484, 477, 484, 79, 80, true, "offered", "offered"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 16381206512275563974, 17971932688596796644, 18446744073709551615, 18446744073709551615, 506, 512, 506, 512, 85, 86, true, "opting", "opting"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 16381206594558792283, 4831452492039043525, 18446744073709551615, 18446744073709551615, 535, 541, 535, 541, 90, 91, true, "mixing", "mixing"], ["verb", "single-verb", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 6172092587891830137, 6944235926622064431, 18446744073709551615, 18446744073709551615, 577, 586, 577, 586, 98, 99, true, "evaluated", "evaluated"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 2617690495147367356, 3995802380905838725, 18446744073709551615, 18446744073709551615, 142, 152, 142, 152, 30, 32, true, "clear that", "clear that"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 16381206565712212855, 5357051908763334798, 18446744073709551615, 18446744073709551615, 11, 17, 11, 17, 2, 4, true, "of the", "of the"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541486538, 13392635867609642731, 18446744073709551615, 18446744073709551615, 53, 55, 53, 55, 10, 11, true, "in", "in"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 8106397727991264470, 7534731816831827800, 18446744073709551615, 18446744073709551615, 92, 99, 92, 99, 15, 17, true, "for the", "for the"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 14652309564084901216, 17139314077627797878, 18446744073709551615, 18446744073709551615, 110, 118, 110, 118, 19, 21, true, "From the", "From the"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541485670, 13392635753038274381, 18446744073709551615, 18446744073709551615, 197, 199, 197, 199, 39, 40, true, "of", "of"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 8601401817206609046, 13002288130139499420, 18446744073709551615, 18446744073709551615, 214, 224, 214, 224, 41, 43, true, "with these", "with these"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 1703385011780833119, 14455781933540325166, 18446744073709551615, 18446744073709551615, 244, 254, 244, 254, 46, 48, true, "During the", "During the"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 8106397858129277841, 5242979235823403228, 18446744073709551615, 18446744073709551615, 340, 347, 340, 347, 62, 63, true, "because", "because"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541485670, 13392635753038231319, 18446744073709551615, 18446744073709551615, 348, 350, 348, 350, 63, 64, true, "of", "of"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 12178341415896108722, 15797298594107170267, 18446744073709551615, 18446744073709551615, 398, 401, 398, 401, 70, 71, true, "For", "For"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541486989, 13392635875319467060, 18446744073709551615, 18446744073709551615, 485, 487, 485, 487, 80, 81, true, "by", "by"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 16381206569837301772, 4752428903634883173, 18446744073709551615, 18446744073709551615, 499, 505, 499, 505, 84, 85, true, "before", "before"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 329104161711024499, 4813260785890145002, 18446744073709551615, 18446744073709551615, 513, 518, 513, 518, 86, 88, true, "for a", "for a"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 8106477988572616406, 8588614024698076211, 18446744073709551615, 18446744073709551615, 550, 557, 550, 557, 92, 94, true, "with an", "with an"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541487053, 13392635876351472074, 18446744073709551615, 18446744073709551615, 603, 605, 603, 605, 101, 102, true, "as", "as"], ["conn", "single-conn", 15072914837937068796, "TEXT", "#/texts/80", 1.0, 15441160910541485865, 13392635755399877181, 18446744073709551615, 18446744073709551615, 315, 317, 315, 317, 58, 59, true, "to", "to"], ["expression", "apostrophe", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 329104162099298038, 2422170512955612338, 18446744073709551615, 18446744073709551615, 27, 33, 27, 33, 6, 7, true, "didnt", "didn't"], ["sentence", "", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 9738445166753142519, 7077409306408156246, 18446744073709551615, 18446744073709551615, 4, 87, 4, 87, 1, 16, true, "GridFS storage, but it didn't fit to the constraints of typical cloud environments.", "GridFS storage, but it didn't fit to the constraints of typical cloud environments."], ["term", "single-term", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 3553616603590296979, 16097117960287067168, 18446744073709551615, 18446744073709551615, 4, 18, 4, 18, 1, 3, true, "GridFS storage", "GridFS storage"], ["term", "single-term", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 3164946639114553222, 7659937814652463492, 18446744073709551615, 18446744073709551615, 60, 86, 60, 86, 12, 15, true, "typical cloud environments", "typical cloud environments"], ["term", "single-term", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 12178341415895625823, 10663577172675311427, 18446744073709551615, 18446744073709551615, 34, 37, 34, 37, 7, 8, true, "fit", "fit"], ["term", "single-term", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 2343820404875251124, 4748486300187076231, 18446744073709551615, 18446744073709551615, 45, 56, 45, 56, 10, 11, true, "constraints", "constraints"], ["verb", "single-verb", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 329104162099298038, 2422170512955612338, 18446744073709551615, 18446744073709551615, 27, 33, 27, 33, 6, 7, true, "didnt", "didn't"], ["conn", "single-conn", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 15441160910541485670, 15469104452822855430, 18446744073709551615, 18446744073709551615, 57, 59, 57, 59, 11, 12, true, "of", "of"], ["conn", "single-conn", 15263283599394646155, "TEXT", "#/texts/81", 1.0, 16381206519425733256, 10289373630862252080, 18446744073709551615, 18446744073709551615, 38, 44, 38, 44, 8, 10, true, "to the", "to the"], ["numval", "fval", 11417717357379295278, "TEXT", "#/texts/82", 1.0, 12178341415896306586, 2376192024093454144, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "4.2", "4.2"], ["numval", "ival", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541481862, 10500741044532715512, 18446744073709551615, 18446744073709551615, 50, 52, 50, 52, 7, 8, true, "18", "18"], ["numval", "ival", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541481863, 10500741044517231196, 18446744073709551615, 18446744073709551615, 155, 157, 155, 157, 24, 25, true, "19", "19"], ["expression", "common", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541487324, 10500757812195718645, 18446744073709551615, 18446744073709551615, 121, 126, 121, 126, 18, 19, true, "eg", "e. g."], ["expression", "word-concatenation", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 14042857724397157868, 17436499209420645038, 18446744073709551615, 18446744073709551615, 95, 105, 95, 105, 15, 16, true, "on-premise", "on-premise"], ["sentence", "", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 16473487772931696221, 1361496787505182232, 18446744073709551615, 18446744073709551615, 0, 171, 0, 171, 0, 27, true, "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution.", "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution."], ["sentence", "", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 13604474430867440219, 15920079442920273776, 18446744073709551615, 18446744073709551615, 172, 302, 172, 302, 27, 48, true, "Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", "Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints."], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 4315218641775224883, 3783623336096074444, 18446744073709551615, 18446744073709551615, 30, 49, 30, 49, 5, 7, true, "Kubernetes clusters", "Kubernetes clusters"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 7578678502347528407, 16606690075113593003, 18446744073709551615, 18446744073709551615, 66, 86, 66, 86, 10, 13, true, "many cloud providers", "many cloud providers"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 17157390005033639285, 14551521890127263578, 18446744073709551615, 18446744073709551615, 95, 119, 95, 119, 15, 17, true, "on-premise installations", "on-premise installations"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15250872047548077430, 7534455339628786157, 18446744073709551615, 18446744073709551615, 137, 154, 137, 154, 21, 24, true, "IBM Cloud Private", "IBM Cloud Private"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 17140401278227586491, 11321802952630178709, 18446744073709551615, 18446744073709551615, 207, 223, 207, 223, 33, 35, true, "storage services", "storage services"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 4047423525975715659, 7947778581648084546, 18446744073709551615, 18446744073709551615, 248, 260, 248, 260, 39, 41, true, "same cluster", "same cluster"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 14814125365076808131, 2443639570324462603, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 1, 2, true, "platform", "platform"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541487324, 10500757812195718645, 18446744073709551615, 18446744073709551615, 121, 126, 121, 126, 18, 19, true, "eg", "e. g."], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 16659280385198228594, 13641186927945667101, 18446744073709551615, 18446744073709551615, 158, 170, 158, 170, 25, 26, true, "distribution", "distribution"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 13240311013633905449, 2445508371176550978, 18446744073709551615, 18446744073709551615, 189, 201, 189, 201, 30, 31, true, "requirements", "requirements"], ["term", "single-term", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 6165987386346442673, 17011861032528540321, 18446744073709551615, 18446744073709551615, 292, 301, 292, 301, 46, 47, true, "endpoints", "endpoints"], ["verb", "compound-verb", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 12677136892665844646, 2032089139232006155, 18446744073709551615, 18446744073709551615, 224, 236, 224, 236, 35, 37, true, "are launched", "are launched"], ["verb", "compound-verb", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 12855573301475655422, 1573892996858218554, 18446744073709551615, 18446744073709551615, 264, 291, 264, 291, 42, 46, true, "linked to externally hosted", "linked to externally hosted"], ["verb", "single-verb", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541486535, 10500757786703375297, 18446744073709551615, 18446744073709551615, 13, 15, 13, 15, 2, 3, true, "is", "is"], ["verb", "single-verb", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 329104159157798023, 6671752901319384085, 18446744073709551615, 18446744073709551615, 127, 132, 127, 132, 19, 20, true, "using", "using"], ["verb", "single-verb", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 2906423210345501303, 16309307450075852923, 18446744073709551615, 18446744073709551615, 172, 181, 172, 181, 27, 28, true, "Depending", "Depending"], ["conn", "single-conn", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 3013597407861734098, 13645835485872550225, 18446744073709551615, 18446744073709551615, 16, 29, 16, 29, 3, 5, true, "deployable on", "deployable on"], ["conn", "single-conn", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15601168207941439665, 15242156125190384917, 18446744073709551615, 18446744073709551615, 53, 65, 53, 65, 8, 10, true, "available on", "available on"], ["conn", "single-conn", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 16381206566339127348, 12939125612892018463, 18446744073709551615, 18446744073709551615, 182, 188, 182, 188, 28, 30, true, "on the", "on the"], ["conn", "single-conn", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 5386255170026914598, 10161453367619815898, 18446744073709551615, 18446744073709551615, 237, 247, 237, 247, 37, 39, true, "inside the", "inside the"], ["conn", "single-conn", 9031137420247852045, "TEXT", "#/texts/83", 1.0, 15441160910541485865, 10500757793681888901, 18446744073709551615, 18446744073709551615, 271, 273, 271, 273, 43, 44, true, "to", "to"], ["expression", "word-concatenation", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 14352418754681794071, 3129213562618289639, 18446744073709551615, 18446744073709551615, 192, 212, 192, 212, 35, 36, true, "parsing-microservice", "parsing-microservice"], ["sentence", "", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 13622028562599160608, 11838629416984110843, 18446744073709551615, 18446744073709551615, 0, 76, 0, 76, 0, 14, true, "The common parts of all deployments are the interface and the compute layer.", "The common parts of all deployments are the interface and the compute layer."], ["sentence", "", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 8982322851077994049, 14574053650340581887, 18446744073709551615, 18446744073709551615, 77, 173, 77, 173, 14, 31, true, "The compute layer is designed for dynamically adapt the number of resources on the current load.", "The compute layer is designed for dynamically adapt the number of resources on the current load."], ["sentence", "", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 8231516465306254293, 5376868082789191066, 18446744073709551615, 18446744073709551615, 174, 445, 174, 445, 31, 77, true, "For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", "For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents."], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 4575700335406167488, 1041889512884127769, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 1, 3, true, "common parts", "common parts"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 5470814617574924291, 1119950227308354530, 18446744073709551615, 18446744073709551615, 62, 75, 62, 75, 11, 13, true, "compute layer", "compute layer"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 5470814617574924291, 1119950227308355535, 18446744073709551615, 18446744073709551615, 81, 94, 81, 94, 15, 17, true, "compute layer", "compute layer"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 5679217233562387039, 10306293013634943918, 18446744073709551615, 18446744073709551615, 160, 172, 160, 172, 28, 30, true, "current load", "current load"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 7165121732645597150, 17919093041593160462, 18446744073709551615, 18446744073709551615, 192, 222, 192, 222, 35, 37, true, "parsing-microservice instances", "parsing-microservice instances"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 11579811611053762862, 5792740568999225626, 18446744073709551615, 18446744073709551615, 247, 261, 247, 261, 42, 44, true, "large document", "large document"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 5574297910769420540, 7415408366124113138, 18446744073709551615, 18446744073709551615, 374, 390, 374, 390, 66, 68, true, "other components", "other components"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 1526165531385019099, 3702094867294947886, 18446744073709551615, 18446744073709551615, 24, 35, 24, 35, 5, 6, true, "deployments", "deployments"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6182600923960960908, 8662044929949820827, 18446744073709551615, 18446744073709551615, 44, 53, 44, 53, 8, 9, true, "interface", "interface"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206574973295053, 6957832894321474609, 18446744073709551615, 18446744073709551615, 133, 139, 133, 139, 23, 24, true, "number", "number"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6168338487309432467, 14015407302848006245, 18446744073709551615, 18446744073709551615, 143, 152, 143, 152, 25, 26, true, "resources", "resources"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 8106397496085150773, 1241946393555377686, 18446744073709551615, 18446744073709551615, 178, 185, 178, 185, 32, 33, true, "example", "example"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 12178341415895456504, 15511050211190565407, 18446744073709551615, 18446744073709551615, 320, 323, 320, 323, 54, 55, true, "end", "end"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 389609625631210899, 6431357524637287554, 18446744073709551615, 18446744073709551615, 331, 335, 331, 335, 57, 58, true, "task", "task"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6168338487309432467, 14015407302847964601, 18446744073709551615, 18446744073709551615, 351, 360, 351, 360, 62, 63, true, "resources", "resources"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 14634153919632515335, 2667013412527336630, 18446744073709551615, 18446744073709551615, 397, 405, 397, 405, 70, 71, true, "training", "training"], ["term", "single-term", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6167933651658664291, 16134555370198793815, 18446744073709551615, 18446744073709551615, 435, 444, 435, 444, 75, 76, true, "documents", "documents"], ["verb", "compound-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 9165036765200707500, 14015747365861821834, 18446744073709551615, 18446744073709551615, 95, 106, 95, 106, 17, 19, true, "is designed", "is designed"], ["verb", "compound-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 14891459320562646805, 9156075874686645300, 18446744073709551615, 18446744073709551615, 223, 239, 223, 239, 37, 40, true, "could be spawned", "could be spawned"], ["verb", "compound-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 9165313840036679968, 18156907374285878295, 18446744073709551615, 18446744073709551615, 262, 273, 262, 273, 44, 46, true, "is uploaded", "is uploaded"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 12178341415895564896, 15510992411047215180, 18446744073709551615, 18446744073709551615, 36, 39, 36, 39, 6, 7, true, "are", "are"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 329104159173548808, 1957182172439438990, 18446744073709551615, 18446744073709551615, 123, 128, 123, 128, 21, 22, true, "adapt", "adapt"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206579178669319, 6938028188806274364, 18446744073709551615, 18446744073709551615, 301, 307, 301, 307, 50, 51, true, "scaled", "scaled"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 12178341415895564896, 15510992411043340629, 18446744073709551615, 18446744073709551615, 361, 364, 361, 364, 63, 64, true, "are", "are"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 5615554093848987331, 14336271604284028764, 18446744073709551615, 18446744073709551615, 410, 420, 410, 420, 72, 73, true, "assembling", "assembling"], ["verb", "single-verb", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6171728176299542016, 3957840262356218692, 18446744073709551615, 18446744073709551615, 425, 434, 425, 434, 74, 75, true, "processed", "processed"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 6165459236568015364, 6061326913510427821, 18446744073709551615, 18446744073709551615, 337, 346, 337, 346, 59, 61, true, "such that", "such that"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 14637917385401805410, 5428309875437807875, 18446744073709551615, 18446744073709551615, 365, 373, 365, 373, 64, 66, true, "free for", "free for"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206565712007226, 7724692166486276181, 18446744073709551615, 18446744073709551615, 17, 23, 17, 23, 3, 5, true, "of all", "of all"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 12178341415895625940, 15510989710886502910, 18446744073709551615, 18446744073709551615, 107, 110, 107, 110, 19, 20, true, "for", "for"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 15441160910541485670, 18358916429929728660, 18446744073709551615, 18446744073709551615, 140, 142, 140, 142, 24, 25, true, "of", "of"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206566339127348, 7820316500598827267, 18446744073709551615, 18446744073709551615, 153, 159, 153, 159, 26, 28, true, "on the", "on the"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 12178341415896108722, 15510983418444691685, 18446744073709551615, 18446744073709551615, 174, 177, 174, 177, 31, 32, true, "For", "For"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206568372064271, 10868552521626828999, 18446744073709551615, 18446744073709551615, 313, 319, 313, 319, 52, 54, true, "at the", "at the"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 16381206565712212855, 7724660172286794609, 18446744073709551615, 18446744073709551615, 324, 330, 324, 330, 55, 57, true, "of the", "of the"], ["conn", "single-conn", 18436578077535696718, "TEXT", "#/texts/84", 1.0, 389609625633313393, 6432600486678620238, 18446744073709551615, 18446744073709551615, 392, 396, 392, 396, 69, 70, true, "like", "like"], ["sentence", "", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 9089347946185436978, 12322779939098932937, 18446744073709551615, 18446744073709551615, 0, 223, 0, 223, 0, 34, true, "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements.", "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements."], ["sentence", "", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 5619374035914941215, 13388056321170730470, 18446744073709551615, 18446744073709551615, 224, 307, 224, 307, 34, 47, true, "The parse component is indeed more demanding than the simple annotation components.", "The parse component is indeed more demanding than the simple annotation components."], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 5470814617574924291, 8565468289586536983, 18446744073709551615, 18446744073709551615, 30, 43, 30, 43, 5, 7, true, "compute layer", "compute layer"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 220880112941331342, 10757099033331540513, 18446744073709551615, 18446744073709551615, 69, 85, 69, 85, 11, 13, true, "different queues", "different queues"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 5487575286069153569, 13394910817957544454, 18446744073709551615, 18446744073709551615, 157, 176, 157, 176, 26, 28, true, "different component", "different component"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 8988400645948795194, 16485622837841306210, 18446744073709551615, 18446744073709551615, 196, 222, 196, 222, 31, 33, true, "computational requirements", "computational requirements"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 5037855592482871690, 16562149494420733590, 18446744073709551615, 18446744073709551615, 228, 243, 228, 243, 35, 37, true, "parse component", "parse component"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 1786684417185012154, 17458883524654231766, 18446744073709551615, 18446744073709551615, 278, 306, 278, 306, 43, 46, true, "simple annotation components", "simple annotation components"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 2703018952916355661, 2103701956309679472, 18446744073709551615, 18446744073709551615, 4, 14, 4, 14, 1, 2, true, "components", "components"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 14637917407223052431, 233568237166781340, 18446744073709551615, 18446744073709551615, 116, 124, 116, 124, 20, 21, true, "fraction", "fraction"], ["term", "single-term", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6168338487309432467, 7308226084005327662, 18446744073709551615, 18446744073709551615, 128, 137, 128, 137, 22, 23, true, "resources", "resources"], ["verb", "compound-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6181919818947346503, 7425024011998385797, 18446744073709551615, 18446744073709551615, 244, 253, 244, 253, 37, 39, true, "is indeed", "is indeed"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 8106478500389476193, 7333002514628894973, 18446744073709551615, 18446744073709551615, 15, 22, 15, 22, 2, 3, true, "running", "running"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 12178341415895564896, 2069865895983944783, 18446744073709551615, 18446744073709551615, 44, 47, 44, 47, 7, 8, true, "are", "are"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6167774653473311671, 6290589207758732495, 18446744073709551615, 18446744073709551615, 56, 65, 56, 65, 9, 10, true, "organized", "organized"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 14892592691709012982, 6239765786557836013, 18446744073709551615, 18446744073709551615, 100, 111, 100, 111, 17, 19, true, "can control", "can control"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 5946734708345938643, 10385794168888707641, 18446744073709551615, 18446744073709551615, 138, 147, 138, 147, 23, 24, true, "allocated", "allocated"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6180152660545840784, 10572035524213656485, 18446744073709551615, 18446744073709551615, 177, 186, 177, 186, 28, 29, true, "depending", "depending"], ["verb", "single-verb", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6180164155127649426, 12351505444317336995, 18446744073709551615, 18446744073709551615, 259, 268, 259, 268, 40, 41, true, "demanding", "demanding"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 6165459236568015364, 15229237459031979782, 18446744073709551615, 18446744073709551615, 87, 96, 87, 96, 14, 16, true, "such that", "such that"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 16381206560518651853, 15245488941580331570, 18446744073709551615, 18446744073709551615, 23, 29, 23, 29, 3, 5, true, "in the", "in the"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 15441160910541486538, 16667658716295011841, 18446744073709551615, 18446744073709551615, 66, 68, 66, 68, 10, 11, true, "in", "in"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 15441160910541485670, 16667656100672477854, 18446744073709551615, 18446744073709551615, 125, 127, 125, 127, 21, 22, true, "of", "of"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 14637917333167503367, 16189411727226984898, 18446744073709551615, 18446744073709551615, 148, 156, 148, 156, 24, 26, true, "for each", "for each"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 15441160910541485678, 16667656110763672808, 18446744073709551615, 18446744073709551615, 187, 189, 187, 189, 29, 30, true, "on", "on"], ["conn", "single-conn", 11734907767490759865, "TEXT", "#/texts/85", 1.0, 14634130760851708851, 14161918089512738998, 18446744073709551615, 18446744073709551615, 269, 277, 269, 277, 41, 43, true, "than the", "than the"], ["numval", "ival", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 17767354399704235157, 4252787363852102188, 18446744073709551615, 18446744073709551615, 39, 40, 39, 40, 7, 8, true, "5", "5"], ["numval", "ival", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 17767354399704235156, 4252787353929282716, 18446744073709551615, 18446744073709551615, 63, 64, 63, 64, 11, 12, true, "4", "4"], ["numval", "ival", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 17767354399704235152, 4252787363047610230, 18446744073709551615, 18446744073709551615, 79, 80, 79, 80, 15, 16, true, "8", "8"], ["numval", "ival", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 17767354399704235152, 4252787363047621876, 18446744073709551615, 18446744073709551615, 132, 133, 132, 133, 26, 27, true, "8", "8"], ["numval", "ival", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 17767354399704235157, 4252787363852130202, 18446744073709551615, 18446744073709551615, 342, 343, 342, 343, 63, 64, true, "5", "5"], ["sentence", "", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 6583879206628208074, 18118287548868180601, 18446744073709551615, 18446744073709551615, 0, 218, 0, 218, 0, 42, true, "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks.", "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks."], ["sentence", "", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 4563023981242652318, 13772109345187795922, 18446744073709551615, 18446744073709551615, 219, 331, 219, 331, 42, 61, true, "Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment.", "Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment."], ["sentence", "", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 14978678634121360006, 2618788565151427954, 18446744073709551615, 18446744073709551615, 332, 438, 332, 438, 61, 80, true, "Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", "Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer."], ["term", "enum-term-mark-2", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 2528135788265244608, 5344652883238296137, 18446744073709551615, 18446744073709551615, 179, 211, 179, 211, 36, 40, true, "learning training and prediction", "learning training and prediction"], ["term", "enum-term-mark-2", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 14743433718696772273, 15154440347503938098, 18446744073709551615, 18446744073709551615, 408, 437, 408, 437, 75, 79, true, "orchestration and store layer", "orchestration and store layer"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16269569412982647766, 2024757386881102351, 18446744073709551615, 18446744073709551615, 15, 26, 15, 26, 3, 5, true, "main system", "main system"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 13444630328481412471, 2581523576540413652, 18446744073709551615, 18446744073709551615, 41, 57, 41, 57, 8, 10, true, "Kubernetes nodes", "Kubernetes nodes"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 6563416156472488864, 2213523264527249618, 18446744073709551615, 18446744073709551615, 65, 74, 65, 74, 12, 14, true, "CPU cores", "CPU cores"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16269569728729474655, 9476002452497745608, 18446744073709551615, 18446744073709551615, 87, 98, 87, 98, 18, 20, true, "main memory", "main memory"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 3613081198034507866, 9231793791806678387, 18446744073709551615, 18446744073709551615, 174, 196, 174, 196, 35, 38, true, "deep learning training", "deep learning training"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 11816234786078857760, 13747687809735994367, 18446744073709551615, 18446744073709551615, 201, 217, 201, 217, 39, 41, true, "prediction tasks", "prediction tasks"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 1277611218502696979, 1791114475320884340, 18446744073709551615, 18446744073709551615, 229, 245, 229, 245, 45, 47, true, "flexible binding", "flexible binding"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15130402117130351741, 1356464078922122453, 18446744073709551615, 18446744073709551615, 266, 280, 266, 280, 50, 52, true, "specific nodes", "specific nodes"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 5422119649868232113, 9343245046888260619, 18446744073709551615, 18446744073709551615, 286, 301, 286, 301, 54, 56, true, "great advantage", "great advantage"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 3499436126781074633, 11463332423358967580, 18446744073709551615, 18446744073709551615, 309, 330, 309, 330, 58, 60, true, "Kubernetes deployment", "Kubernetes deployment"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 6641605220774829847, 5925222761573375364, 18446744073709551615, 18446744073709551615, 344, 366, 344, 366, 64, 67, true, "other virtual machines", "other virtual machines"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 3081516039280483029, 12956841069716270529, 18446744073709551615, 18446744073709551615, 426, 437, 426, 437, 77, 79, true, "store layer", "store layer"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541479948, 9352056469740640944, 18446744073709551615, 18446744073709551615, 81, 83, 81, 83, 16, 17, true, "GB", "GB"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 329104162118942300, 7223786837846062912, 18446744073709551615, 18446744073709551615, 126, 131, 126, 131, 25, 26, true, "POWER", "POWER"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 389609625621164460, 9971368100247265020, 18446744073709551615, 18446744073709551615, 134, 138, 134, 138, 27, 28, true, "node", "node"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 389609625538377862, 10024562876350301328, 18446744073709551615, 18446744073709551615, 149, 153, 149, 153, 30, 31, true, "GPUs", "GPUs"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 990358581043194791, 3870495549619153573, 18446744073709551615, 18446744073709551615, 249, 262, 249, 262, 48, 49, true, "microservices", "microservices"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 14635102416861801722, 13295514760162013366, 18446744073709551615, 18446744073709551615, 392, 400, 392, 400, 72, 73, true, "services", "services"], ["term", "single-term", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 4327709553742697698, 1943965779924640606, 18446744073709551615, 18446744073709551615, 408, 421, 408, 421, 75, 76, true, "orchestration", "orchestration"], ["verb", "compound-verb", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16914551794403134749, 14289767005593609982, 18446744073709551615, 18446744073709551615, 154, 169, 154, 169, 31, 34, true, "is dedicated to", "is dedicated to"], ["verb", "compound-verb", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 556334503717216086, 14184460239514695626, 18446744073709551615, 18446744073709551615, 367, 387, 367, 387, 67, 71, true, "are employed to host", "are employed to host"], ["verb", "single-verb", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 14814150617868433693, 2828843033811804789, 18446744073709551615, 18446744073709551615, 27, 35, 27, 35, 5, 6, true, "operates", "operates"], ["verb", "single-verb", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541486535, 9351998670672415825, 18446744073709551615, 18446744073709551615, 281, 283, 281, 283, 52, 53, true, "is", "is"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541485678, 9351998466637895866, 18446744073709551615, 18446744073709551615, 36, 38, 36, 38, 6, 7, true, "on", "on"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 389609625618037948, 9971074391070552914, 18446744073709551615, 18446744073709551615, 58, 62, 58, 62, 10, 11, true, "with", "with"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541485670, 9351998465977112201, 18446744073709551615, 18446744073709551615, 84, 86, 84, 86, 17, 18, true, "of", "of"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 389609625618037948, 9971074391070570820, 18446744073709551615, 18446744073709551615, 139, 143, 139, 143, 28, 29, true, "with", "with"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541485670, 9351998465978041953, 18446744073709551615, 18446744073709551615, 246, 248, 246, 248, 47, 48, true, "of", "of"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16381206565712212855, 15798433650653459254, 18446744073709551615, 18446744073709551615, 302, 308, 302, 308, 56, 58, true, "of the", "of the"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16381206560518651853, 15995808466227689457, 18446744073709551615, 18446744073709551615, 401, 407, 401, 407, 73, 75, true, "in the", "in the"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 16381206519425733256, 10817242341263362701, 18446744073709551615, 18446744073709551615, 167, 173, 167, 173, 33, 35, true, "to the", "to the"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541485865, 9351998575526350776, 18446744073709551615, 18446744073709551615, 263, 265, 263, 265, 49, 50, true, "to", "to"], ["conn", "single-conn", 7845460979782401889, "TEXT", "#/texts/86", 1.0, 15441160910541485865, 9351998575526358427, 18446744073709551615, 18446744073709551615, 380, 382, 380, 382, 69, 70, true, "to", "to"], ["numval", "fval", 17769988780693768120, "TEXT", "#/texts/87", 1.0, 12178341415896306587, 11831950895164487341, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "4.3", "4.3"], ["numval", "ival", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 17767354399704235159, 6323623135901186785, 18446744073709551615, 18446744073709551615, 258, 259, 258, 259, 48, 49, true, "7", "7"], ["sentence", "", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 11703520391970010536, 357834892882144608, 18446744073709551615, 18446744073709551615, 0, 56, 0, 56, 0, 11, true, "Let us now discuss some scaling results on our platform.", "Let us now discuss some scaling results on our platform."], ["sentence", "", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8157900891627315247, 15072204804485786404, 18446744073709551615, 18446744073709551615, 57, 247, 57, 247, 11, 46, true, "As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources.", "As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources."], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 4421383392096991748, 9783447214836928971, 18446744073709551615, 18446744073709551615, 229, 246, 229, 246, 43, 45, true, "compute resources", "compute resources"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106478445190161533, 16203321325185840639, 18446744073709551615, 18446744073709551615, 32, 39, 32, 39, 6, 7, true, "results", "results"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 14814125365076808131, 8660743237002823027, 18446744073709551615, 18446744073709551615, 47, 55, 47, 55, 9, 10, true, "platform", "platform"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 5948159060234732715, 1856499645219012237, 18446744073709551615, 18446744073709551615, 82, 91, 82, 91, 17, 18, true, "beginning", "beginning"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106478708629288965, 9823809706360263062, 18446744073709551615, 18446744073709551615, 99, 106, 99, 106, 20, 21, true, "section", "section"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 13240311013633905449, 13295854927356281099, 18446744073709551615, 18446744073709551615, 112, 124, 112, 124, 23, 24, true, "requirements", "requirements"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 14814125365076808131, 8660743237002811080, 18446744073709551615, 18446744073709551615, 133, 141, 133, 141, 26, 27, true, "platform", "platform"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206521526353544, 17652704280141269029, 18446744073709551615, 18446744073709551615, 160, 166, 160, 166, 30, 31, true, "regard", "regard"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206574973295053, 10204329654469875979, 18446744073709551615, 18446744073709551615, 174, 180, 174, 180, 33, 34, true, "number", "number"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 329104159157820437, 7766615889727787026, 18446744073709551615, 18446744073709551615, 184, 189, 184, 189, 35, 36, true, "users", "users"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206574973295053, 10204329654469873734, 18446744073709551615, 18446744073709551615, 195, 201, 195, 201, 38, 39, true, "number", "number"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 6167933651658664291, 11552308682759832261, 18446744073709551615, 18446744073709551615, 215, 224, 215, 224, 41, 42, true, "documents", "documents"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206514091025767, 14670700706856427543, 18446744073709551615, 18446744073709551615, 251, 257, 251, 257, 47, 48, true, "Figure", "Figure"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206574973295053, 10204329654469868686, 18446744073709551615, 18446744073709551615, 273, 279, 273, 279, 53, 54, true, "number", "number"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 329104159157820437, 7766615889727784459, 18446744073709551615, 18446744073709551615, 283, 288, 283, 288, 55, 56, true, "users", "users"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206574973295053, 10204329654469868200, 18446744073709551615, 18446744073709551615, 297, 303, 297, 303, 58, 59, true, "number", "number"], ["term", "single-term", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 12178341415896289890, 7943963832689990500, 18446744073709551615, 18446744073709551615, 317, 320, 317, 320, 61, 62, true, "PDF", "PDF"], ["verb", "compound-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 17858839733535377008, 1920907252286073734, 18446744073709551615, 18446744073709551615, 142, 154, 142, 154, 27, 29, true, "were scaling", "were scaling"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 12178341415896275389, 7943964340963228966, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "Let", "Let"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106397868479560363, 17872032239065412285, 18446744073709551615, 18446744073709551615, 11, 18, 11, 18, 3, 4, true, "discuss", "discuss"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106478648771436891, 8278854775081679845, 18446744073709551615, 18446744073709551615, 24, 31, 24, 31, 5, 6, true, "scaling", "scaling"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106476015433464060, 11919317671569973370, 18446744073709551615, 18446744073709551615, 63, 70, 63, 70, 13, 14, true, "pointed", "pointed"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 6171728176299542016, 13908821060191020107, 18446744073709551615, 18446744073709551615, 205, 214, 205, 214, 40, 41, true, "processed", "processed"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 389609625741152123, 16671401016536211852, 18446744073709551615, 18446744073709551615, 264, 268, 264, 268, 51, 52, true, "show", "show"], ["verb", "single-verb", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 6171728176299542016, 13908821060190998226, 18446744073709551615, 18446744073709551615, 307, 316, 307, 316, 60, 61, true, "processed", "processed"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 15441160910541485678, 1498640040591994871, 18446744073709551615, 18446744073709551615, 40, 42, 40, 42, 7, 8, true, "on", "on"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 15441160910541480533, 1498641809232943552, 18446744073709551615, 18446744073709551615, 57, 59, 57, 59, 11, 12, true, "As", "As"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206560518651853, 18249880271754047870, 18446744073709551615, 18446744073709551615, 75, 81, 75, 81, 15, 17, true, "in the", "in the"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206565712212855, 14962824842694991931, 18446744073709551615, 18446744073709551615, 92, 98, 92, 98, 18, 20, true, "of the", "of the"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 8106397727991264470, 14674040507008400644, 18446744073709551615, 18446744073709551615, 125, 132, 125, 132, 24, 26, true, "for the", "for the"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 389609625618037948, 16164147193980002015, 18446744073709551615, 18446744073709551615, 155, 159, 155, 159, 29, 30, true, "with", "with"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 15441160910541485670, 1498640040717785247, 18446744073709551615, 18446744073709551615, 181, 183, 181, 183, 34, 35, true, "of", "of"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 15441160910541485670, 1498640040717786992, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 39, 40, true, "of", "of"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 15441160910541480354, 1498641903762977573, 18446744073709551615, 18446744073709551615, 248, 250, 248, 250, 46, 47, true, "In", "In"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 15441160910541485670, 1498640040717800068, 18446744073709551615, 18446744073709551615, 280, 282, 280, 282, 54, 55, true, "of", "of"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 15441160910541485670, 1498640040717801334, 18446744073709551615, 18446744073709551615, 304, 306, 304, 306, 59, 60, true, "of", "of"], ["conn", "single-conn", 12387489643011067991, "TEXT", "#/texts/88", 1.0, 16381206519425733256, 17614093764484085203, 18446744073709551615, 18446744073709551615, 167, 173, 167, 173, 31, 33, true, "to the", "to the"], ["numval", "year", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625548777057, 52173736134253972, 18446744073709551615, 18446744073709551615, 172, 176, 172, 176, 35, 36, true, "2017", "2017"], ["numval", "ival", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541481786, 6866904732321432818, 18446744073709551615, 18446744073709551615, 6, 8, 6, 8, 1, 2, true, "20", "20"], ["sentence", "", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 7518545641936550538, 13327477621696107517, 18446744073709551615, 18446744073709551615, 32, 177, 32, 177, 8, 37, true, "As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017.", "As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017."], ["sentence", "", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 10434596786350098942, 4246700910361462765, 18446744073709551615, 18446744073709551615, 178, 363, 178, 363, 37, 71, true, "It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time.", "It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time."], ["sentence", "", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15858606414680046310, 9355067359629881245, 18446744073709551615, 18446744073709551615, 364, 504, 364, 504, 71, 99, true, "Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", "Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity."], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 2903324788977241891, 10047065559827135054, 18446744073709551615, 18446744073709551615, 82, 91, 82, 91, 19, 21, true, "PDF pages", "PDF pages"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 2245603532715892325, 16478559053695087323, 18446744073709551615, 18446744073709551615, 226, 237, 226, 237, 46, 48, true, "sharp steps", "sharp steps"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 11942859038914222878, 15085431028673446657, 18446744073709551615, 18446744073709551615, 286, 301, 286, 301, 56, 58, true, "massive amounts", "massive amounts"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 7252014402665196659, 15261261573536593307, 18446744073709551615, 18446744073709551615, 342, 354, 342, 354, 66, 68, true, "small amount", "small amount"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 2245697320636800498, 85672314451497322, 18446744073709551615, 18446744073709551615, 472, 483, 472, 483, 93, 95, true, "short burst", "short burst"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 16558536334265483368, 11847226154466128513, 18446744073709551615, 18446744073709551615, 487, 503, 487, 503, 96, 98, true, "extreme activity", "extreme activity"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 329104161667992688, 9871171554517434066, 18446744073709551615, 18446744073709551615, 0, 5, 0, 5, 0, 1, true, "pages", "pages"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 14637915316557309079, 12708841971050605893, 18446744073709551615, 18446744073709551615, 14, 22, 14, 22, 4, 5, true, "function", "function"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625631241985, 100194020203453107, 18446744073709551615, 18446744073709551615, 26, 30, 26, 30, 6, 7, true, "time", "time"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 16381206574973295053, 8846230013490521873, 18446744073709551615, 18446744073709551615, 52, 58, 52, 58, 14, 15, true, "number", "number"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 329104159157820437, 8249018447781337774, 18446744073709551615, 18446744073709551615, 62, 67, 62, 67, 16, 17, true, "users", "users"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625631241985, 100194020203438184, 18446744073709551615, 18446744073709551615, 126, 130, 126, 130, 26, 27, true, "time", "time"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 16381206590630165717, 11719327657280904751, 18446744073709551615, 18446744073709551615, 141, 147, 141, 147, 29, 30, true, "launch", "launch"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106478708506632112, 18231497537744338632, 18446744073709551615, 18446744073709551615, 155, 162, 155, 162, 32, 33, true, "service", "service"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 329104161963544245, 9857237958615296698, 18446744073709551615, 18446744073709551615, 166, 171, 166, 171, 34, 35, true, "April", "April"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 329104159157820437, 8249018447781333663, 18446744073709551615, 18446744073709551615, 260, 265, 260, 265, 52, 53, true, "users", "users"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 6167933651658664291, 12240764636372283946, 18446744073709551615, 18446744073709551615, 305, 314, 305, 314, 59, 60, true, "documents", "documents"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106478708506632112, 18231497537744787108, 18446744073709551615, 18446744073709551615, 324, 331, 324, 331, 62, 63, true, "service", "service"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625631241985, 100194020203425198, 18446744073709551615, 18446744073709551615, 358, 362, 358, 362, 69, 70, true, "time", "time"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 16381206568241679420, 9368345895491961575, 18446744073709551615, 18446744073709551615, 375, 381, 375, 381, 74, 75, true, "design", "design"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106476000253296785, 450489361038114021, 18446744073709551615, 18446744073709551615, 396, 403, 396, 403, 80, 81, true, "problem", "problem"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 329104161666914718, 9871052204414646047, 18446744073709551615, 18446744073709551615, 425, 430, 425, 430, 84, 85, true, "peaks", "peaks"], ["term", "single-term", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106478708506632112, 18231497537744780505, 18446744073709551615, 18446744073709551615, 439, 446, 439, 446, 87, 88, true, "service", "service"], ["verb", "compound-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 11953671505157202285, 11937015333801488817, 18446744073709551615, 18446744073709551615, 92, 120, 92, 120, 21, 25, true, "has been increasing steadily", "has been increasing steadily"], ["verb", "compound-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15603889104119874938, 7803556645500016268, 18446744073709551615, 18446744073709551615, 181, 191, 181, 191, 38, 40, true, "is however", "is however"], ["verb", "compound-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 7806959182595507225, 2570245507595885020, 18446744073709551615, 18446744073709551615, 266, 285, 266, 285, 53, 56, true, "have been uploading", "have been uploading"], ["verb", "compound-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106477873809970266, 16668743459306840426, 18446744073709551615, 18446744073709551615, 386, 393, 386, 393, 77, 79, true, "was not", "was not"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 8106397812083771063, 12986611233641368860, 18446744073709551615, 18446744073709551615, 39, 46, 39, 46, 10, 12, true, "can see", "can see"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 6171728176299542016, 11463438981088562167, 18446744073709551615, 18446744073709551615, 72, 81, 72, 81, 18, 19, true, "processed", "processed"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 12178341415895638617, 17393490097429873872, 18446744073709551615, 18446744073709551615, 207, 210, 207, 210, 42, 43, true, "see", "see"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 12178341415895564896, 17393508726112862574, 18446744073709551615, 18446744073709551615, 222, 225, 222, 225, 45, 46, true, "are", "are"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 5581574448026047221, 15445029894002382055, 18446744073709551615, 18446744073709551615, 239, 249, 239, 249, 49, 50, true, "indicating", "indicating"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 6807190128157759045, 13053632147510739476, 18446744073709551615, 18446744073709551615, 407, 418, 407, 418, 82, 83, true, "accommodate", "accommodate"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 12178341415895525606, 17393661643866953573, 18446744073709551615, 18446744073709551615, 447, 450, 447, 450, 88, 89, true, "was", "was"], ["verb", "single-verb", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 16381206485955868973, 14033094471219798649, 18446744073709551615, 18446744073709551615, 459, 465, 459, 465, 91, 92, true, "handle", "handle"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625700764258, 94329536986859678, 18446744073709551615, 18446744073709551615, 9, 13, 9, 13, 2, 4, true, "as a", "as a"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485670, 6866903700093394497, 18446744073709551615, 18446744073709551615, 23, 25, 23, 25, 5, 6, true, "of", "of"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541480533, 6866903594362655679, 18446744073709551615, 18446744073709551615, 32, 34, 32, 34, 8, 9, true, "As", "As"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485670, 6866903700093396448, 18446744073709551615, 18446744073709551615, 59, 61, 59, 61, 15, 16, true, "of", "of"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625618865305, 100185561388315538, 18446744073709551615, 18446744073709551615, 121, 125, 121, 125, 25, 26, true, "over", "over"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 6168057894310307081, 494748048694411645, 18446744073709551615, 18446744073709551615, 131, 140, 131, 140, 27, 29, true, "since the", "since the"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485670, 6866903700093386593, 18446744073709551615, 18446744073709551615, 148, 150, 148, 150, 30, 31, true, "of", "of"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541486538, 6866903364400136605, 18446744073709551615, 18446744073709551615, 163, 165, 163, 165, 33, 34, true, "in", "in"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625631229034, 100122430387311494, 18446744073709551615, 18446744073709551615, 211, 215, 211, 215, 43, 44, true, "that", "that"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 3504047303126433547, 11549299435570708613, 18446744073709551615, 18446744073709551615, 250, 259, 250, 259, 50, 52, true, "that some", "that some"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485670, 6866903700093379447, 18446744073709551615, 18446744073709551615, 302, 304, 302, 304, 58, 59, true, "of", "of"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 14637953883063114384, 1261358077151630278, 18446744073709551615, 18446744073709551615, 315, 323, 315, 323, 60, 62, true, "into the", "into the"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 389609625698530964, 94041907290639477, 18446744073709551615, 18446744073709551615, 332, 336, 332, 336, 63, 65, true, "in a", "in a"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485670, 6866903700093367010, 18446744073709551615, 18446744073709551615, 355, 357, 355, 357, 68, 69, true, "of", "of"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485670, 6866903700093361485, 18446744073709551615, 18446744073709551615, 484, 486, 484, 486, 95, 96, true, "of", "of"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485865, 6866903731987646441, 18446744073709551615, 18446744073709551615, 204, 206, 204, 206, 41, 42, true, "to", "to"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485865, 6866903731988159776, 18446744073709551615, 18446744073709551615, 368, 370, 368, 370, 72, 73, true, "to", "to"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485865, 6866903731988157479, 18446744073709551615, 18446744073709551615, 404, 406, 404, 406, 81, 82, true, "to", "to"], ["conn", "single-conn", 10375772475809458895, "TEXT", "#/texts/89", 1.0, 15441160910541485865, 6866903731988157354, 18446744073709551615, 18446744073709551615, 456, 458, 456, 458, 90, 91, true, "to", "to"], ["numval", "ival", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 17767354399704235152, 10891777227864623310, 18446744073709551615, 18446744073709551615, 10, 11, 10, 11, 2, 3, true, "8", "8"], ["parenthesis", "round brackets", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 4022242346074010063, 12541000686584287248, 18446744073709551615, 18446744073709551615, 74, 178, 74, 178, 14, 33, true, "(i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON)", "(i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON)"], ["expression", "common", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541486545, 15841608933708088140, 18446744073709551615, 18446744073709551615, 75, 79, 75, 79, 15, 16, true, "ie", "i.e."], ["expression", "word-concatenation", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 2217258678859216685, 3493505507787421146, 18446744073709551615, 18446744073709551615, 621, 639, 621, 639, 119, 120, true, "better-than-linear", "better-than-linear"], ["expression", "word-concatenation", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 6285955549867796622, 17538568638231419383, 18446744073709551615, 18446744073709551615, 1121, 1137, 1121, 1137, 209, 210, true, "time-to-solution", "time-to-solution"], ["expression", "word-concatenation", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14639522327238241124, 8193922819820873277, 18446744073709551615, 18446744073709551615, 1155, 1163, 1155, 1163, 213, 214, true, "job-size", "job-size"], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 17950606815080185664, 182687704084943809, 18446744073709551615, 18446744073709551615, 0, 228, 0, 228, 0, 42, true, "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources.", "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 9291869836472436551, 7073966199782583842, 18446744073709551615, 18446744073709551615, 229, 320, 229, 320, 42, 58, true, "We show this scaling by displaying the speedup versus the number of worker nodes available.", "We show this scaling by displaying the speedup versus the number of worker nodes available."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 11942797776008272897, 18354315767267706544, 18446744073709551615, 18446744073709551615, 321, 448, 321, 448, 58, 83, true, "Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores.", "Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 1337110641996971981, 12497994289374110365, 18446744073709551615, 18446744073709551615, 449, 580, 449, 580, 83, 111, true, "As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes.", "As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 7140787443244237501, 14548003650603154277, 18446744073709551615, 18446744073709551615, 581, 724, 581, 724, 111, 135, true, "Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker.", "Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 7774794569544328631, 12157835320367080769, 18446744073709551615, 18446744073709551615, 725, 876, 725, 876, 135, 166, true, "The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level.", "The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 11827058603931473819, 1210577326445407272, 18446744073709551615, 18446744073709551615, 877, 1042, 877, 1042, 166, 194, true, "The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes.", "The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes."], ["sentence", "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8702943219455942098, 9096969319153565329, 18446744073709551615, 18446744073709551615, 1043, 1164, 1043, 1164, 194, 215, true, "Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", "Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size."], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8172710598775048780, 6087292431822475163, 18446744073709551615, 18446744073709551615, 46, 73, 46, 73, 11, 14, true, "main pipeline microservices", "main pipeline microservices"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 12653831733608918357, 5399694712153694222, 18446744073709551615, 18446744073709551615, 95, 108, 95, 108, 19, 21, true, "PDF documents", "PDF documents"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 12400507963759742880, 6135519514760056473, 18446744073709551615, 18446744073709551615, 297, 309, 297, 309, 54, 56, true, "worker nodes", "worker nodes"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 4940765489471971613, 347303216115352656, 18446744073709551615, 18446744073709551615, 370, 391, 370, 391, 68, 70, true, "pipeline microservice", "pipeline microservice"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 10318072901532559633, 7254172824621403054, 18446744073709551615, 18446744073709551615, 507, 519, 507, 519, 96, 98, true, "tasks scales", "tasks scales"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 18001738063114990140, 7718080442102537061, 18446744073709551615, 18446744073709551615, 621, 647, 621, 647, 119, 121, true, "better-than-linear speedup", "better-than-linear speedup"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 3088520230983972493, 6524782884039209835, 18446744073709551615, 18446744073709551615, 670, 691, 670, 691, 126, 128, true, "bandwidth constraints", "bandwidth constraints"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14290393742330326868, 1869283060159003292, 18446744073709551615, 18446744073709551615, 744, 758, 744, 758, 139, 141, true, "assemble tasks", "assemble tasks"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 13968810274884964698, 7333175022141755015, 18446744073709551615, 18446744073709551615, 865, 875, 865, 875, 163, 165, true, "page level", "page level"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 18404777356709557822, 5867368598465364348, 18446744073709551615, 18446744073709551615, 938, 952, 938, 952, 177, 179, true, "load imbalance", "load imbalance"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 12400507963759742880, 6135519514760170296, 18446744073709551615, 18446744073709551615, 965, 977, 965, 977, 181, 183, true, "worker nodes", "worker nodes"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 12569603855738370264, 1147410557148444790, 18446744073709551615, 18446744073709551615, 1023, 1041, 1023, 1041, 190, 193, true, "large corpus sizes", "large corpus sizes"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 4421383392096991748, 17586453718413772848, 18446744073709551615, 18446744073709551615, 1082, 1099, 1082, 1099, 202, 204, true, "compute resources", "compute resources"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206514091025767, 11298218412956847237, 18446744073709551615, 18446744073709551615, 3, 9, 3, 9, 1, 2, true, "Figure", "Figure"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478648771436891, 15412781195243883400, 18446744073709551615, 18446744073709551615, 25, 32, 25, 32, 7, 8, true, "scaling", "scaling"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106479143794098783, 12796848297776230218, 18446744073709551615, 18446744073709551615, 84, 91, 84, 91, 17, 18, true, "parsing", "parsing"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106464587473865376, 14658563877589949653, 18446744073709551615, 18446744073709551615, 119, 126, 119, 126, 23, 24, true, "machine", "machine"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206567230470443, 6941201434190273501, 18446744073709551615, 18446744073709551615, 135, 141, 135, 141, 25, 26, true, "models", "models"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 2703018679320364082, 5037546725350435645, 18446744073709551615, 18446744073709551615, 146, 156, 146, 156, 27, 28, true, "conversion", "conversion"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 6167933651658664291, 11495196011120521523, 18446744073709551615, 18446744073709551615, 160, 169, 160, 169, 29, 30, true, "documents", "documents"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625541450799, 11852205465525539051, 18446744073709551615, 18446744073709551615, 173, 177, 173, 177, 31, 32, true, "JSON", "JSON"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14814125365076808131, 1145249750150199435, 18446744073709551615, 18446744073709551615, 186, 194, 186, 194, 35, 36, true, "platform", "platform"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206521526353544, 6483551066150257775, 18446744073709551615, 18446744073709551615, 200, 206, 200, 206, 37, 38, true, "regard", "regard"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 6168338487309432467, 8730004420584075578, 18446744073709551615, 18446744073709551615, 218, 227, 218, 227, 40, 41, true, "resources", "resources"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478648771436891, 15412781195243911626, 18446744073709551615, 18446744073709551615, 242, 249, 242, 249, 45, 46, true, "scaling", "scaling"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478695960615463, 15062829371033729664, 18446744073709551615, 18446744073709551615, 268, 275, 268, 275, 49, 50, true, "speedup", "speedup"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206574973295053, 7759102089231672623, 18446744073709551615, 18446744073709551615, 287, 293, 287, 293, 52, 53, true, "number", "number"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478059506484182, 992201111003243269, 18446744073709551615, 18446744073709551615, 349, 356, 349, 356, 65, 66, true, "workers", "workers"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206557159905849, 4500784834839577245, 18446744073709551615, 18446744073709551615, 404, 410, 404, 410, 73, 74, true, "worker", "worker"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625621164460, 11851913345703152408, 18446744073709551615, 18446744073709551615, 427, 431, 427, 431, 78, 79, true, "node", "node"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104161555640697, 12658575526930668613, 18446744073709551615, 18446744073709551615, 442, 447, 442, 447, 81, 82, true, "cores", "cores"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478695960615463, 15062829371036718218, 18446744073709551615, 18446744073709551615, 473, 480, 473, 480, 89, 90, true, "speedup", "speedup"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104161667983915, 11953058923899695299, 18446744073709551615, 18446744073709551615, 488, 493, 488, 493, 92, 93, true, "parse", "parse"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541480579, 15841608372110957817, 18446744073709551615, 18446744073709551615, 498, 500, 498, 500, 94, 95, true, "ML", "ML"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206574973295053, 7759102089231787311, 18446744073709551615, 18446744073709551615, 542, 548, 542, 548, 102, 103, true, "number", "number"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478059506484182, 992201111003203952, 18446744073709551615, 18446744073709551615, 552, 559, 552, 559, 104, 105, true, "workers", "workers"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104161758737773, 12673240055158662957, 18446744073709551615, 18446744073709551615, 574, 579, 574, 579, 109, 110, true, "nodes", "nodes"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14652257141644167489, 8090587240840002892, 18446744073709551615, 18446744073709551615, 699, 707, 699, 707, 130, 131, true, "baseline", "baseline"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206557159905849, 4500784834839606045, 18446744073709551615, 18446744073709551615, 717, 723, 717, 723, 133, 134, true, "worker", "worker"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478695960615463, 15062829371033753418, 18446744073709551615, 18446744073709551615, 729, 736, 729, 736, 136, 137, true, "speedup", "speedup"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 2703018939289543887, 15004756027981769615, 18446744073709551615, 18446744073709551615, 763, 773, 763, 773, 143, 144, true, "comparison", "comparison"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625631210899, 11837631279683138832, 18446744073709551615, 18446744073709551615, 804, 808, 804, 808, 151, 152, true, "task", "task"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14650401089286948001, 16245137145237128880, 18446744073709551615, 18446744073709551615, 841, 849, 841, 849, 158, 159, true, "document", "document"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 11600564911974996302, 14480046541473745390, 18446744073709551615, 18446744073709551615, 881, 892, 881, 892, 167, 168, true, "variability", "variability"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206590668214829, 418453764520998193, 18446744073709551615, 18446744073709551615, 900, 906, 900, 906, 170, 171, true, "length", "length"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 6167933651658664291, 11495196011120532495, 18446744073709551615, 18446744073709551615, 910, 919, 910, 919, 172, 173, true, "documents", "documents"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104161571401725, 12673278341361969037, 18446744073709551615, 18446744073709551615, 1103, 1108, 1103, 1108, 205, 206, true, "order", "order"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 6285955549867796622, 17538568638231419383, 18446744073709551615, 18446744073709551615, 1121, 1137, 1121, 1137, 209, 210, true, "time-to-solution", "time-to-solution"], ["term", "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14639522327238241124, 8193922819820873277, 18446744073709551615, 18446744073709551615, 1155, 1163, 1155, 1163, 213, 214, true, "job-size", "job-size"], ["verb", "compound-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16437991637119672281, 10698691307559986577, 18446744073709551615, 18446744073709551615, 330, 343, 330, 343, 61, 64, true, "chose to have", "chose to have"], ["verb", "compound-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15603860935510693939, 14727933870906010759, 18446744073709551615, 18446744073709551615, 411, 421, 411, 421, 74, 76, true, "is running", "is running"], ["verb", "compound-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 4859670139939149227, 6280919696978205559, 18446744073709551615, 18446744073709551615, 818, 833, 818, 833, 154, 156, true, "be parallelised", "be parallelised"], ["verb", "compound-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 2871347585595950403, 17619376144327821929, 18446744073709551615, 18446744073709551615, 920, 932, 920, 932, 173, 175, true, "is reflected", "is reflected"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625741152123, 11952417481958675502, 18446744073709551615, 18446744073709551615, 16, 20, 16, 20, 5, 6, true, "show", "show"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541486545, 15841608933708088140, 18446744073709551615, 18446744073709551615, 75, 79, 75, 79, 15, 16, true, "ie", "i.e."], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14650448030444381648, 6220015674503799158, 18446744073709551615, 18446744073709551615, 110, 118, 110, 118, 22, 23, true, "applying", "applying"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106342444693204894, 3141726382109473264, 18446744073709551615, 18446744073709551615, 127, 134, 127, 134, 24, 25, true, "learned", "learned"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106398484825895017, 9843385479250406664, 18446744073709551615, 18446744073709551615, 210, 217, 210, 217, 39, 40, true, "compute", "compute"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625741152123, 11952417481958832640, 18446744073709551615, 18446744073709551615, 232, 236, 232, 236, 43, 44, true, "show", "show"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 5314879136556773391, 10872235211663904849, 18446744073709551615, 18446744073709551615, 253, 263, 253, 263, 47, 48, true, "displaying", "displaying"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206519567123880, 11458061957152441695, 18446744073709551615, 18446744073709551615, 276, 282, 276, 282, 50, 51, true, "versus", "versus"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106478708506631920, 15215214992224138903, 18446744073709551615, 18446744073709551615, 357, 364, 357, 364, 66, 67, true, "serving", "serving"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14892726175400695403, 11929108714311456052, 18446744073709551615, 18446744073709551615, 456, 467, 456, 467, 85, 87, true, "can observe", "can observe"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104159174415764, 15201692130147370947, 18446744073709551615, 18446744073709551615, 501, 506, 501, 506, 95, 96, true, "apply", "apply"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106342033696543838, 12894177882275964000, 18446744073709551615, 18446744073709551615, 602, 609, 602, 609, 116, 117, true, "observe", "observe"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106397800846024988, 10875439300264671294, 18446744073709551615, 18446744073709551615, 655, 662, 655, 662, 123, 124, true, "appears", "appears"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14637929372960545624, 5914110341326941133, 18446744073709551615, 18446744073709551615, 775, 783, 775, 783, 145, 146, true, "flattens", "flattens"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14650440089690345452, 13442210495055254219, 18446744073709551615, 18446744073709551615, 992, 1000, 992, 1000, 186, 187, true, "averages", "averages"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 12178341415895564896, 12750707122609445157, 18446744073709551615, 18446744073709551615, 1060, 1063, 1060, 1063, 197, 198, true, "are", "are"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 329104161785194305, 12659210101863210938, 18446744073709551615, 18446744073709551615, 1072, 1077, 1072, 1077, 200, 201, true, "scale", "scale"], ["verb", "single-verb", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625632420840, 11837667465904018998, 18446744073709551615, 18446744073709551615, 1112, 1116, 1112, 1116, 207, 208, true, "keep", "keep"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 4606782280409462864, 1759956167227045589, 18446744073709551615, 18446744073709551615, 1138, 1150, 1138, 1150, 210, 212, true, "constant for", "constant for"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541480354, 15841607655799874221, 18446744073709551615, 18446744073709551615, 0, 2, 0, 2, 0, 1, true, "In", "In"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206565712212855, 6967163224569431769, 18446744073709551615, 18446744073709551615, 33, 39, 33, 39, 8, 10, true, "of the", "of the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485670, 15841608916207186363, 18446744073709551615, 18446744073709551615, 92, 94, 92, 94, 18, 19, true, "of", "of"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485670, 15841608916207173993, 18446744073709551615, 18446744073709551615, 157, 159, 157, 159, 28, 29, true, "of", "of"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206566339127348, 6948695024569863629, 18446744073709551615, 18446744073709551615, 179, 185, 179, 185, 33, 35, true, "on the", "on the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625618037948, 11853381785047006947, 18446744073709551615, 18446744073709551615, 195, 199, 195, 199, 36, 37, true, "with", "with"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541486989, 15841608542350473314, 18446744073709551615, 18446744073709551615, 250, 252, 250, 252, 46, 47, true, "by", "by"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485670, 15841608916207199049, 18446744073709551615, 18446744073709551615, 294, 296, 294, 296, 53, 54, true, "of", "of"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 14091433066300748251, 10813497490882117614, 18446744073709551615, 18446744073709551615, 393, 403, 393, 403, 71, 73, true, "since each", "since each"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625618762887, 11853298334064133313, 18446744073709551615, 18446744073709551615, 422, 426, 422, 426, 76, 78, true, "on a", "on a"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625618037948, 11853381785046890442, 18446744073709551615, 18446744073709551615, 432, 436, 432, 436, 79, 80, true, "with", "with"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541480533, 15841607645344043171, 18446744073709551615, 18446744073709551615, 449, 451, 449, 451, 83, 84, true, "As", "As"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206560518651853, 299391170561034072, 18446744073709551615, 18446744073709551615, 481, 487, 481, 487, 90, 92, true, "in the", "in the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 5535791613041986682, 1987456193213323417, 18446744073709551615, 18446744073709551615, 529, 541, 529, 541, 99, 102, true, "with the the", "with the the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485670, 15841608916207808715, 18446744073709551615, 18446744073709551615, 549, 551, 549, 551, 103, 104, true, "of", "of"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206566339127348, 6948695024569829645, 18446744073709551615, 18446744073709551615, 692, 698, 692, 698, 128, 130, true, "on the", "on the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625618037948, 11853381785046972707, 18446744073709551615, 18446744073709551615, 708, 712, 708, 712, 131, 132, true, "with", "with"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206566339127348, 6948695024569840929, 18446744073709551615, 18446744073709551615, 737, 743, 737, 743, 137, 139, true, "on the", "on the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541486538, 15841608934679761481, 18446744073709551615, 18446744073709551615, 760, 762, 760, 762, 142, 143, true, "in", "in"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 8106397797884119903, 2253942102629974295, 18446744073709551615, 18446744073709551615, 796, 803, 796, 803, 149, 151, true, "as this", "as this"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206566339127348, 6948695024569822665, 18446744073709551615, 18446744073709551615, 834, 840, 834, 840, 156, 158, true, "on the", "on the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206566339127348, 6948695024569833459, 18446744073709551615, 18446744073709551615, 858, 864, 858, 864, 161, 163, true, "on the", "on the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 16381206560518651853, 299391170561066249, 18446744073709551615, 18446744073709551615, 893, 899, 893, 899, 168, 170, true, "in the", "in the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485670, 15841608916207884744, 18446744073709551615, 18446744073709551615, 907, 909, 907, 909, 171, 172, true, "of", "of"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625698530964, 11943889382951508475, 18446744073709551615, 18446744073709551615, 933, 937, 933, 937, 175, 177, true, "in a", "in a"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 2011002864325523456, 9569710153707003014, 18446744073709551615, 18446744073709551615, 953, 964, 953, 964, 179, 181, true, "between the", "between the"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 389609625618037948, 11853381785046974620, 18446744073709551615, 18446744073709551615, 1005, 1009, 1005, 1009, 188, 189, true, "with", "with"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541486538, 15841608934679158336, 18446744073709551615, 18446744073709551615, 1100, 1102, 1100, 1102, 204, 205, true, "in", "in"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925665456, 18446744073709551615, 18446744073709551615, 170, 172, 170, 172, 30, 31, true, "to", "to"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925705101, 18446744073709551615, 18446744073709551615, 207, 209, 207, 209, 38, 39, true, "to", "to"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925660269, 18446744073709551615, 18446744073709551615, 336, 338, 336, 338, 62, 63, true, "to", "to"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925567374, 18446744073709551615, 18446744073709551615, 667, 669, 667, 669, 125, 126, true, "to", "to"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925573526, 18446744073709551615, 18446744073709551615, 1069, 1071, 1069, 1071, 199, 200, true, "to", "to"], ["conn", "single-conn", 7054726458191881751, "TEXT", "#/texts/90", 1.0, 15441160910541485865, 15841608914925579244, 18446744073709551615, 18446744073709551615, 1109, 1111, 1109, 1111, 206, 207, true, "to", "to"], ["numval", "ival", 7794115281016062068, "TEXT", "#/texts/91", 1.0, 17767354399704235157, 9706977069123592745, 18446744073709551615, 18446744073709551615, 0, 1, 0, 1, 0, 1, true, "5", "5"], ["sentence", "", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 657005981473069779, 855544429870992132, 18446744073709551615, 18446744073709551615, 0, 276, 0, 276, 0, 48, true, "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation."], ["term", "enum-term-mark-2", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 15515663500877316360, 9779616463577804921, 18446744073709551615, 18446744073709551615, 70, 88, 70, 88, 14, 17, true, "parse and annotate", "parse and annotate"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 3764444893564113560, 6998103868267134559, 18446744073709551615, 18446744073709551615, 80, 98, 80, 98, 16, 18, true, "annotate documents", "annotate documents"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 10640406915501670366, 17510700195140757278, 18446744073709551615, 18446744073709551615, 204, 222, 204, 222, 37, 39, true, "ingested documents", "ingested documents"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 7992990666316029472, 588680236579575873, 18446744073709551615, 18446744073709551615, 245, 275, 245, 275, 44, 47, true, "structured data representation", "structured data representation"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 14814125365076808131, 17380690645567101284, 18446744073709551615, 18446744073709551615, 42, 50, 42, 50, 8, 9, true, "platform", "platform"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 329104161667983915, 6897824401029810817, 18446744073709551615, 18446744073709551615, 70, 75, 70, 75, 14, 15, true, "parse", "parse"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 329104159241569908, 5347589032455145571, 18446744073709551615, 18446744073709551615, 118, 123, 118, 123, 22, 23, true, "train", "train"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 8106464587473865376, 2835791627105026255, 18446744073709551615, 18446744073709551615, 141, 148, 141, 148, 26, 27, true, "machine", "machine"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 16381206567230470443, 15747559098284370939, 18446744073709551615, 18446744073709551615, 158, 164, 158, 164, 28, 29, true, "models", "models"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 329104161571401725, 6886205595361632203, 18446744073709551615, 18446744073709551615, 168, 173, 168, 173, 30, 31, true, "order", "order"], ["term", "single-term", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 8106398484416916345, 12761515448611326706, 18446744073709551615, 18446744073709551615, 189, 196, 189, 196, 34, 35, true, "content", "content"], ["verb", "compound-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 13481804153867000640, 5921155616940636913, 18446744073709551615, 18446744073709551615, 3, 17, 3, 17, 1, 3, true, "have presented", "have presented"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 329104159219515955, 5354365758875797437, 18446744073709551615, 18446744073709551615, 36, 41, 36, 41, 7, 8, true, "based", "based"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 2873440693780286732, 4285909795825994377, 18446744073709551615, 18446744073709551615, 58, 68, 58, 68, 11, 13, true, "can ingest", "can ingest"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 14650447832610756948, 58344428677043651, 18446744073709551615, 18446744073709551615, 132, 140, 132, 140, 25, 26, true, "advanced", "advanced"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 14639581097006750428, 9301901239831102358, 18446744073709551615, 18446744073709551615, 149, 157, 149, 157, 27, 28, true, "learning", "learning"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 8106397496930289884, 12659480351226973306, 18446744073709551615, 18446744073709551615, 177, 184, 177, 184, 32, 33, true, "extract", "extract"], ["verb", "single-verb", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 8106398484416229602, 12761525411618135232, 18446744073709551615, 18446744073709551615, 227, 234, 227, 234, 40, 41, true, "convert", "convert"], ["conn", "single-conn", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 15441160910541486538, 5009177319356301500, 18446744073709551615, 18446744073709551615, 165, 167, 165, 167, 29, 30, true, "in", "in"], ["conn", "single-conn", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 16381206565712212855, 15774434041302426793, 18446744073709551615, 18446744073709551615, 197, 203, 197, 203, 35, 37, true, "of the", "of the"], ["conn", "single-conn", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 16381206560517276114, 4138873729787782213, 18446744073709551615, 18446744073709551615, 238, 244, 238, 244, 42, 44, true, "into a", "into a"], ["conn", "single-conn", 7038163015905900647, "TEXT", "#/texts/92", 1.0, 15441160910541485865, 5009177098714228529, 18446744073709551615, 18446744073709551615, 174, 176, 174, 176, 31, 32, true, "to", "to"], ["sentence", "", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 5399734795549420383, 9459426372965111375, 18446744073709551615, 18446744073709551615, 0, 102, 0, 102, 0, 17, true, "The fundamental design choices in our solution have proven to enable scaling in three elementary ways.", "The fundamental design choices in our solution have proven to enable scaling in three elementary ways."], ["sentence", "", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16807999257243449869, 13297903776875612574, 18446744073709551615, 18446744073709551615, 103, 153, 103, 153, 17, 26, true, "First, it can service multiple users concurrently.", "First, it can service multiple users concurrently."], ["sentence", "", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 13213872932381400279, 13188215006770693544, 18446744073709551615, 18446744073709551615, 154, 251, 154, 251, 26, 46, true, "Second, it can ingest, parse and apply machine learned models on many documents at the same time.", "Second, it can ingest, parse and apply machine learned models on many documents at the same time."], ["sentence", "", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 9242556889455087990, 15971994141625715858, 18446744073709551615, 18446744073709551615, 252, 468, 252, 468, 46, 85, true, "Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", "Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources."], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 2281965028547407404, 4585290171111099204, 18446744073709551615, 18446744073709551615, 4, 30, 4, 30, 1, 4, true, "fundamental design choices", "fundamental design choices"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 10458141827175777973, 662304186431118688, 18446744073709551615, 18446744073709551615, 86, 101, 86, 101, 14, 16, true, "elementary ways", "elementary ways"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 17200993861033027072, 10058402512815484380, 18446744073709551615, 18446744073709551615, 125, 139, 125, 139, 22, 24, true, "multiple users", "multiple users"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 12462088721494412558, 14795705853314288990, 18446744073709551615, 18446744073709551615, 219, 233, 219, 233, 39, 41, true, "many documents", "many documents"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 6168880476795325400, 1749109350869750559, 18446744073709551615, 18446744073709551615, 241, 250, 241, 250, 43, 45, true, "same time", "same time"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 4421383392096991748, 13841033715090277122, 18446744073709551615, 18446744073709551615, 276, 293, 276, 293, 52, 54, true, "compute resources", "compute resources"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 13127417780425802861, 18242234552519957632, 18446744073709551615, 18446744073709551615, 298, 313, 298, 313, 55, 57, true, "different tasks", "different tasks"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 871079831703051200, 5077405699917193499, 18446744073709551615, 18446744073709551615, 349, 364, 349, 364, 63, 65, true, "respective load", "respective load"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 10442897704134762600, 3285893567564519587, 18446744073709551615, 18446744073709551615, 451, 467, 451, 467, 82, 84, true, "enough resources", "enough resources"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 14635106751859230946, 764401693493390910, 18446744073709551615, 18446744073709551615, 38, 46, 38, 46, 6, 7, true, "solution", "solution"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 329104161667983915, 8353591573424153686, 18446744073709551615, 18446744073709551615, 177, 182, 177, 182, 32, 33, true, "parse", "parse"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 8106464587473865376, 16865370909529075844, 18446744073709551615, 18446744073709551615, 193, 200, 193, 200, 35, 36, true, "machine", "machine"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206567230470443, 6296565769111805720, 18446744073709551615, 18446744073709551615, 209, 215, 209, 215, 37, 38, true, "models", "models"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 329104161844229707, 8223163135175074012, 18446744073709551615, 18446744073709551615, 252, 257, 252, 257, 46, 47, true, "Third", "Third"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 14814125365076808131, 2596919094696196606, 18446744073709551615, 18446744073709551615, 321, 329, 321, 329, 59, 60, true, "platform", "platform"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 2703018679320364082, 16899905581150215026, 18446744073709551615, 18446744073709551615, 372, 382, 372, 382, 67, 68, true, "conversion", "conversion"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 6167933651658664291, 2405213947196016063, 18446744073709551615, 18446744073709551615, 386, 395, 386, 395, 69, 70, true, "documents", "documents"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 14814125365076808131, 2596919094696188986, 18446744073709551615, 18446744073709551615, 403, 411, 403, 411, 72, 73, true, "platform", "platform"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 329104159219994925, 8265223761504278760, 18446744073709551615, 18446744073709551615, 422, 427, 422, 427, 76, 77, true, "times", "times"], ["term", "single-term", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 389609625631241985, 2218225659402359325, 18446744073709551615, 18446744073709551615, 439, 443, 439, 443, 79, 80, true, "time", "time"], ["verb", "compound-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 3403952970044578622, 10903875917460680118, 18446744073709551615, 18446744073709551615, 47, 76, 47, 76, 7, 12, true, "have proven to enable scaling", "have proven to enable scaling"], ["verb", "compound-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15412069422981600492, 6547325180036345245, 18446744073709551615, 18446744073709551615, 330, 342, 330, 342, 60, 62, true, "according to", "according to"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 14892726286148891751, 10384184194505177525, 18446744073709551615, 18446744073709551615, 113, 124, 113, 124, 20, 22, true, "can service", "can service"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 2873440693780286732, 15985974084754193151, 18446744073709551615, 18446744073709551615, 165, 175, 165, 175, 29, 31, true, "can ingest", "can ingest"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 329104159174415764, 8268242647359376883, 18446744073709551615, 18446744073709551615, 187, 192, 187, 192, 34, 35, true, "apply", "apply"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 8106342444693204894, 6388168354172051323, 18446744073709551615, 18446744073709551615, 201, 208, 201, 208, 36, 37, true, "learned", "learned"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 5949049089925459445, 6157765644161528738, 18446744073709551615, 18446744073709551615, 262, 271, 262, 271, 49, 51, true, "can scale", "can scale"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541486535, 12447978246358110993, 18446744073709551615, 18446744073709551615, 412, 414, 412, 414, 73, 74, true, "is", "is"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 8106396909840561507, 15824344309645083727, 18446744073709551615, 18446744073709551615, 428, 435, 428, 435, 77, 78, true, "bounded", "bounded"], ["verb", "single-verb", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 329104159209890620, 8264779023608497036, 18446744073709551615, 18446744073709551615, 445, 450, 445, 450, 81, 82, true, "given", "given"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541486538, 12447978247236708799, 18446744073709551615, 18446744073709551615, 31, 33, 31, 33, 4, 5, true, "in", "in"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541486538, 12447978247236781944, 18446744073709551615, 18446744073709551615, 77, 79, 77, 79, 12, 13, true, "in", "in"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541485678, 12447978235992890146, 18446744073709551615, 18446744073709551615, 216, 218, 216, 218, 38, 39, true, "on", "on"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206568372064271, 7263818147332248111, 18446744073709551615, 18446744073709551615, 234, 240, 234, 240, 41, 43, true, "at the", "at the"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 12178341415895625940, 10930510655083395949, 18446744073709551615, 18446744073709551615, 294, 297, 294, 297, 54, 55, true, "for", "for"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206566339127348, 6281427824769892480, 18446744073709551615, 18446744073709551615, 314, 320, 314, 320, 57, 59, true, "on the", "on the"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206579218901666, 7932367388675800903, 18446744073709551615, 18446744073709551615, 365, 371, 365, 371, 65, 67, true, "so the", "so the"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541485670, 12447978245548248810, 18446744073709551615, 18446744073709551615, 383, 385, 383, 385, 68, 69, true, "of", "of"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206566339127348, 6281427824769909768, 18446744073709551615, 18446744073709551615, 396, 402, 396, 402, 70, 72, true, "on the", "on the"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 16381206568372178543, 7263503225869480888, 18446744073709551615, 18446744073709551615, 415, 421, 415, 421, 74, 76, true, "at all", "at all"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541486538, 12447978247236683376, 18446744073709551615, 18446744073709551615, 436, 438, 436, 438, 78, 79, true, "in", "in"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541485865, 12447978233189958175, 18446744073709551615, 18446744073709551615, 59, 61, 59, 61, 9, 10, true, "to", "to"], ["conn", "single-conn", 1508626318915838319, "TEXT", "#/texts/93", 1.0, 15441160910541485865, 12447978233189813654, 18446744073709551615, 18446744073709551615, 340, 342, 340, 342, 61, 62, true, "to", "to"], ["numval", "ival", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 17767354399704235157, 6666235790308819566, 18446744073709551615, 18446744073709551615, 720, 721, 720, 721, 134, 135, true, "5", "5"], ["parenthesis", "round brackets", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 772704748867907067, 17873771936385193962, 18446744073709551615, 18446744073709551615, 215, 286, 215, 286, 42, 57, true, "(e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)", "(e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)"], ["parenthesis", "round brackets", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 7596548548401207156, 5106769991605743942, 18446744073709551615, 18446744073709551615, 697, 722, 697, 722, 128, 136, true, "(as is shown in Figure 5)", "(as is shown in Figure 5)"], ["expression", "common", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541487324, 12448443577304465400, 18446744073709551615, 18446744073709551615, 216, 220, 216, 220, 43, 44, true, "eg", "e.g."], ["expression", "word-concatenation", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 13953038768306043326, 2217483007470679809, 18446744073709551615, 18446744073709551615, 253, 263, 253, 263, 50, 51, true, "pie-charts", "pie-charts"], ["expression", "word-concatenation", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 5428486186575573840, 17552603483030949066, 18446744073709551615, 18446744073709551615, 412, 428, 412, 428, 80, 81, true, "image-classifier", "image-classifier"], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 9576287605285270893, 7775032662306861151, 18446744073709551615, 18446744073709551615, 0, 65, 0, 65, 0, 15, true, "In the future, we plan to extend the platform in two major areas.", "In the future, we plan to extend the platform in two major areas."], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 7980828285556281738, 2544051083396498287, 18446744073709551615, 18446744073709551615, 66, 172, 66, 172, 15, 34, true, "First, we would like to extend the number of microservices, especially with regard to image understanding.", "First, we would like to extend the number of microservices, especially with regard to image understanding."], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16816675794156539317, 4106452168371569212, 18446744073709551615, 18446744073709551615, 173, 287, 173, 287, 34, 58, true, "The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc).", "The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)."], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16727745954821675360, 18300594417076082954, 18446744073709551615, 18446744073709551615, 288, 429, 288, 429, 58, 82, true, "The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier.", "The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier."], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 10448641789434054504, 14093320860906874170, 18446744073709551615, 18446744073709551615, 430, 513, 430, 513, 82, 98, true, "Second, we would like to improve the quality and performance of our default models.", "Second, we would like to improve the quality and performance of our default models."], ["sentence", "", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 3084657715463842285, 15630767766630582663, 18446744073709551615, 18446744073709551615, 514, 723, 514, 723, 98, 137, true, "We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5).", "We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5)."], ["term", "enum-term-mark-2", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 18219039247346551478, 2994436876810062612, 18446744073709551615, 18446744073709551615, 216, 239, 216, 239, 43, 47, true, "eg line & scatterplot", "e.g. line & scatterplot"], ["term", "enum-term-mark-2", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 2459701502714558679, 5298793252682520889, 18446744073709551615, 18446744073709551615, 467, 490, 467, 490, 90, 93, true, "quality and performance", "quality and performance"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16589376492252179077, 7295144040672653108, 18446744073709551615, 18446744073709551615, 53, 64, 53, 64, 12, 14, true, "major areas", "major areas"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106398377967204844, 1921596529468359029, 18446744073709551615, 18446744073709551615, 216, 225, 216, 225, 43, 45, true, "eg line", "e.g. line"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 5358230985886796623, 5106522770952356562, 18446744073709551615, 18446744073709551615, 265, 280, 265, 280, 52, 54, true, "geographic maps", "geographic maps"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15357232380281159303, 112568471828176926, 18446744073709551615, 18446744073709551615, 344, 359, 344, 359, 70, 72, true, "individual type", "individual type"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 3849116425022465253, 9034086680124657749, 18446744073709551615, 18446744073709551615, 378, 403, 378, 403, 76, 78, true, "successful identification", "successful identification"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 1915006193249717419, 4993787564856558201, 18446744073709551615, 18446744073709551615, 498, 512, 498, 512, 95, 97, true, "default models", "default models"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 3374009463271020691, 3843260871587525071, 18446744073709551615, 18446744073709551615, 585, 600, 585, 600, 110, 112, true, "neural networks", "neural networks"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 10900025937134233159, 18131173731884203799, 18446744073709551615, 18446744073709551615, 636, 655, 636, 655, 118, 120, true, "photographic images", "photographic images"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 5766847864654328399, 4382574540747563376, 18446744073709551615, 18446744073709551615, 675, 696, 675, 696, 125, 128, true, "parsed document pages", "parsed document pages"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 6604953305718748559, 8562548416720689057, 18446744073709551615, 18446744073709551615, 756, 776, 756, 776, 143, 146, true, "deep learning models", "deep learning models"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206565274670318, 14238598565348925208, 18446744073709551615, 18446744073709551615, 7, 13, 7, 13, 2, 3, true, "future", "future"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 14814125365076808131, 2312829961765099304, 18446744073709551615, 18446744073709551615, 37, 45, 37, 45, 9, 10, true, "platform", "platform"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206574973295053, 1926660952952474766, 18446744073709551615, 18446744073709551615, 101, 107, 101, 107, 23, 24, true, "number", "number"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 990358581043194791, 6104208925519602427, 18446744073709551615, 18446744073709551615, 111, 124, 111, 124, 25, 26, true, "microservices", "microservices"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206521526353544, 4410027063676095069, 18446744073709551615, 18446744073709551615, 142, 148, 142, 148, 29, 30, true, "regard", "regard"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 11827147635933835345, 3554885262491213918, 18446744073709551615, 18446744073709551615, 158, 171, 158, 171, 32, 33, true, "understanding", "understanding"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206574973295053, 1926660952951671347, 18446744073709551615, 18446744073709551615, 177, 183, 177, 183, 35, 36, true, "number", "number"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 329104159243796903, 7082055202846522668, 18446744073709551615, 18446744073709551615, 187, 192, 187, 192, 37, 38, true, "types", "types"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206560620045048, 3914201981705366923, 18446744073709551615, 18446744073709551615, 196, 202, 196, 202, 39, 40, true, "images", "images"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 1839290100020230611, 2397938769091018318, 18446744073709551615, 18446744073709551615, 228, 239, 228, 239, 46, 47, true, "scatterplot", "scatterplot"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16102584389807428912, 3793139059914902481, 18446744073709551615, 18446744073709551615, 241, 251, 241, 251, 48, 49, true, "histograms", "histograms"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 13953038768306043326, 2217483007470679809, 18446744073709551615, 18446744073709551615, 253, 263, 253, 263, 50, 51, true, "pie-charts", "pie-charts"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 389609625699055241, 7447546965782188814, 18446744073709551615, 18446744073709551615, 292, 296, 292, 296, 59, 60, true, "goal", "goal"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 389609625696431489, 7440840763973745685, 18446744073709551615, 18446744073709551615, 326, 330, 326, 330, 66, 67, true, "data", "data"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206560620045048, 3914201981705340835, 18446744073709551615, 18446744073709551615, 363, 369, 363, 369, 73, 74, true, "images", "images"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 5428486186575573840, 17552603483030949066, 18446744073709551615, 18446744073709551615, 412, 428, 412, 428, 80, 81, true, "image-classifier", "image-classifier"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106477781724488761, 4422931059285339225, 18446744073709551615, 18446744073709551615, 467, 474, 467, 474, 90, 91, true, "quality", "quality"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 5731695876385560379, 12754564211995509475, 18446744073709551615, 18446744073709551615, 479, 490, 479, 490, 92, 93, true, "performance", "performance"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106478445190161533, 8668956716153119308, 18446744073709551615, 18446744073709551615, 543, 550, 543, 550, 103, 104, true, "results", "results"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206560620045048, 3914201981705337550, 18446744073709551615, 18446744073709551615, 665, 671, 665, 671, 123, 124, true, "images", "images"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16381206514091025767, 4428872138347593094, 18446744073709551615, 18446744073709551615, 713, 719, 713, 719, 133, 134, true, "Figure", "Figure"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 14639580755784032837, 10701824725110310827, 18446744073709551615, 18446744073709551615, 727, 735, 727, 735, 138, 139, true, "leverage", "leverage"], ["term", "single-term", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 12178341415895516060, 598493059994502436, 18446744073709551615, 18446744073709551615, 749, 752, 749, 752, 141, 142, true, "use", "use"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 6843908984328718198, 4424980337438809569, 18446744073709551615, 18446744073709551615, 18, 32, 18, 32, 5, 8, true, "plan to extend", "plan to extend"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 14998042519330616781, 177101627084045088, 18446744073709551615, 18446744073709551615, 76, 96, 76, 96, 18, 22, true, "would like to extend", "would like to extend"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 4420603704750285605, 14167669410101881458, 18446744073709551615, 18446744073709551615, 302, 321, 302, 321, 61, 65, true, "would be to extract", "would be to extract"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16290083057699948816, 15990868729997335654, 18446744073709551615, 18446744073709551615, 441, 462, 441, 462, 85, 89, true, "would like to improve", "would like to improve"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 17236050900252224747, 9854267715107878317, 18446744073709551615, 18446744073709551615, 551, 574, 551, 574, 104, 108, true, "can be greatly improved", "can be greatly improved"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8208641893359681869, 16932607482672372426, 18446744073709551615, 18446744073709551615, 614, 631, 614, 631, 114, 117, true, "use are optimised", "use are optimised"], ["verb", "compound-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 14637951881518043285, 16016201078485034145, 18446744073709551615, 18446744073709551615, 701, 709, 701, 709, 130, 132, true, "is shown", "is shown"], ["verb", "single-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 329104161828335551, 7191149074974692359, 18446744073709551615, 18446744073709551615, 152, 157, 152, 157, 31, 32, true, "image", "image"], ["verb", "single-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541486535, 12448443551126566363, 18446744073709551615, 18446744073709551615, 203, 205, 203, 205, 40, 41, true, "is", "is"], ["verb", "single-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106397860663428876, 7464848062962649547, 18446744073709551615, 18446744073709551615, 526, 533, 526, 533, 100, 101, true, "believe", "believe"], ["verb", "single-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106397113586492286, 6559895150961820650, 18446744073709551615, 18446744073709551615, 741, 748, 741, 748, 140, 141, true, "growing", "growing"], ["verb", "single-verb", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 6182665070913771698, 13573081868742307876, 18446744073709551615, 18446744073709551615, 799, 808, 799, 808, 150, 151, true, "introduce", "introduce"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 16380809977974811061, 16065202910059383934, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 2, true, "In the", "In the"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541486538, 12448443553082805214, 18446744073709551615, 18446744073709551615, 46, 48, 46, 48, 10, 11, true, "in", "in"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173148059932, 18446744073709551615, 18446744073709551615, 108, 110, 108, 110, 24, 25, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 389609625618037948, 7445535260585538379, 18446744073709551615, 18446744073709551615, 137, 141, 137, 141, 28, 29, true, "with", "with"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173148052031, 18446744073709551615, 18446744073709551615, 184, 186, 184, 186, 36, 37, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173148050529, 18446744073709551615, 18446744073709551615, 193, 195, 193, 195, 38, 39, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 12178341415895623120, 598445003466491402, 18446744073709551615, 18446744073709551615, 331, 334, 331, 334, 67, 68, true, "out", "out"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 14814148868025447689, 6811951436730744836, 18446744073709551615, 18446744073709551615, 335, 343, 335, 343, 68, 70, true, "of these", "of these"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173147950427, 18446744073709551615, 18446744073709551615, 360, 362, 360, 362, 72, 73, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106398472718381934, 9575911640413642094, 18446744073709551615, 18446744073709551615, 370, 377, 370, 377, 74, 76, true, "after a", "after a"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 8106477988572616406, 15264315134668474563, 18446744073709551615, 18446744073709551615, 404, 411, 404, 411, 78, 80, true, "with an", "with an"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173148007931, 18446744073709551615, 18446744073709551615, 491, 493, 491, 493, 93, 94, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 14634130761162415388, 3833651190149238108, 18446744073709551615, 18446744073709551615, 534, 542, 534, 542, 101, 103, true, "that the", "that the"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 6168057894310307081, 11769172586530017585, 18446744073709551615, 18446744073709551615, 575, 584, 575, 584, 108, 110, true, "since the", "since the"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 12178341415895625940, 598444982766319560, 18446744073709551615, 18446744073709551615, 632, 635, 632, 635, 117, 118, true, "for", "for"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173147963145, 18446744073709551615, 18446744073709551615, 672, 674, 672, 674, 124, 125, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541487053, 12448443593105791703, 18446744073709551615, 18446744073709551615, 698, 700, 698, 700, 129, 130, true, "as", "as"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541486538, 12448443553082893684, 18446744073709551615, 18446744073709551615, 710, 712, 710, 712, 132, 133, true, "in", "in"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485670, 12448449173147964365, 18446744073709551615, 18446744073709551615, 753, 755, 753, 755, 142, 143, true, "of", "of"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485865, 12448449225007565696, 18446744073709551615, 18446744073709551615, 23, 25, 23, 25, 6, 7, true, "to", "to"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485865, 12448449225007823792, 18446744073709551615, 18446744073709551615, 87, 89, 87, 89, 20, 21, true, "to", "to"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485865, 12448449225007818811, 18446744073709551615, 18446744073709551615, 149, 151, 149, 151, 30, 31, true, "to", "to"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485865, 12448449224998732616, 18446744073709551615, 18446744073709551615, 311, 313, 311, 313, 63, 64, true, "to", "to"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541485865, 12448449224998740693, 18446744073709551615, 18446744073709551615, 452, 454, 452, 454, 87, 88, true, "to", "to"], ["conn", "single-conn", 17247086344435786796, "TEXT", "#/texts/94", 1.0, 15441160910541487889, 12448443566585721442, 18446744073709551615, 18446744073709551615, 724, 726, 724, 726, 137, 138, true, "To", "To"], ["expression", "word-concatenation", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 17249225789261661029, 3807297211102715149, 18446744073709551615, 18446744073709551615, 12, 28, 12, 28, 1, 2, true, "data-parallelism", "data-parallelism"], ["expression", "word-concatenation", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 8685358683472264781, 17027290145523372529, 18446744073709551615, 18446744073709551615, 87, 105, 87, 105, 12, 13, true, "user-customisation", "user-customisation"], ["sentence", "", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 10588183979877639592, 1367000647117206524, 18446744073709551615, 18446744073709551615, 12, 119, 12, 119, 1, 15, true, "data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", "data-parallelism in order to speed up the training and provide interactive user-customisation capabilities."], ["term", "single-term", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 9998261106336570604, 4856078764969945002, 18446744073709551615, 18446744073709551615, 75, 118, 75, 118, 11, 14, true, "interactive user-customisation capabilities", "interactive user-customisation capabilities"], ["term", "single-term", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 17249225789261661029, 3807297211102715149, 18446744073709551615, 18446744073709551615, 12, 28, 12, 28, 1, 2, true, "data-parallelism", "data-parallelism"], ["term", "single-term", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 329104161571401725, 3792502362005124423, 18446744073709551615, 18446744073709551615, 32, 37, 32, 37, 3, 4, true, "order", "order"], ["term", "single-term", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 14634153919632515335, 3840780376526095372, 18446744073709551615, 18446744073709551615, 54, 62, 54, 62, 8, 9, true, "training", "training"], ["verb", "single-verb", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 541003494147177743, 2376460771711104984, 18446744073709551615, 18446744073709551615, 0, 11, 0, 11, 0, 1, true, "specialised", "specialised"], ["verb", "single-verb", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 329104161639049345, 3799043945253257651, 18446744073709551615, 18446744073709551615, 41, 46, 41, 46, 5, 6, true, "speed", "speed"], ["verb", "single-verb", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 8106476000214061408, 9620881782228868220, 18446744073709551615, 18446744073709551615, 67, 74, 67, 74, 10, 11, true, "provide", "provide"], ["conn", "single-conn", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 15441160910541486538, 14667436044722575629, 18446744073709551615, 18446744073709551615, 29, 31, 29, 31, 2, 3, true, "in", "in"], ["conn", "single-conn", 10287541089279789496, "TEXT", "#/texts/95", 1.0, 15441160910541485865, 14667435948507858038, 18446744073709551615, 18446744073709551615, 38, 40, 38, 40, 4, 5, true, "to", "to"], ["sentence", "", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 5556222901900980902, 15746519596852768008, 18446744073709551615, 18446744073709551615, 0, 127, 0, 127, 0, 22, true, "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system."], ["term", "enum-term-mark-4", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 13556182311682325280, 9761797471225359212, 18446744073709551615, 18446744073709551615, 32, 66, 32, 66, 6, 11, true, "Roxana Istrate and Matthieu Mottet", "Roxana Istrate and Matthieu Mottet"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 7949755686502200390, 668350583233417234, 18446744073709551615, 18446744073709551615, 32, 46, 32, 46, 6, 8, true, "Roxana Istrate", "Roxana Istrate"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 422584487912656734, 11698320462170095527, 18446744073709551615, 18446744073709551615, 51, 66, 51, 66, 9, 11, true, "Matthieu Mottet", "Matthieu Mottet"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 244635901031456436, 9625433519480199700, 18446744073709551615, 18446744073709551615, 116, 126, 116, 126, 19, 21, true, "CCS system", "CCS system"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 8106397759446161562, 5642353918280438479, 18446744073709551615, 18446744073709551615, 4, 11, 4, 11, 1, 2, true, "authors", "authors"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 4603153860084293890, 9630773090599505701, 18446744073709551615, 18446744073709551615, 77, 89, 77, 89, 13, 14, true, "contribution", "contribution"], ["term", "single-term", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 1525875096007260836, 16905177906202921866, 18446744073709551615, 18446744073709551615, 97, 108, 97, 108, 16, 17, true, "development", "development"], ["verb", "compound-verb", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 17737636287413194494, 18246863768738587194, 18446744073709551615, 18446744073709551615, 12, 31, 12, 31, 2, 6, true, "would like to thank", "would like to thank"], ["conn", "single-conn", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 12178341415895625940, 9042585404458343529, 18446744073709551615, 18446744073709551615, 67, 70, 67, 70, 11, 12, true, "for", "for"], ["conn", "single-conn", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 16381206565712212855, 15703671923459609107, 18446744073709551615, 18446744073709551615, 109, 115, 109, 115, 17, 19, true, "of the", "of the"], ["conn", "single-conn", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 15441160910541485865, 16366793807298640842, 18446744073709551615, 18446744073709551615, 23, 25, 23, 25, 4, 5, true, "to", "to"], ["conn", "single-conn", 15983582675278266440, "TEXT", "#/texts/97", 1.0, 16381206519425733256, 17710263008813390102, 18446744073709551615, 18446744073709551615, 90, 96, 90, 96, 14, 16, true, "to the", "to the"], ["numval", "year", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 389609625548777262, 15175051322594687321, 18446744073709551615, 18446744073709551615, 175, 179, 175, 179, 39, 40, true, "2020", "2020"], ["numval", "ival", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16380810010060182105, 2087358970220343258, 18446744073709551615, 18446744073709551615, 232, 238, 232, 238, 47, 48, true, "721027", "721027"], ["link", "url", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 4558951843677957919, 6153426188298487244, 18446744073709551615, 18446744073709551615, 44, 62, 44, 62, 9, 16, true, "http://nccr-marvel", "http://nccr-marvel"], ["link", "url", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 1840514147198720564, 15805877038624594621, 18446744073709551615, 18446744073709551615, 240, 267, 240, 267, 49, 60, true, "http://the-force-project.eu", "http://the-force-project.eu"], ["parenthesis", "round brackets", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 9725913318321680311, 4961995203860234930, 18446744073709551615, 18446744073709551615, 43, 67, 43, 67, 8, 19, true, "(http://nccr-marvel. ch)", "(http://nccr-marvel. ch)"], ["parenthesis", "round brackets", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 2988796312331131177, 6687402703764012002, 18446744073709551615, 18446744073709551615, 239, 268, 239, 268, 48, 61, true, "(http://the-force-project.eu)", "(http://the-force-project.eu)"], ["expression", "wtoken-concatenation", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 10991632387650324970, 16837241231127303249, 18446744073709551615, 18446744073709551615, 186, 198, 186, 198, 41, 42, true, "NMBP-23-2016", "NMBP-23-2016"], ["sentence", "", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 9490709138959189212, 12250691288144973169, 18446744073709551615, 18446744073709551615, 0, 117, 0, 117, 0, 28, true, "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation.", "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation."], ["sentence", "", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 1102470314652222820, 15601071363037323756, 18446744073709551615, 18446744073709551615, 118, 269, 118, 269, 28, 62, true, "MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", "MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu)."], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 9107120802959375325, 14854418713978759874, 18446744073709551615, 18446744073709551615, 31, 42, 31, 42, 6, 8, true, "NCCR MARVEL", "NCCR MARVEL"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 4312908239263712749, 12629609910975902459, 18446744073709551615, 18446744073709551615, 83, 116, 83, 116, 23, 27, true, "Swiss National Science Foundation", "Swiss National Science Foundation"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 15770732106686559794, 7781941783302435281, 18446744073709551615, 18446744073709551615, 142, 155, 142, 155, 33, 35, true, "FORCE project", "FORCE project"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 4392060515500483083, 8866459764729165903, 18446744073709551615, 18446744073709551615, 209, 231, 209, 231, 44, 47, true, "Grant agreement number", "Grant agreement number"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 389609625633592023, 15303732731624508399, 18446744073709551615, 18446744073709551615, 5, 9, 5, 9, 1, 2, true, "work", "work"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 15441160910541486943, 7829630847478764393, 18446744073709551615, 18446744073709551615, 64, 66, 64, 66, 17, 18, true, "ch", "ch"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 15441160910541480587, 7830721483973022036, 18446744073709551615, 18446744073709551615, 118, 120, 118, 120, 28, 29, true, "MD", "MD"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 8106351288219429194, 18213714777089539961, 18446744073709551615, 18446744073709551615, 167, 174, 167, 174, 38, 39, true, "Horizon", "Horizon"], ["term", "single-term", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 10991632387650324970, 16837241231127303249, 18446744073709551615, 18446744073709551615, 186, 198, 186, 198, 41, 42, true, "NMBP-23-2016", "NMBP-23-2016"], ["verb", "compound-verb", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 13041846394845825316, 5320956231753433918, 18446744073709551615, 18446744073709551615, 10, 23, 10, 23, 2, 4, true, "was supported", "was supported"], ["verb", "compound-verb", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 13041846394845825316, 5320956231753459128, 18446744073709551615, 18446744073709551615, 121, 134, 121, 134, 29, 31, true, "was supported", "was supported"], ["verb", "single-verb", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16381206565272093797, 5039615147538801699, 18446744073709551615, 18446744073709551615, 69, 75, 69, 75, 20, 21, true, "funded", "funded"], ["verb", "single-verb", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16381206565272093797, 5039615147538790981, 18446744073709551615, 18446744073709551615, 157, 163, 157, 163, 36, 37, true, "funded", "funded"], ["verb", "single-verb", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 389609625695109591, 15313901038780033729, 18446744073709551615, 18446744073709551615, 199, 203, 199, 203, 42, 43, true, "call", "call"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16381206574363061705, 5224141779864768374, 18446744073709551615, 18446744073709551615, 24, 30, 24, 30, 4, 6, true, "by the", "by the"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16381206574363061705, 5224141779864730347, 18446744073709551615, 18446744073709551615, 76, 82, 76, 82, 21, 23, true, "by the", "by the"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 16381206574363061705, 5224141779864726325, 18446744073709551615, 18446744073709551615, 135, 141, 135, 141, 31, 33, true, "by the", "by the"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 15441160910541486989, 7829629584886114826, 18446744073709551615, 18446744073709551615, 164, 166, 164, 166, 37, 38, true, "by", "by"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 329104159159151530, 12430892283433612669, 18446744073709551615, 18446744073709551615, 180, 185, 180, 185, 40, 41, true, "under", "under"], ["conn", "single-conn", 12711351442546714716, "TEXT", "#/texts/98", 1.0, 389609625618037948, 15311592167218177731, 18446744073709551615, 18446744073709551615, 204, 208, 204, 208, 43, 44, true, "with", "with"], ["reference", "author", 1225384713519841338, "TEXT", "#/texts/99", 1.0, 1858797456585454232, 7193470945487579875, 18446744073709551615, 18446744073709551615, 0, 10, 0, 10, 0, 1, true, "REFERENCES", "REFERENCES"], ["reference", "author", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 10921193442290853772, 7808176325166967948, 18446744073709551615, 18446744073709551615, 4, 21, 4, 21, 1, 4, true, "A. Antonacopoulos", "A. Antonacopoulos"], ["reference", "author", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 5181382481262336037, 5307751930075227018, 18446744073709551615, 18446744073709551615, 23, 34, 23, 34, 5, 8, true, "C. Clausner", "C. Clausner"], ["reference", "author", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 18410882341323932977, 3950678732393374894, 18446744073709551615, 18446744073709551615, 36, 51, 36, 51, 9, 12, true, "C. Papadopoulos", "C. Papadopoulos"], ["reference", "author", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 6326253284428776844, 2242368337149903292, 18446744073709551615, 18446744073709551615, 57, 73, 57, 73, 14, 18, true, "S. Pletschacher.", "S. Pletschacher."], ["reference", "citation-number", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 12178341415895551530, 18332345913337968356, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[1]", "[1]"], ["reference", "container-title", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 2527079864200222812, 474810476780653321, 18446744073709551615, 18446744073709551615, 161, 249, 161, 249, 30, 42, true, "In Proceedings of the 13th International Conference on Document Analysis and Recognition", "In Proceedings of the 13th International Conference on Document Analysis and Recognition"], ["reference", "container-title", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 6558131902220562236, 4761966619744782752, 18446744073709551615, 18446744073709551615, 251, 260, 251, 260, 43, 45, true, "ICDAR2015", "ICDAR2015"], ["reference", "date", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 389609625548777059, 4138332198474599496, 18446744073709551615, 18446744073709551615, 74, 78, 74, 78, 18, 19, true, "2015", "2015"], ["reference", "date", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 10303630957638511768, 3815340683710445282, 18446744073709551615, 18446744073709551615, 270, 279, 270, 279, 49, 50, true, "1151-1155", "1151-1155"], ["reference", "location", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 329104162200796337, 14591806354842233425, 18446744073709551615, 18446744073709551615, 263, 268, 263, 268, 47, 48, true, "Nancy", "Nancy"], ["reference", "title", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 17804212744220731295, 13329383501201933373, 18446744073709551615, 18446744073709551615, 80, 159, 80, 159, 20, 29, true, "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015", "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015"], ["reference", "author", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 2649929445531557889, 7202581822078924410, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 1, 4, true, "Leo Breiman.", "Leo Breiman."], ["reference", "citation-number", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 12178341415895551595, 12282095972636501808, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[2]", "[2]"], ["reference", "date", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 389609625548757414, 14515784463162085628, 18446744073709551615, 18446744073709551615, 17, 21, 17, 21, 4, 5, true, "2001", "2001"], ["reference", "date", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 10551073428908397011, 16087676618282063646, 18446744073709551615, 18446744073709551615, 63, 74, 63, 74, 15, 18, true, "01 Oct 2001", "01 Oct 2001"], ["reference", "journal", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 13278563109182224937, 9894237306486099503, 18446744073709551615, 18446744073709551615, 39, 55, 39, 55, 9, 11, true, "Machine Learning", "Machine Learning"], ["reference", "title", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 2109081024677782429, 14560503901773287747, 18446744073709551615, 18446744073709551615, 23, 37, 23, 37, 6, 8, true, "Random Forests", "Random Forests"], ["reference", "url", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 1225079762841478321, 13531790532415888950, 18446744073709551615, 18446744073709551615, 83, 122, 83, 122, 22, 35, true, "https://doi.org/10.1023/A:1010933404324", "https://doi.org/10.1023/A:1010933404324"], ["reference", "author", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 1401374873664364883, 11647727014815681179, 18446744073709551615, 18446744073709551615, 4, 14, 4, 14, 1, 4, true, "R. Cattoni", "R. Cattoni"], ["reference", "author", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 8489759580118410179, 13292301803598722609, 18446744073709551615, 18446744073709551615, 16, 26, 16, 26, 5, 8, true, "T. Coianiz", "T. Coianiz"], ["reference", "author", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 6842824740074268202, 13861579202330443089, 18446744073709551615, 18446744073709551615, 28, 40, 28, 40, 9, 12, true, "S. Messelodi", "S. Messelodi"], ["reference", "author", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 3186691256225071720, 5893020180892593571, 18446744073709551615, 18446744073709551615, 46, 59, 46, 59, 14, 20, true, "C. M. Modena.", "C. M. Modena."], ["reference", "citation-number", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 12178341415895577000, 12922636114896239788, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[3]", "[3]"], ["reference", "date", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 389609625536085742, 14383425253514843049, 18446744073709551615, 18446744073709551615, 60, 64, 60, 64, 20, 21, true, "1998", "1998"], ["reference", "title", 16943780574244090186, "TEXT", "#/texts/102", 1.0, 10272469742902868819, 13721964765306049914, 18446744073709551615, 18446744073709551615, 66, 145, 66, 145, 22, 33, true, "Geometric layout analysis techniques for document image understanding: a review", "Geometric layout analysis techniques for document image understanding: a review"], ["reference", "author", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 17855541178416775013, 15770720280543811824, 18446744073709551615, 18446744073709551615, 4, 22, 4, 22, 1, 5, true, "Jean-Pierre Chanod", "Jean-Pierre Chanod"], ["reference", "author", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 7554933550167443736, 13411551703313480687, 18446744073709551615, 18446744073709551615, 24, 41, 24, 41, 6, 8, true, "Boris Chidlovskii", "Boris Chidlovskii"], ["reference", "author", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 16299981998052668228, 10120159009512117499, 18446744073709551615, 18446744073709551615, 43, 56, 43, 55, 9, 11, true, "Herv\u00e9 Dejean", "Herv\u00e9 Dejean"], ["reference", "author", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 12186041413076963653, 1815357622671572381, 18446744073709551615, 18446744073709551615, 58, 72, 57, 71, 12, 14, true, "Olivier Fambon", "Olivier Fambon"], ["reference", "author", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 10757542349073996342, 681372576460736923, 18446744073709551615, 18446744073709551615, 74, 91, 73, 88, 15, 17, true, "J\u00e9r\u00f4me Fuselier", "J\u00e9r\u00f4me Fuselier"], ["reference", "author", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 17756104824925179897, 12319066590629211102, 18446744073709551615, 18446744073709551615, 93, 108, 90, 105, 18, 20, true, "Thierry Jacquin", "Thierry Jacquin"], ["reference", "author", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 12029578715874344754, 13070806463269187443, 18446744073709551615, 18446744073709551615, 114, 131, 111, 128, 22, 27, true, "Jean-Luc Meunier.", "Jean-Luc Meunier."], ["reference", "citation-number", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 12178341415895577065, 17281225859930936863, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[4]", "[4]"], ["reference", "date", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 389609625548757410, 11746200903899729970, 18446744073709551615, 18446744073709551615, 132, 136, 129, 133, 27, 28, true, "2005", "2005"], ["reference", "location", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 13464702443011780443, 8119228962970051206, 18446744073709551615, 18446744073709551615, 201, 238, 198, 235, 40, 46, true, "Berlin Heidelberg, Berlin, Heidelberg", "Berlin Heidelberg, Berlin, Heidelberg"], ["reference", "title", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 8741239478611349123, 10862343017243987125, 18446744073709551615, 18446744073709551615, 138, 190, 135, 187, 29, 38, true, "From Legacy Documents to XML: A Conversion Framework", "From Legacy Documents to XML: A Conversion Framework"], ["reference", "url", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 9115058383761225167, 648438667166468655, 18446744073709551615, 18446744073709551615, 248, 282, 245, 279, 49, 62, true, "https://doi.org/10.1007/11551362_9", "https://doi.org/10.1007/11551362_9"], ["reference", "author", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 4582708537308058782, 10402294110981991066, 18446744073709551615, 18446744073709551615, 4, 18, 4, 18, 1, 4, true, "Ross Girshick.", "Ross Girshick."], ["reference", "citation-number", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 12178341415895577901, 15205622006266309913, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[5]", "[5]"], ["reference", "container-title", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 9927524698181440404, 1963067726741096427, 18446744073709551615, 18446744073709551615, 37, 112, 37, 112, 9, 20, true, "In Proceedings of the 2015 IEEE International Conference on Computer Vision", "In Proceedings of the 2015 IEEE International Conference on Computer Vision"], ["reference", "container-title", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 389609625537760670, 1654267914364558446, 18446744073709551615, 18446744073709551615, 114, 118, 114, 118, 21, 22, true, "ICCV", "ICCV"], ["reference", "container-title", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 14650472600731532908, 6597684399889991790, 18446744073709551615, 18446744073709551615, 121, 129, 121, 129, 24, 27, true, "ICCV '15", "ICCV '15"], ["reference", "date", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 389609625548777059, 1587769393776818040, 18446744073709551615, 18446744073709551615, 19, 23, 19, 23, 4, 5, true, "2015", "2015"], ["reference", "date", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 10303975503395430788, 13846363068497305469, 18446744073709551615, 18446744073709551615, 176, 185, 176, 185, 39, 40, true, "1440-1448", "1440-1448"], ["reference", "date", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 389609625548777059, 1587769393776757579, 18446744073709551615, 18446744073709551615, 216, 220, 216, 220, 53, 54, true, "2015", "2015"], ["reference", "location", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 12788924170991110125, 5659206141059843753, 18446744073709551615, 18446744073709551615, 155, 174, 155, 174, 33, 38, true, "Washington, DC, USA", "Washington, DC, USA"], ["reference", "title", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 15491004285883184028, 17483261521377705764, 18446744073709551615, 18446744073709551615, 25, 35, 25, 35, 6, 8, true, "Fast R-CNN", "Fast R-CNN"], ["reference", "url", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 3301781339572596013, 17531137372088121631, 18446744073709551615, 18446744073709551615, 187, 215, 187, 215, 41, 52, true, "https://doi.org/10.1109/ICCV", "https://doi.org/10.1109/ICCV"], ["reference", "author", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 141995704861070506, 4358412458884164235, 18446744073709551615, 18446744073709551615, 4, 20, 4, 20, 1, 5, true, "Ross B. Girshick", "Ross B. Girshick"], ["reference", "author", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 16700235966000105766, 16857612526578801697, 18446744073709551615, 18446744073709551615, 22, 34, 22, 34, 6, 8, true, "Jeff Donahue", "Jeff Donahue"], ["reference", "author", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 3125822382074464058, 13386372949081827875, 18446744073709551615, 18446744073709551615, 36, 50, 36, 50, 9, 11, true, "Trevor Darrell", "Trevor Darrell"], ["reference", "author", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 10076860098015848351, 1698280748488935181, 18446744073709551615, 18446744073709551615, 56, 71, 56, 71, 13, 16, true, "Jitendra Malik.", "Jitendra Malik."], ["reference", "citation-number", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 12178341415895577964, 1023751500620290990, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[6]", "[6]"], ["reference", "date", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 389609625548777061, 894814354396885943, 18446744073709551615, 18446744073709551615, 72, 76, 72, 76, 16, 17, true, "2013", "2013"], ["reference", "date", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 389609625548777061, 894814354396890826, 18446744073709551615, 18446744073709551615, 180, 184, 180, 184, 32, 33, true, "2013", "2013"], ["reference", "journal", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 389609625536419383, 889446752040326567, 18446744073709551615, 18446744073709551615, 160, 164, 160, 164, 29, 30, true, "CoRR", "CoRR"], ["reference", "title", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 4208693923929480551, 3754197794849426338, 18446744073709551615, 18446744073709551615, 78, 158, 78, 158, 18, 28, true, "Rich feature hierarchies for accurate object detection and semantic segmentation", "Rich feature hierarchies for accurate object detection and semantic segmentation"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 8106351942713029604, 15468997146309510455, 18446744073709551615, 18446744073709551615, 4, 11, 4, 11, 1, 3, true, "Wei Liu", "Wei Liu"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 7132768279271695, 1832821379686674159, 18446744073709551615, 18446744073709551615, 13, 30, 13, 30, 4, 6, true, "Dragomir Anguelov", "Dragomir Anguelov"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 12871845148221275510, 11451573001119547147, 18446744073709551615, 18446744073709551615, 32, 45, 32, 45, 7, 9, true, "Dumitru Erhan", "Dumitru Erhan"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 6963214204149412896, 11905902671968880924, 18446744073709551615, 18446744073709551615, 47, 64, 47, 64, 10, 12, true, "Christian Szegedy", "Christian Szegedy"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 1399468129531522089, 15637271748350955016, 18446744073709551615, 18446744073709551615, 66, 76, 66, 76, 13, 15, true, "Scott Reed", "Scott Reed"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 12712965187511148158, 5061563798042056469, 18446744073709551615, 18446744073709551615, 78, 91, 78, 91, 16, 20, true, "Cheng-Yang Fu", "Cheng-Yang Fu"], ["reference", "author", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 3733048493609069913, 12058083979397468329, 18446744073709551615, 18446744073709551615, 97, 115, 97, 115, 22, 27, true, "Alexander C. Berg.", "Alexander C. Berg."], ["reference", "citation-number", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 12178341415895577775, 16834182135958034128, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[7]", "[7]"], ["reference", "date", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 389609625548777056, 12418382060406794776, 18446744073709551615, 18446744073709551615, 116, 120, 116, 120, 27, 28, true, "2016", "2016"], ["reference", "doi", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 3534146179424153776, 1525705277889903310, 18446744073709551615, 18446744073709551615, 206, 224, 206, 224, 44, 45, true, "https://doi.org/10", "https://doi.org/10"], ["reference", "doi", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 3493950482346635177, 14172820134834639105, 18446744073709551615, 18446744073709551615, 226, 250, 226, 250, 46, 54, true, "1007/978-3-319-46448-0_2", "1007/978-3-319-46448-0_2"], ["reference", "location", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 389609625536506042, 12420143175742824125, 18446744073709551615, 18446744073709551615, 193, 197, 193, 197, 40, 41, true, "Cham", "Cham"], ["reference", "title", 11872392946390819176, "TEXT", "#/texts/106", 1.0, 10201684882899222639, 16463858842282873959, 18446744073709551615, 18446744073709551615, 122, 156, 122, 156, 29, 35, true, "SSD: Single Shot MultiBox Detector", "SSD: Single Shot MultiBox Detector"], ["reference", "author", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 5088659084289352829, 5811844525036759114, 18446744073709551615, 18446744073709551615, 4, 17, 4, 17, 1, 3, true, "Joseph Redmon", "Joseph Redmon"], ["reference", "author", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 417695209021750783, 13441950925666715191, 18446744073709551615, 18446744073709551615, 19, 40, 19, 40, 4, 7, true, "Santosh Kumar Divvala", "Santosh Kumar Divvala"], ["reference", "author", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 141995704861070506, 13286696794844996383, 18446744073709551615, 18446744073709551615, 42, 58, 42, 58, 8, 12, true, "Ross B. Girshick", "Ross B. Girshick"], ["reference", "author", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 16947174234018208722, 13965552924856577071, 18446744073709551615, 18446744073709551615, 64, 76, 64, 76, 14, 17, true, "Ali Farhadi.", "Ali Farhadi."], ["reference", "citation-number", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 12178341415895577838, 11018125289094672461, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[8]", "[8]"], ["reference", "container-title", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 17631274803144515959, 18105892991402137032, 18446744073709551615, 18446744073709551615, 140, 203, 140, 203, 32, 41, true, "2016 IEEE Conference on Computer Vision and Pattern Recognition", "2016 IEEE Conference on Computer Vision and Pattern Recognition"], ["reference", "container-title", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 389609625526699487, 17849764824838617245, 18446744073709551615, 18446744073709551615, 205, 209, 205, 209, 42, 43, true, "CVPR", "CVPR"], ["reference", "date", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 389609625548777056, 17837801987031958568, 18446744073709551615, 18446744073709551615, 77, 81, 77, 81, 17, 18, true, "2016", "2016"], ["reference", "date", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 389609625548777056, 17837801987031982734, 18446744073709551615, 18446744073709551615, 212, 216, 212, 216, 45, 46, true, "2016", "2016"], ["reference", "title", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 5895818558987270699, 2974553673873283962, 18446744073709551615, 18446744073709551615, 83, 138, 83, 138, 19, 31, true, "You Only Look Once: Unified, Real-Time Object Detection", "You Only Look Once: Unified, Real-Time Object Detection"], ["reference", "author", 6623297047995432604, "TEXT", "#/texts/108", 1.0, 5088659084289352829, 16235259739729085297, 18446744073709551615, 18446744073709551615, 4, 17, 4, 17, 1, 3, true, "Joseph Redmon", "Joseph Redmon"], ["reference", "author", 6623297047995432604, "TEXT", "#/texts/108", 1.0, 16947174234018208722, 7021580680610188634, 18446744073709551615, 18446744073709551615, 22, 34, 22, 34, 4, 7, true, "Ali Farhadi.", "Ali Farhadi."], ["reference", "citation-number", 6623297047995432604, "TEXT", "#/texts/108", 1.0, 12178341415895577640, 5338477872773862060, 18446744073709551615, 18446744073709551615, 0, 3, 0, 3, 0, 1, true, "[9]", "[9]"], ["reference", "date", 6623297047995432604, "TEXT", "#/texts/108", 1.0, 389609625548777056, 2625243571990787508, 18446744073709551615, 18446744073709551615, 35, 39, 35, 39, 7, 8, true, "2016", "2016"], ["reference", "date", 6623297047995432604, "TEXT", "#/texts/108", 1.0, 389609625548777056, 2625243571990783197, 18446744073709551615, 18446744073709551615, 110, 114, 110, 114, 21, 22, true, "2016", "2016"], ["reference", "author", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 9337887504118347047, 4966377796769374289, 18446744073709551615, 18446744073709551615, 5, 17, 5, 17, 1, 3, true, "Shaoqing Ren", "Shaoqing Ren"], ["reference", "author", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 7339447509685488310, 1490181006860316744, 18446744073709551615, 18446744073709551615, 19, 29, 19, 29, 4, 6, true, "Kaiming He", "Kaiming He"], ["reference", "author", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 13123599834782083842, 7292467665049010344, 18446744073709551615, 18446744073709551615, 31, 44, 31, 44, 7, 9, true, "Ross Girshick", "Ross Girshick"], ["reference", "author", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 2904781337729160811, 16221483782846728585, 18446744073709551615, 18446744073709551615, 50, 59, 50, 59, 11, 14, true, "Jian Sun.", "Jian Sun."], ["reference", "citation-number", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 389609625697296215, 1913545593953328211, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "[10]", "[10]"], ["reference", "container-title", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 17791264228691503041, 2574823334558986016, 18446744073709551615, 18446744073709551615, 146, 201, 146, 201, 30, 38, true, "In Advances in Neural Information Processing Systems 28", "In Advances in Neural Information Processing Systems 28"], ["reference", "date", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 389609625548777059, 1924763351573441882, 18446744073709551615, 18446744073709551615, 60, 64, 60, 64, 14, 15, true, "2015", "2015"], ["reference", "title", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 695901516261617265, 14331097264748910677, 18446744073709551615, 18446744073709551615, 66, 144, 66, 144, 16, 29, true, "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks", "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"], ["reference", "url", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 3374974501831695503, 17450904193872703176, 18446744073709551615, 18446744073709551615, 309, 420, 309, 420, 76, 78, true, "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks", "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks"], ["reference", "url", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 12178341415895634440, 93706065194188109, 18446744073709551615, 18446744073709551615, 422, 425, 422, 425, 79, 80, true, "pdf", "pdf"], ["reference", "author", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 4686361850733567621, 5253767773577297512, 18446744073709551615, 18446744073709551615, 5, 20, 5, 20, 1, 5, true, "Peter W J Staar", "Peter W J Staar"], ["reference", "author", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 1571808557594152175, 1746337992895366641, 18446744073709551615, 18446744073709551615, 22, 35, 22, 35, 6, 8, true, "Michele Dolfi", "Michele Dolfi"], ["reference", "author", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 9737597816447750448, 2973540942666074124, 18446744073709551615, 18446744073709551615, 37, 51, 37, 51, 9, 11, true, "Christoph Auer", "Christoph Auer"], ["reference", "author", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 13732913329338511598, 166477832047526898, 18446744073709551615, 18446744073709551615, 57, 70, 57, 70, 13, 16, true, "Costas Bekas.", "Costas Bekas."], ["reference", "citation-number", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 389609625697296278, 16564150102059325413, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "[11]", "[11]"], ["reference", "date", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 389609625548777054, 16555452686088781228, 18446744073709551615, 18446744073709551615, 71, 75, 71, 75, 16, 17, true, "2018", "2018"], ["reference", "title", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 16083247419427271197, 18033265608713009513, 18446744073709551615, 18446744073709551615, 77, 133, 77, 133, 18, 26, true, "Corpus Conversion Service poster at the SysML conference", "Corpus Conversion Service poster at the SysML conference"], ["reference", "url", 14905276480471286920, "TEXT", "#/texts/110", 1.0, 18429963590603622561, 12432928173216692023, 18446744073709551615, 18446744073709551615, 135, 166, 135, 166, 27, 31, true, "http://www.sysml.cc/doc/ 76.pdf", "http://www.sysml.cc/doc/ 76.pdf"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 15441160910541481072, 14925187714232052101, 2, 1, 0, 2, 0, 2, 0, 2, true, "72", "72"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 17767354399704235156, 6061612085784771330, 2, 2, 0, 1, 0, 1, 0, 1, true, "4", "4"], ["numval", "fval", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 389609625535995626, 16087508952769745788, 2, 3, 0, 4, 0, 4, 0, 4, true, "0.97", "0.97"], ["numval", "fval", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 389609625535995627, 16087508952857503563, 2, 4, 0, 4, 0, 4, 0, 4, true, "0.98", "0.98"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 17767354399704235162, 6061612085904261025, 3, 0, 5, 6, 5, 6, 1, 2, true, "2", "2"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 17767354399704235153, 6061612080706226208, 3, 1, 0, 1, 0, 1, 0, 1, true, "9", "9"], ["numval", "fval", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 12178341415896431533, 7910815507560570273, 3, 2, 0, 3, 0, 3, 0, 3, true, "0.1", "0.1"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 17767354399704235160, 6061612085871184177, 3, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 15441160910541481353, 14925187695918906548, 3, 3, 4, 6, 4, 6, 2, 4, true, "99", "99"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 17767354399704235160, 6061612085871196320, 3, 4, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16709517892596982787, "TABLE", "#/tables/0", 1.0, 15441160910541481352, 14925187696052464601, 3, 4, 4, 6, 4, 6, 2, 4, true, "98", "98"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 15441160910541480975, 14063371777824517040, 2, 2, 0, 2, 0, 2, 0, 2, true, "75", "75"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100226782560, 2, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100226770099, 2, 4, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100226790530, 2, 5, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100227143682, 2, 6, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100226706291, 2, 7, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235161, 15803300100445307688, 3, 1, 0, 1, 0, 1, 0, 1, true, "1", "1"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 12178341415896199541, 5837267533537259043, 3, 2, 0, 3, 0, 3, 0, 3, true, "670", "670"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100227552359, 3, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100227564948, 3, 4, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100227560517, 3, 5, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100226887469, 3, 6, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100224577771, 4, 1, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100224140347, 4, 2, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 12178341415896434935, 5837266946220083063, 4, 3, 0, 3, 0, 3, 0, 3, true, "325", "325"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100224164889, 4, 4, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100224136680, 4, 5, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100224550808, 4, 6, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235161, 15803300100440618586, 5, 1, 0, 1, 0, 1, 0, 1, true, "1", "1"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 15441160910541481861, 14063371834761578936, 5, 2, 0, 2, 0, 2, 0, 2, true, "17", "17"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100334898942, 5, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147816412516, 1837047046804924097, 5, 4, 0, 5, 0, 5, 0, 5, true, "56460", "56460"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 15441160910541481978, 14063371734014592858, 5, 5, 0, 2, 0, 2, 0, 2, true, "14", "14"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100334911438, 5, 6, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335666817, 6, 1, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335695504, 6, 2, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335691427, 6, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235156, 15803300100423374160, 6, 4, 0, 1, 0, 1, 0, 1, true, "4", "4"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 389609625655502523, 1616926330272763134, 6, 5, 0, 4, 0, 4, 0, 4, true, "4223", "4223"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 15441160910541481788, 14063371729666955983, 6, 6, 0, 2, 0, 2, 0, 2, true, "26", "26"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335410790, 7, 1, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335431255, 7, 2, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335435332, 7, 3, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235160, 15803300100335423029, 7, 4, 0, 1, 0, 1, 0, 1, true, "0", "0"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 17767354399704235161, 15803300100417827613, 7, 5, 0, 1, 0, 1, 0, 1, true, "1", "1"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 389609625549028785, 1615012629921730407, 7, 6, 0, 4, 0, 4, 0, 4, true, "3418", "3418"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 12178341415896426714, 5837506952496953864, 8, 1, 0, 3, 0, 3, 0, 3, true, "100", "100"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147618004574, 1850939892712171199, 8, 2, 0, 5, 0, 5, 0, 5, true, "99.85", "99.85"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 12178341415896426714, 5837506952496995114, 8, 3, 0, 3, 0, 3, 0, 3, true, "100", "100"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617972634, 1850962284269103299, 8, 4, 0, 5, 0, 5, 0, 5, true, "99.94", "99.94"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617973201, 1850962300609404744, 8, 5, 0, 5, 0, 5, 0, 5, true, "99.24", "99.24"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617972639, 1850962292247387325, 8, 6, 0, 5, 0, 5, 0, 5, true, "99.97", "99.97"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147618821186, 1850927305919343478, 9, 1, 0, 5, 0, 5, 0, 5, true, "97.40", "97.40"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147618821120, 1850927276933057753, 9, 2, 0, 5, 0, 5, 0, 5, true, "97.52", "97.52"], ["numval", "ival", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 12178341415896426714, 5837506952496145978, 9, 3, 0, 3, 0, 3, 0, 3, true, "100", "100"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617972625, 1850962284440201056, 9, 4, 0, 5, 0, 5, 0, 5, true, "99.99", "99.99"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617972438, 1850969319560412727, 9, 5, 0, 5, 0, 5, 0, 5, true, "99.64", "99.64"], ["numval", "fval", 16041588621504517180, "TABLE", "#/tables/1", 1.0, 329104147617973201, 1850962300608540590, 9, 6, 0, 5, 0, 5, 0, 5, true, "99.24", "99.24"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481352, 14633884986579423126, 1, 1, 0, 2, 0, 2, 0, 2, true, "98", "98"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481358, 14633884986289176499, 1, 1, 5, 7, 5, 7, 3, 5, true, "96", "96"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481353, 14633884986629840445, 1, 2, 0, 2, 0, 2, 0, 2, true, "99", "99"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481394, 14633884986969604250, 1, 2, 5, 7, 5, 7, 3, 5, true, "83", "83"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481353, 14633884986621746969, 2, 1, 0, 2, 0, 2, 0, 2, true, "99", "99"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541486270, 14633895233084857259, 2, 1, 5, 7, 5, 7, 3, 5, true, "46", "46"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541481353, 14633884986621702026, 2, 2, 0, 2, 0, 2, 0, 2, true, "99", "99"], ["numval", "ival", 14817357053216629605, "TABLE", "#/tables/2", 1.0, 15441160910541486209, 14633895297101973839, 2, 2, 5, 7, 5, 7, 3, 5, true, "58", "58"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "meta": [{"$ref": "#/footnotes/0"}, {"$ref": "#/footnotes/1"}, {"$ref": "#/footnotes/2"}, {"$ref": "#/footnotes/3"}, {"$ref": "#/footnotes/4"}, {"$ref": "#/footnotes/5"}, {"$ref": "#/figures/0/captions/0"}, {"$ref": "#/footnotes/6"}, {"$ref": "#/footnotes/7"}, {"$ref": "#/footnotes/8"}, {"$ref": "#/footnotes/9"}, {"$ref": "#/footnotes/10"}, {"$ref": "#/figures/2/captions/0"}, {"$ref": "#/figures/1/captions/0"}, {"$ref": "#/footnotes/11"}, {"$ref": "#/footnotes/12"}, {"$ref": "#/footnotes/13"}, {"$ref": "#/figures/3/captions/0"}, {"$ref": "#/figures/4/captions/0"}, {"$ref": "#/footnotes/14"}, {"$ref": "#/footnotes/15"}, {"$ref": "#/footnotes/16"}, {"$ref": "#/footnotes/17"}, {"$ref": "#/footnotes/18"}, {"$ref": "#/footnotes/19"}, {"$ref": "#/figures/6/captions/0"}, {"$ref": "#/footnotes/20"}, {"$ref": "#/figures/7/captions/0"}, {"$ref": "#/footnotes/21"}, {"$ref": "#/footnotes/22"}, {"$ref": "#/footnotes/23"}], "model-application": {"message": "success", "success": true}, "other": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 792.0, "page": 2, "width": 612.0}, {"height": 792.0, "page": 3, "width": 612.0}, {"height": 792.0, "page": 4, "width": 612.0}, {"height": 792.0, "page": 5, "width": 612.0}, {"height": 792.0, "page": 6, "width": 612.0}, {"height": 792.0, "page": 7, "width": 612.0}, {"height": 792.0, "page": 8, "width": 612.0}, {"height": 792.0, "page": 9, "width": 612.0}], "page-elements": [{"bbox": [18.340225219726562, 231.99996948242188, 36.339778900146484, 586.4000244140625], "dref": "#/texts/0", "name": "text", "orig-order": 19, "page": 1, "span": [0, 38], "text-order": 0, "type": "paragraph"}, {"bbox": [61.47460174560547, 672.0942993164062, 552.7999877929688, 708.4287719726562], "dref": "#/texts/1", "name": "title", "orig-order": 0, "page": 1, "span": [0, 84], "text-order": 1, "type": "title"}, {"bbox": [158.54901123046875, 646.95166015625, 454.4521484375, 657.9959716796875], "dref": "#/texts/2", "name": "text", "orig-order": 1, "page": 1, "span": [0, 60], "text-order": 2, "type": "paragraph"}, {"bbox": [179.6484832763672, 635.4270629882812, 433.13836669921875, 644.6961059570312], "dref": "#/texts/3", "name": "text", "orig-order": 2, "page": 1, "span": [0, 30], "text-order": 3, "type": "paragraph"}, {"bbox": [277.5870056152344, 623.4720458984375, 335.40997314453125, 632.3786010742188], "dref": "#/texts/4", "name": "text", "orig-order": 3, "page": 1, "span": [0, 12], "text-order": 4, "type": "paragraph"}, {"bbox": [255.3256378173828, 611.5160522460938, 357.6419982910156, 621.1870727539062], "dref": "#/texts/5", "name": "text", "orig-order": 4, "page": 1, "span": [0, 24], "text-order": 5, "type": "paragraph"}, {"bbox": [53.50812911987305, 592.31494140625, 112.67424011230469, 602.275634765625], "dref": "#/texts/6", "name": "subtitle-level-1", "orig-order": 5, "page": 1, "span": [0, 8], "text-order": 6, "type": "subtitle-level-1"}, {"bbox": [317.7327880859375, 592.2473754882812, 421.26416015625, 602.3604125976562], "dref": "#/texts/7", "name": "subtitle-level-1", "orig-order": 13, "page": 1, "span": [0, 14], "text-order": 7, "type": "subtitle-level-1"}, {"bbox": [53.474998474121094, 326.9052734375, 295.66064453125, 586.9752197265625], "dref": "#/texts/8", "name": "text", "orig-order": 6, "page": 1, "span": [0, 1554], "text-order": 8, "type": "paragraph"}, {"bbox": [53.51100158691406, 294.8792724609375, 138.14549255371094, 302.33953857421875], "dref": "#/texts/9", "name": "subtitle-level-1", "orig-order": 7, "page": 1, "span": [0, 21], "text-order": 9, "type": "subtitle-level-1"}, {"bbox": [53.20000076293945, 235.04745483398438, 295.4400329589844, 292.11370849609375], "dref": "#/texts/10", "name": "text", "orig-order": 8, "page": 1, "span": [0, 366], "text-order": 10, "type": "paragraph"}, {"bbox": [53.79800033569336, 121.27276611328125, 294.28240966796875, 176.01959228515625], "dref": "#/footnotes/0", "name": "footnote", "orig-order": 9, "page": 1, "span": [0, 585], "text-order": 11, "type": "footnote"}, {"bbox": [53.56800079345703, 112.3555908203125, 215.3354034423828, 118.82350158691406], "dref": "#/footnotes/1", "name": "footnote", "orig-order": 10, "page": 1, "span": [0, 53], "text-order": 12, "type": "footnote"}, {"bbox": [53.268001556396484, 94.71673583984375, 286.8135986328125, 110.1262435913086], "dref": "#/footnotes/2", "name": "footnote", "orig-order": 11, "page": 1, "span": [0, 124], "text-order": 13, "type": "footnote"}, {"bbox": [52.780723571777344, 87.53521728515625, 173.61199951171875, 94.18523406982422], "dref": "#/footnotes/3", "name": "footnote", "orig-order": 12, "page": 1, "span": [0, 39], "text-order": 14, "type": "footnote"}, {"bbox": [317.6319885253906, 337.24517822265625, 559.6874389648438, 586.986328125], "dref": "#/texts/11", "name": "text", "orig-order": 14, "page": 1, "span": [0, 1532], "text-order": 15, "type": "paragraph"}, {"bbox": [317.9549865722656, 183.756591796875, 559.7752075195312, 334.59222412109375], "dref": "#/texts/12", "name": "text", "orig-order": 15, "page": 1, "span": [0, 891], "text-order": 16, "type": "paragraph"}, {"bbox": [317.9549865722656, 150.97491455078125, 559.4527587890625, 181.16822814941406], "dref": "#/texts/13", "name": "text", "orig-order": 16, "page": 1, "span": [0, 200], "text-order": 17, "type": "paragraph"}, {"bbox": [317.54400634765625, 100.9158935546875, 559.1497192382812, 123.9642333984375], "dref": "#/footnotes/4", "name": "footnote", "orig-order": 17, "page": 1, "span": [0, 185], "text-order": 18, "type": "footnote"}, {"bbox": [317.54779052734375, 84.349853515625, 559.419189453125, 99.1622314453125], "dref": "#/footnotes/5", "name": "footnote", "orig-order": 18, "page": 1, "span": [0, 130], "text-order": 19, "type": "footnote"}, {"bbox": [57.056358337402344, 581.521484375, 566.21923828125, 705.9985961914062], "dref": "#/figures/0", "name": "picture", "orig-order": 20, "page": 2, "span": [0, 0], "text-order": 20, "type": "figure"}, {"bbox": [53.502044677734375, 488.92645263671875, 560.5620727539062, 562.7618408203125], "dref": "#/figures/0/captions/0", "name": "caption", "orig-order": 21, "page": 2, "span": [0, 820], "text-order": 21, "type": "caption"}, {"bbox": [53.474998474121094, 394.9362487792969, 295.5370178222656, 468.70623779296875], "dref": "#/texts/14", "name": "text", "orig-order": 22, "page": 2, "span": [0, 409], "text-order": 22, "type": "paragraph"}, {"bbox": [53.575965881347656, 370.0102844238281, 173.47894287109375, 380.5594177246094], "dref": "#/texts/15", "name": "subtitle-level-1", "orig-order": 23, "page": 2, "span": [0, 18], "text-order": 23, "type": "subtitle-level-1"}, {"bbox": [53.474998474121094, 203.9712677001953, 295.7048645019531, 365.4122314453125], "dref": "#/texts/16", "name": "text", "orig-order": 24, "page": 2, "span": [0, 955], "text-order": 24, "type": "paragraph"}, {"bbox": [53.79800033569336, 148.84925842285156, 295.53668212890625, 201.0292205810547], "dref": "#/texts/17", "name": "text", "orig-order": 25, "page": 2, "span": [0, 337], "text-order": 25, "type": "paragraph"}, {"bbox": [53.52906036376953, 119.31765747070312, 137.14767456054688, 125.85260009765625], "dref": "#/footnotes/6", "name": "footnote", "orig-order": 26, "page": 2, "span": [0, 32], "text-order": 26, "type": "footnote"}, {"bbox": [53.36406707763672, 110.3837890625, 128.93763732910156, 116.80622863769531], "dref": "#/footnotes/7", "name": "footnote", "orig-order": 27, "page": 2, "span": [0, 31], "text-order": 27, "type": "footnote"}, {"bbox": [53.797996520996094, 101.62908935546875, 125.09330749511719, 108.06022644042969], "dref": "#/footnotes/8", "name": "footnote", "orig-order": 28, "page": 2, "span": [0, 28], "text-order": 28, "type": "footnote"}, {"bbox": [53.797996520996094, 93.07965087890625, 128.44528198242188, 99.42169189453125], "dref": "#/footnotes/9", "name": "footnote", "orig-order": 29, "page": 2, "span": [0, 29], "text-order": 29, "type": "footnote"}, {"bbox": [53.66099548339844, 84.4400634765625, 246.72222900390625, 90.71622467041016], "dref": "#/footnotes/10", "name": "footnote", "orig-order": 30, "page": 2, "span": [0, 68], "text-order": 30, "type": "footnote"}, {"bbox": [317.69781494140625, 416.852783203125, 560.5628051757812, 468.70623779296875], "dref": "#/texts/18", "name": "text", "orig-order": 31, "page": 2, "span": [0, 325], "text-order": 31, "type": "paragraph"}, {"bbox": [317.9549865722656, 392.1359558105469, 440.25689697265625, 402.8952941894531], "dref": "#/texts/19", "name": "subtitle-level-1", "orig-order": 32, "page": 2, "span": [0, 17], "text-order": 32, "type": "subtitle-level-1"}, {"bbox": [317.9549865722656, 357.8152770996094, 559.0849609375, 387.74822998046875], "dref": "#/texts/20", "name": "text", "orig-order": 33, "page": 2, "span": [0, 174], "text-order": 33, "type": "paragraph"}, {"bbox": [317.6319885253906, 248.01971435546875, 559.85888671875, 354.8722229003906], "dref": "#/texts/21", "name": "text", "orig-order": 34, "page": 2, "span": [0, 594], "text-order": 34, "type": "paragraph"}, {"bbox": [317.9549865722656, 83.84225463867188, 559.7321166992188, 245.28321838378906], "dref": "#/texts/22", "name": "text", "orig-order": 35, "page": 2, "span": [0, 983], "text-order": 35, "type": "paragraph"}, {"bbox": [56.01094436645508, 558.7518920898438, 290.9949645996094, 709.7254028320312], "dref": "#/figures/1", "name": "picture", "orig-order": 49, "page": 3, "span": [0, 0], "text-order": 36, "type": "figure"}, {"bbox": [321.3935546875, 558.6827392578125, 554.2520751953125, 709.9332885742188], "dref": "#/figures/2", "name": "picture", "orig-order": 50, "page": 3, "span": [0, 0], "text-order": 37, "type": "figure"}, {"bbox": [53.79798889160156, 472.7510986328125, 295.5321960449219, 546.5648193359375], "dref": "#/figures/2/captions/0", "name": "caption", "orig-order": 36, "page": 3, "span": [0, 389], "text-order": 38, "type": "caption"}, {"bbox": [53.79800033569336, 294.82501220703125, 295.1091003417969, 445.8912353515625], "dref": "#/texts/23", "name": "text", "orig-order": 37, "page": 3, "span": [0, 916], "text-order": 39, "type": "paragraph"}, {"bbox": [53.79800033569336, 272.23712158203125, 137.17259216308594, 282.3864440917969], "dref": "#/texts/24", "name": "subtitle-level-1", "orig-order": 38, "page": 3, "span": [0, 14], "text-order": 40, "type": "subtitle-level-1"}, {"bbox": [53.62799835205078, 214.8350830078125, 295.6110534667969, 266.9012145996094], "dref": "#/texts/25", "name": "text", "orig-order": 39, "page": 3, "span": [0, 280], "text-order": 41, "type": "paragraph"}, {"bbox": [53.50199890136719, 82.777099609375, 295.5345458984375, 212.1072235107422], "dref": "#/texts/26", "name": "text", "orig-order": 40, "page": 3, "span": [0, 799], "text-order": 42, "type": "paragraph"}, {"bbox": [317.9456481933594, 494.0677185058594, 559.70654296875, 546.4738159179688], "dref": "#/figures/1/captions/0", "name": "caption", "orig-order": 41, "page": 3, "span": [0, 272], "text-order": 43, "type": "caption"}, {"bbox": [317.2680969238281, 451.83447265625, 558.4124755859375, 470.8982238769531], "dref": "#/texts/27", "name": "text", "orig-order": 42, "page": 3, "span": [0, 93], "text-order": 44, "type": "paragraph"}, {"bbox": [317.9549865722656, 429.5016174316406, 445.8941345214844, 439.35626220703125], "dref": "#/texts/28", "name": "subtitle-level-1", "orig-order": 43, "page": 3, "span": [0, 24], "text-order": 45, "type": "subtitle-level-1"}, {"bbox": [317.6319885253906, 306.6052551269531, 559.0227661132812, 424.2102355957031], "dref": "#/texts/29", "name": "text", "orig-order": 44, "page": 3, "span": [0, 669], "text-order": 46, "type": "paragraph"}, {"bbox": [317.9549865722656, 152.94097900390625, 559.0300903320312, 303.6622314453125], "dref": "#/texts/30", "name": "text", "orig-order": 45, "page": 3, "span": [0, 900], "text-order": 47, "type": "paragraph"}, {"bbox": [317.542236328125, 119.9617919921875, 560.2256469726562, 150.07322692871094], "dref": "#/texts/31", "name": "text", "orig-order": 46, "page": 3, "span": [0, 199], "text-order": 48, "type": "paragraph"}, {"bbox": [317.8511962890625, 91.70623779296875, 558.1990356445312, 106.6500244140625], "dref": "#/footnotes/11", "name": "footnote", "orig-order": 47, "page": 3, "span": [0, 102], "text-order": 49, "type": "footnote"}, {"bbox": [317.9549865722656, 83.3656005859375, 397.3962707519531, 89.81023406982422], "dref": "#/footnotes/12", "name": "footnote", "orig-order": 48, "page": 3, "span": [0, 34], "text-order": 50, "type": "footnote"}, {"bbox": [53.79800033569336, 608.7432250976562, 295.53790283203125, 704.4302368164062], "dref": "#/texts/32", "name": "text", "orig-order": 51, "page": 4, "span": [0, 542], "text-order": 51, "type": "paragraph"}, {"bbox": [53.79800033569336, 574.2219848632812, 231.56687927246094, 596.9802856445312], "dref": "#/texts/33", "name": "subtitle-level-1", "orig-order": 52, "page": 4, "span": [0, 51], "text-order": 52, "type": "subtitle-level-1"}, {"bbox": [53.79800033569336, 473.106689453125, 295.5303955078125, 568.8822021484375], "dref": "#/texts/34", "name": "text", "orig-order": 53, "page": 4, "span": [0, 557], "text-order": 53, "type": "paragraph"}, {"bbox": [53.250999450683594, 319.60626220703125, 295.61322021484375, 470.2522277832031], "dref": "#/texts/35", "name": "text", "orig-order": 54, "page": 4, "span": [0, 919], "text-order": 54, "type": "paragraph"}, {"bbox": [53.474998474121094, 154.744140625, 296.03668212890625, 316.6632385253906], "dref": "#/texts/36", "name": "text", "orig-order": 55, "page": 4, "span": [0, 1011], "text-order": 55, "type": "paragraph"}, {"bbox": [53.79800033569336, 121.91156005859375, 295.533203125, 152.2802276611328], "dref": "#/texts/37", "name": "text", "orig-order": 56, "page": 4, "span": [0, 195], "text-order": 56, "type": "paragraph"}, {"bbox": [53.387001037597656, 83.17366027832031, 294.92218017578125, 113.5859375], "dref": "#/footnotes/13", "name": "footnote", "orig-order": 57, "page": 4, "span": [0, 290], "text-order": 57, "type": "footnote"}, {"bbox": [326.25421142578125, 539.8611450195312, 548.1567993164062, 703.5318603515625], "dref": "#/figures/3", "name": "picture", "orig-order": 58, "page": 4, "span": [0, 0], "text-order": 58, "type": "figure"}, {"bbox": [317.6319885253906, 415.019287109375, 560.175537109375, 522.0748291015625], "dref": "#/figures/3/captions/0", "name": "caption", "orig-order": 59, "page": 4, "span": [0, 576], "text-order": 59, "type": "caption"}, {"bbox": [317.9549865722656, 304.00390625, 559.1529541015625, 388.9792175292969], "dref": "#/texts/38", "name": "text", "orig-order": 60, "page": 4, "span": [0, 539], "text-order": 60, "type": "paragraph"}, {"bbox": [317.9549865722656, 268.2449951171875, 522.75146484375, 291.0042724609375], "dref": "#/texts/39", "name": "subtitle-level-1", "orig-order": 61, "page": 4, "span": [0, 55], "text-order": 61, "type": "subtitle-level-1"}, {"bbox": [317.9437561035156, 166.98492431640625, 559.7679443359375, 263.0128173828125], "dref": "#/texts/40", "name": "text", "orig-order": 62, "page": 4, "span": [0, 605], "text-order": 62, "type": "paragraph"}, {"bbox": [317.9549865722656, 83.1304931640625, 560.1549682617188, 157.56007385253906], "dref": "#/texts/41", "name": "text", "orig-order": 63, "page": 4, "span": [0, 466], "text-order": 63, "type": "paragraph"}, {"bbox": [55.4039421081543, 459.4396667480469, 294.0187072753906, 709.196533203125], "dref": "#/figures/4", "name": "picture", "orig-order": 79, "page": 5, "span": [0, 0], "text-order": 64, "type": "figure"}, {"bbox": [53.76737594604492, 404.8351745605469, 296.919189453125, 446.1678161621094], "dref": "#/figures/4/captions/0", "name": "caption", "orig-order": 64, "page": 5, "span": [0, 228], "text-order": 65, "type": "caption"}, {"bbox": [53.79800033569336, 353.96826171875, 295.1701354980469, 383.9022216796875], "dref": "#/texts/42", "name": "text", "orig-order": 65, "page": 5, "span": [0, 199], "text-order": 66, "type": "paragraph"}, {"bbox": [53.79800033569336, 332.0502624511719, 294.4319152832031, 351.070068359375], "dref": "#/texts/43", "name": "text", "orig-order": 66, "page": 5, "span": [0, 105], "text-order": 67, "type": "paragraph"}, {"bbox": [117.81383514404297, 304.81182861328125, 294.531494140625, 327.9595642089844], "dref": "#/texts/44", "name": "formula", "orig-order": 67, "page": 5, "span": [0, 73], "text-order": 68, "type": "equation"}, {"bbox": [53.474998474121094, 280.8752746582031, 294.5132751464844, 300.0321044921875], "dref": "#/texts/45", "name": "text", "orig-order": 68, "page": 5, "span": [0, 124], "text-order": 69, "type": "paragraph"}, {"bbox": [53.79800033569336, 154.57626342773438, 295.1409912109375, 272.4860534667969], "dref": "#/texts/46", "name": "text", "orig-order": 69, "page": 5, "span": [0, 715], "text-order": 70, "type": "paragraph"}, {"bbox": [53.79800033569336, 121.69925689697266, 295.1619567871094, 151.6332244873047], "dref": "#/texts/47", "name": "text", "orig-order": 70, "page": 5, "span": [0, 172], "text-order": 71, "type": "paragraph"}, {"bbox": [53.79800033569336, 99.61725616455078, 294.5709533691406, 118.8629150390625], "dref": "#/texts/48", "name": "text", "orig-order": 71, "page": 5, "span": [0, 125], "text-order": 72, "type": "paragraph"}, {"bbox": [53.387001037597656, 83.28266143798828, 294.561279296875, 89.6395263671875], "dref": "#/footnotes/14", "name": "footnote", "orig-order": 72, "page": 5, "span": [0, 93], "text-order": 73, "type": "footnote"}, {"bbox": [316.9908142089844, 622.02099609375, 560.0983276367188, 706.829833984375], "dref": "#/tables/0/captions/0", "name": "text", "orig-order": 73, "page": 5, "span": [0, 461], "text-order": 74, "type": "paragraph"}, {"bbox": [334.4774475097656, 554.5862426757812, 541.1703491210938, 609.4986572265625], "dref": "#/tables/0", "name": "table", "orig-order": 74, "page": 5, "span": [0, 0], "text-order": 75, "type": "table"}, {"bbox": [317.37548828125, 468.0936279296875, 559.939453125, 520.1222534179688], "dref": "#/texts/49", "name": "text", "orig-order": 75, "page": 5, "span": [0, 337], "text-order": 76, "type": "paragraph"}, {"bbox": [317.6319885253906, 303.8862609863281, 561.6922607421875, 465.32720947265625], "dref": "#/texts/50", "name": "text", "orig-order": 76, "page": 5, "span": [0, 955], "text-order": 77, "type": "paragraph"}, {"bbox": [317.6319885253906, 149.8055419921875, 560.1611328125, 300.9432373046875], "dref": "#/texts/51", "name": "text", "orig-order": 77, "page": 5, "span": [0, 913], "text-order": 78, "type": "paragraph"}, {"bbox": [317.6319885253906, 84.708251953125, 559.6876831054688, 147.51922607421875], "dref": "#/texts/52", "name": "text", "orig-order": 78, "page": 5, "span": [0, 398], "text-order": 79, "type": "paragraph"}, {"bbox": [53.50199890136719, 654.8878173828125, 295.74688720703125, 706.829833984375], "dref": "#/texts/53", "name": "text", "orig-order": 80, "page": 6, "span": [0, 310], "text-order": 80, "type": "paragraph"}, {"bbox": [54.41073989868164, 497.82928466796875, 294.0743103027344, 642.206787109375], "dref": "#/tables/1", "name": "table", "orig-order": 81, "page": 6, "span": [0, 0], "text-order": 81, "type": "table"}, {"bbox": [53.474998474121094, 321.0742492675781, 295.6167907714844, 471.55621337890625], "dref": "#/texts/54", "name": "text", "orig-order": 82, "page": 6, "span": [0, 867], "text-order": 82, "type": "paragraph"}, {"bbox": [53.79800033569336, 236.98883056640625, 295.53466796875, 311.1290588378906], "dref": "#/texts/55", "name": "text", "orig-order": 83, "page": 6, "span": [0, 460], "text-order": 83, "type": "paragraph"}, {"bbox": [53.79800033569336, 127.0894775390625, 295.61102294921875, 234.11122131347656], "dref": "#/texts/56", "name": "text", "orig-order": 84, "page": 6, "span": [0, 635], "text-order": 84, "type": "paragraph"}, {"bbox": [53.79800033569336, 83.63025665283203, 295.5378723144531, 124.522216796875], "dref": "#/texts/57", "name": "text", "orig-order": 85, "page": 6, "span": [0, 256], "text-order": 85, "type": "paragraph"}, {"bbox": [317.49591064453125, 643.8729248046875, 560.350341796875, 706.829833984375], "dref": "#/tables/1/captions/0", "name": "text", "orig-order": 86, "page": 6, "span": [0, 356], "text-order": 86, "type": "paragraph"}, {"bbox": [369.7939453125, 587.8507080078125, 506.9258117675781, 631.5213012695312], "dref": "#/tables/2", "name": "table", "orig-order": 87, "page": 6, "span": [0, 0], "text-order": 87, "type": "table"}, {"bbox": [317.9549865722656, 505.94525146484375, 559.6949462890625, 568.7562255859375], "dref": "#/texts/58", "name": "text", "orig-order": 88, "page": 6, "span": [0, 346], "text-order": 88, "type": "paragraph"}, {"bbox": [317.9183349609375, 384.9764404296875, 559.3198852539062, 503.0022277832031], "dref": "#/texts/59", "name": "text", "orig-order": 89, "page": 6, "span": [0, 689], "text-order": 89, "type": "paragraph"}, {"bbox": [317.87957763671875, 351.9661865234375, 559.6873168945312, 382.4542236328125], "dref": "#/texts/60", "name": "text", "orig-order": 90, "page": 6, "span": [0, 198], "text-order": 90, "type": "paragraph"}, {"bbox": [317.9089660644531, 253.72625732421875, 558.7941284179688, 349.57720947265625], "dref": "#/texts/61", "name": "text", "orig-order": 91, "page": 6, "span": [0, 558], "text-order": 91, "type": "paragraph"}, {"bbox": [317.6319885253906, 165.80517578125, 558.5486450195312, 250.78321838378906], "dref": "#/texts/62", "name": "text", "orig-order": 92, "page": 6, "span": [0, 531], "text-order": 92, "type": "paragraph"}, {"bbox": [317.9549865722656, 144.355712890625, 388.1922607421875, 154.5562744140625], "dref": "#/texts/63", "name": "subtitle-level-1", "orig-order": 93, "page": 6, "span": [0, 12], "text-order": 93, "type": "subtitle-level-1"}, {"bbox": [317.9549865722656, 97.82269287109375, 558.6990966796875, 139.1552276611328], "dref": "#/texts/64", "name": "text", "orig-order": 94, "page": 6, "span": [0, 277], "text-order": 94, "type": "paragraph"}, {"bbox": [317.54400634765625, 83.16966247558594, 398.95098876953125, 89.63201904296875], "dref": "#/footnotes/15", "name": "footnote", "orig-order": 95, "page": 6, "span": [0, 35], "text-order": 95, "type": "footnote"}, {"bbox": [53.79800033569336, 687.81005859375, 296.0726318359375, 706.829833984375], "dref": "#/texts/65", "name": "text", "orig-order": 96, "page": 7, "span": [0, 104], "text-order": 96, "type": "paragraph"}, {"bbox": [52.97157669067383, 452.9112548828125, 291.5167236328125, 672.6514282226562], "dref": "#/texts/66", "name": "text", "orig-order": 97, "page": 7, "span": [0, 723], "text-order": 97, "type": "paragraph"}, {"bbox": [53.79800033569336, 388.8714294433594, 295.0650634765625, 430.3962097167969], "dref": "#/texts/67", "name": "text", "orig-order": 98, "page": 7, "span": [0, 226], "text-order": 98, "type": "paragraph"}, {"bbox": [53.474998474121094, 301.7305908203125, 295.7899169921875, 386.56121826171875], "dref": "#/texts/68", "name": "text", "orig-order": 99, "page": 7, "span": [0, 530], "text-order": 99, "type": "paragraph"}, {"bbox": [53.79800033569336, 265.370849609375, 287.7526550292969, 288.25628662109375], "dref": "#/texts/69", "name": "subtitle-level-1", "orig-order": 100, "page": 7, "span": [0, 61], "text-order": 100, "type": "subtitle-level-1"}, {"bbox": [53.474998474121094, 130.9022216796875, 295.88214111328125, 260.0602111816406], "dref": "#/texts/70", "name": "text", "orig-order": 101, "page": 7, "span": [0, 777], "text-order": 101, "type": "paragraph"}, {"bbox": [53.79800033569336, 107.44329833984375, 150.55332946777344, 117.82127380371094], "dref": "#/texts/71", "name": "subtitle-level-1", "orig-order": 102, "page": 7, "span": [0, 19], "text-order": 102, "type": "subtitle-level-1"}, {"bbox": [53.79800033569336, 83.70025634765625, 295.5948791503906, 102.80303955078125], "dref": "#/texts/72", "name": "text", "orig-order": 103, "page": 7, "span": [0, 127], "text-order": 103, "type": "paragraph"}, {"bbox": [319.4678649902344, 591.0667114257812, 563.418212890625, 707.4041137695312], "dref": "#/figures/5", "name": "picture", "orig-order": 104, "page": 7, "span": [0, 0], "text-order": 104, "type": "figure"}, {"bbox": [317.9549865722656, 491.0215148925781, 561.2398681640625, 575.8748168945312], "dref": "#/figures/5/captions/0", "name": "text", "orig-order": 105, "page": 7, "span": [0, 462], "text-order": 105, "type": "paragraph"}, {"bbox": [317.9549865722656, 444.9665222167969, 558.4959106445312, 464.1462097167969], "dref": "#/texts/73", "name": "text", "orig-order": 106, "page": 7, "span": [0, 97], "text-order": 106, "type": "paragraph"}, {"bbox": [327.9239807128906, 375.72027587890625, 560.4287719726562, 438.79620361328125], "dref": "#/texts/74", "name": "list-item", "orig-order": 107, "page": 7, "span": [0, 307], "text-order": 107, "type": "paragraph"}, {"bbox": [326.89788818359375, 244.04925537109375, 561.5510864257812, 372.7772216796875], "dref": "#/texts/75", "name": "list-item", "orig-order": 108, "page": 7, "span": [0, 702], "text-order": 108, "type": "paragraph"}, {"bbox": [327.4911804199219, 133.84515380859375, 560.5987548828125, 241.10621643066406], "dref": "#/texts/76", "name": "list-item", "orig-order": 109, "page": 7, "span": [0, 613], "text-order": 109, "type": "paragraph"}, {"bbox": [317.54400634765625, 110.22366333007812, 398.9919738769531, 117.16583251953125], "dref": "#/footnotes/16", "name": "footnote", "orig-order": 110, "page": 7, "span": [0, 32], "text-order": 110, "type": "footnote"}, {"bbox": [317.54400634765625, 101.18707275390625, 400.1710205078125, 108.2861328125], "dref": "#/footnotes/17", "name": "footnote", "orig-order": 111, "page": 7, "span": [0, 32], "text-order": 111, "type": "footnote"}, {"bbox": [317.54400634765625, 92.2611083984375, 382.0435791015625, 99.16375732421875], "dref": "#/footnotes/18", "name": "footnote", "orig-order": 112, "page": 7, "span": [0, 28], "text-order": 112, "type": "footnote"}, {"bbox": [317.54400634765625, 83.07232666015625, 407.5936279296875, 90.22023010253906], "dref": "#/footnotes/19", "name": "footnote", "orig-order": 113, "page": 7, "span": [0, 36], "text-order": 113, "type": "footnote"}, {"bbox": [58.86375045776367, 545.35546875, 300.35174560546875, 702.7379760742188], "dref": "#/figures/6", "name": "picture", "orig-order": 114, "page": 8, "span": [0, 0], "text-order": 114, "type": "figure"}, {"bbox": [53.79800033569336, 474.97509765625, 297.0106506347656, 526.871826171875], "dref": "#/figures/6/captions/0", "name": "caption", "orig-order": 115, "page": 8, "span": [0, 281], "text-order": 115, "type": "caption"}, {"bbox": [78.20700073242188, 422.1390686035156, 295.529052734375, 452.20721435546875], "dref": "#/texts/77", "name": "text", "orig-order": 116, "page": 8, "span": [0, 125], "text-order": 116, "type": "paragraph"}, {"bbox": [63.44164276123047, 300.48040771484375, 295.35687255859375, 419.33123779296875], "dref": "#/texts/78", "name": "list-item", "orig-order": 117, "page": 8, "span": [0, 633], "text-order": 117, "type": "paragraph"}, {"bbox": [53.79800033569336, 199.9788818359375, 295.3085021972656, 296.0932312011719], "dref": "#/texts/79", "name": "text", "orig-order": 118, "page": 8, "span": [0, 565], "text-order": 118, "type": "paragraph"}, {"bbox": [53.79800033569336, 101.77525329589844, 295.5303649902344, 197.46322631835938], "dref": "#/texts/80", "name": "text", "orig-order": 119, "page": 8, "span": [0, 605], "text-order": 119, "type": "paragraph"}, {"bbox": [53.387001037597656, 83.2796630859375, 137.4241180419922, 89.7896728515625], "dref": "#/footnotes/20", "name": "footnote", "orig-order": 120, "page": 8, "span": [0, 31], "text-order": 120, "type": "footnote"}, {"bbox": [321.94073486328125, 587.7708740234375, 563.5105590820312, 702.5103149414062], "dref": "#/figures/7", "name": "picture", "orig-order": 121, "page": 8, "span": [0, 0], "text-order": 121, "type": "figure"}, {"bbox": [317.9549865722656, 538.82373046875, 558.39794921875, 568.872802734375], "dref": "#/figures/7/captions/0", "name": "caption", "orig-order": 122, "page": 8, "span": [0, 149], "text-order": 122, "type": "caption"}, {"bbox": [317.9549865722656, 490.72216796875, 558.5828857421875, 509.7472229003906], "dref": "#/texts/81", "name": "text", "orig-order": 123, "page": 8, "span": [0, 87], "text-order": 123, "type": "paragraph"}, {"bbox": [317.95361328125, 466.65576171875, 398.97723388671875, 476.83929443359375], "dref": "#/texts/82", "name": "subtitle-level-1", "orig-order": 124, "page": 8, "span": [0, 14], "text-order": 124, "type": "subtitle-level-1"}, {"bbox": [317.9549865722656, 408.89727783203125, 559.3217163085938, 463.36187744140625], "dref": "#/texts/83", "name": "text", "orig-order": 125, "page": 8, "span": [0, 302], "text-order": 125, "type": "paragraph"}, {"bbox": [317.9549865722656, 332.0350341796875, 559.6873779296875, 406.2972106933594], "dref": "#/texts/84", "name": "text", "orig-order": 126, "page": 8, "span": [0, 445], "text-order": 126, "type": "paragraph"}, {"bbox": [317.9181823730469, 277.7332763671875, 559.6890869140625, 329.584228515625], "dref": "#/texts/85", "name": "text", "orig-order": 127, "page": 8, "span": [0, 307], "text-order": 127, "type": "paragraph"}, {"bbox": [317.6409912109375, 200.8416748046875, 559.6902465820312, 274.79022216796875], "dref": "#/texts/86", "name": "text", "orig-order": 128, "page": 8, "span": [0, 438], "text-order": 128, "type": "paragraph"}, {"bbox": [317.9549865722656, 177.281005859375, 438.01214599609375, 187.37762451171875], "dref": "#/texts/87", "name": "subtitle-level-1", "orig-order": 129, "page": 8, "span": [0, 22], "text-order": 129, "type": "subtitle-level-1"}, {"bbox": [317.7309875488281, 119.96771240234375, 558.4498291015625, 171.94122314453125], "dref": "#/texts/88", "name": "text", "orig-order": 130, "page": 8, "span": [0, 320], "text-order": 130, "type": "paragraph"}, {"bbox": [317.54400634765625, 93.00566101074219, 382.23095703125, 99.5784912109375], "dref": "#/footnotes/21", "name": "footnote", "orig-order": 131, "page": 8, "span": [0, 29], "text-order": 131, "type": "footnote"}, {"bbox": [317.54400634765625, 84.25965881347656, 382.0310363769531, 90.77301025390625], "dref": "#/footnotes/22", "name": "footnote", "orig-order": 132, "page": 8, "span": [0, 27], "text-order": 132, "type": "footnote"}, {"bbox": [53.79800033569336, 619.7022094726562, 295.08966064453125, 706.5369262695312], "dref": "#/texts/89", "name": "text", "orig-order": 133, "page": 9, "span": [0, 504], "text-order": 133, "type": "paragraph"}, {"bbox": [53.474998474121094, 421.4378662109375, 295.7029724121094, 616.7592163085938], "dref": "#/texts/90", "name": "text", "orig-order": 134, "page": 9, "span": [0, 1164], "text-order": 134, "type": "paragraph"}, {"bbox": [53.71706771850586, 396.8446350097656, 144.1709442138672, 407.8390808105469], "dref": "#/texts/91", "name": "subtitle-level-1", "orig-order": 135, "page": 9, "span": [0, 12], "text-order": 135, "type": "subtitle-level-1"}, {"bbox": [53.37699890136719, 340.66925048828125, 295.537841796875, 392.5202331542969], "dref": "#/texts/92", "name": "text", "orig-order": 136, "page": 9, "span": [0, 276], "text-order": 136, "type": "paragraph"}, {"bbox": [53.79800033569336, 263.8846435546875, 295.6170654296875, 337.7262268066406], "dref": "#/texts/93", "name": "text", "orig-order": 137, "page": 9, "span": [0, 468], "text-order": 137, "type": "paragraph"}, {"bbox": [53.79800033569336, 131.818603515625, 295.61065673828125, 261.0132141113281], "dref": "#/texts/94", "name": "text", "orig-order": 138, "page": 9, "span": [0, 808], "text-order": 138, "type": "paragraph"}, {"bbox": [53.79800033569336, 84.15838623046875, 295.2939147949219, 106.9937744140625], "dref": "#/footnotes/23", "name": "footnote", "orig-order": 139, "page": 9, "span": [0, 237], "text-order": 139, "type": "footnote"}, {"bbox": [317.6944274902344, 684.9768676757812, 559.6907348632812, 704.4302368164062], "dref": "#/texts/95", "name": "text", "orig-order": 140, "page": 9, "span": [0, 119], "text-order": 140, "type": "paragraph"}, {"bbox": [317.9549865722656, 663.3440551757812, 438.23162841796875, 673.7493286132812], "dref": "#/texts/96", "name": "subtitle-level-1", "orig-order": 141, "page": 9, "span": [0, 15], "text-order": 141, "type": "subtitle-level-1"}, {"bbox": [317.677001953125, 639.6272583007812, 564.3941040039062, 658.7048950195312], "dref": "#/texts/97", "name": "text", "orig-order": 142, "page": 9, "span": [0, 127], "text-order": 142, "type": "paragraph"}, {"bbox": [317.45098876953125, 584.833251953125, 571.9612426757812, 636.8023071289062], "dref": "#/texts/98", "name": "text", "orig-order": 143, "page": 9, "span": [0, 269], "text-order": 143, "type": "paragraph"}, {"bbox": [317.9549865722656, 563.3189697265625, 391.296875, 573.2864990234375], "dref": "#/texts/99", "name": "subtitle-level-1", "orig-order": 144, "page": 9, "span": [0, 10], "text-order": 144, "type": "subtitle-level-1"}, {"bbox": [320.02252197265625, 529.3375854492188, 560.26220703125, 559.6171875], "dref": "#/texts/100", "name": "list-item", "orig-order": 145, "page": 9, "span": [0, 280], "text-order": 145, "type": "paragraph"}, {"bbox": [321.42291259765625, 513.4962158203125, 559.0736694335938, 527.7372436523438], "dref": "#/texts/101", "name": "list-item", "orig-order": 146, "page": 9, "span": [0, 122], "text-order": 146, "type": "paragraph"}, {"bbox": [321.3348388671875, 489.4542236328125, 559.13916015625, 511.7962341308594], "dref": "#/texts/102", "name": "list-item", "orig-order": 147, "page": 9, "span": [0, 164], "text-order": 147, "type": "paragraph"}, {"bbox": [321.1488342285156, 457.43511962890625, 559.233154296875, 487.88623046875], "dref": "#/texts/103", "name": "list-item", "orig-order": 148, "page": 9, "span": [0, 282], "text-order": 148, "type": "paragraph"}, {"bbox": [321.2120056152344, 433.30517578125, 559.0735473632812, 456.0062255859375], "dref": "#/texts/104", "name": "list-item", "orig-order": 149, "page": 9, "span": [0, 224], "text-order": 149, "type": "paragraph"}, {"bbox": [321.4419860839844, 409.4002685546875, 558.4588012695312, 432.0952453613281], "dref": "#/texts/105", "name": "list-item", "orig-order": 150, "page": 9, "span": [0, 233], "text-order": 150, "type": "paragraph"}, {"bbox": [320.9912109375, 378.0406494140625, 559.8010864257812, 408.18524169921875], "dref": "#/texts/106", "name": "list-item", "orig-order": 151, "page": 9, "span": [0, 250], "text-order": 151, "type": "paragraph"}, {"bbox": [321.1594543457031, 346.1596374511719, 560.1024780273438, 376.44775390625], "dref": "#/texts/107", "name": "list-item", "orig-order": 152, "page": 9, "span": [0, 227], "text-order": 152, "type": "paragraph"}, {"bbox": [320.2006530761719, 330.21966552734375, 558.261474609375, 344.5801696777344], "dref": "#/texts/108", "name": "list-item", "orig-order": 153, "page": 9, "span": [0, 116], "text-order": 153, "type": "paragraph"}, {"bbox": [317.95501708984375, 274.36358642578125, 572.77392578125, 328.4842529296875], "dref": "#/texts/109", "name": "list-item", "orig-order": 154, "page": 9, "span": [0, 425], "text-order": 154, "type": "paragraph"}, {"bbox": [317.0665588378906, 250.09552001953125, 560.9763793945312, 272.6932373046875], "dref": "#/texts/110", "name": "list-item", "orig-order": 155, "page": 9, "span": [0, 166], "text-order": 155, "type": "paragraph"}], "page-footers": [], "page-headers": [], "properties": {"data": [["language", 7377574370756688828, "TEXT", "#/texts/0", "en", 0.7799999713897705], ["language", 10227328696767902037, "TEXT", "#/texts/1", "en", 0.699999988079071], ["language", 8770494724746327817, "TEXT", "#/texts/2", "en", 0.25999999046325684], ["language", 18258237174351515285, "TEXT", "#/texts/3", "zh", 0.09000000357627869], ["language", 5704354110496947297, "TEXT", "#/texts/4", "en", 0.5299999713897705], ["language", 11056873211244709904, "TEXT", "#/texts/5", "en", 0.49000000953674316], ["language", 11788868678004267702, "TEXT", "#/texts/6", "en", 0.6499999761581421], ["language", 3624246356859711021, "TEXT", "#/texts/7", "en", 0.550000011920929], ["language", 17999848460847860039, "TEXT", "#/texts/8", "en", 0.9200000166893005], ["language", 14387482728083328702, "TEXT", "#/texts/9", "en", 0.20999999344348907], ["language", 11222145795862225841, "TEXT", "#/texts/10", "en", 0.49000000953674316], ["language", 16923207262044929933, "TEXT", "#/texts/11", "en", 0.9399999976158142], ["language", 3749305213430885773, "TEXT", "#/texts/12", "en", 0.949999988079071], ["language", 3409470577915009676, "TEXT", "#/texts/13", "en", 0.949999988079071], ["language", 17187299362680072378, "TEXT", "#/texts/14", "en", 0.9200000166893005], ["language", 697648145931166262, "TEXT", "#/texts/15", "en", 0.47999998927116394], ["language", 7935233310532930917, "TEXT", "#/texts/16", "en", 0.9200000166893005], ["language", 2762070725424637531, "TEXT", "#/texts/17", "en", 0.9700000286102295], ["language", 7536915191196259776, "TEXT", "#/texts/18", "en", 0.9900000095367432], ["language", 11495493007651807568, "TEXT", "#/texts/19", "en", 0.3100000023841858], ["language", 7650015170039242996, "TEXT", "#/texts/20", "en", 0.9399999976158142], ["language", 14959508657858158650, "TEXT", "#/texts/21", "en", 0.9599999785423279], ["language", 10379300903412882972, "TEXT", "#/texts/22", "en", 0.9399999976158142], ["language", 4994395008195818594, "TEXT", "#/texts/23", "en", 0.9599999785423279], ["language", 4203835122307823579, "TEXT", "#/texts/24", "en", 0.23999999463558197], ["language", 13520362244078084911, "TEXT", "#/texts/25", "en", 0.75], ["language", 1749622367305947670, "TEXT", "#/texts/26", "en", 0.8999999761581421], ["language", 11083736481641202939, "TEXT", "#/texts/27", "en", 0.9200000166893005], ["language", 15403141463083979171, "TEXT", "#/texts/28", "en", 0.6800000071525574], ["language", 12234429517419341922, "TEXT", "#/texts/29", "en", 0.9300000071525574], ["language", 16957857111665886816, "TEXT", "#/texts/30", "en", 0.9399999976158142], ["language", 10390915169360946497, "TEXT", "#/texts/31", "en", 0.8500000238418579], ["language", 15254383206256494278, "TEXT", "#/texts/32", "en", 0.9399999976158142], ["language", 17759618186065566858, "TEXT", "#/texts/33", "en", 0.8299999833106995], ["language", 11638821473906997927, "TEXT", "#/texts/34", "en", 0.9700000286102295], ["language", 13020065077657899116, "TEXT", "#/texts/35", "en", 0.8899999856948853], ["language", 10103841011442966464, "TEXT", "#/texts/36", "en", 0.9399999976158142], ["language", 10982401368140758581, "TEXT", "#/texts/37", "en", 0.9599999785423279], ["language", 887751753527930563, "TEXT", "#/texts/38", "en", 0.949999988079071], ["language", 4695688617288377564, "TEXT", "#/texts/39", "en", 0.800000011920929], ["language", 3275001812318455279, "TEXT", "#/texts/40", "en", 0.9399999976158142], ["language", 15354930767839681193, "TEXT", "#/texts/41", "en", 0.8999999761581421], ["language", 6337233386759158728, "TEXT", "#/texts/42", "en", 0.8999999761581421], ["language", 2249972239307071508, "TEXT", "#/texts/43", "en", 0.8199999928474426], ["language", 12383805870947794174, "TEXT", "#/texts/44", "en", 0.27000001072883606], ["language", 7053654953998543393, "TEXT", "#/texts/45", "en", 0.5799999833106995], ["language", 15921044595687116426, "TEXT", "#/texts/46", "en", 0.949999988079071], ["language", 12234068400463628788, "TEXT", "#/texts/47", "en", 0.9700000286102295], ["language", 4628466594790006384, "TEXT", "#/texts/48", "en", 0.9200000166893005], ["language", 9651706913678711778, "TEXT", "#/texts/49", "en", 0.9200000166893005], ["language", 1363251178266051349, "TEXT", "#/texts/50", "en", 0.9100000262260437], ["language", 18259197018396996238, "TEXT", "#/texts/51", "en", 0.9599999785423279], ["language", 14663676516964431047, "TEXT", "#/texts/52", "en", 0.949999988079071], ["language", 4577067829072175096, "TEXT", "#/texts/53", "en", 0.8600000143051147], ["language", 2569392033451362672, "TEXT", "#/texts/54", "en", 0.9200000166893005], ["language", 14539041145469267811, "TEXT", "#/texts/55", "en", 0.9200000166893005], ["language", 8607014065143641201, "TEXT", "#/texts/56", "en", 0.949999988079071], ["language", 1994904537764312371, "TEXT", "#/texts/57", "en", 0.949999988079071], ["language", 7742256726079628058, "TEXT", "#/texts/58", "en", 0.9200000166893005], ["language", 8810233123818174294, "TEXT", "#/texts/59", "en", 0.9599999785423279], ["language", 16446711449286912460, "TEXT", "#/texts/60", "en", 0.9399999976158142], ["language", 9558434107504657973, "TEXT", "#/texts/61", "en", 0.9100000262260437], ["language", 18349896906192842040, "TEXT", "#/texts/62", "en", 0.9399999976158142], ["language", 10082834006373808153, "TEXT", "#/texts/63", "en", 0.8199999928474426], ["language", 15253541252152665681, "TEXT", "#/texts/64", "en", 0.8899999856948853], ["language", 3904142170608486950, "TEXT", "#/texts/65", "en", 0.7799999713897705], ["language", 6410818076508661508, "TEXT", "#/texts/66", "en", 0.3499999940395355], ["language", 12813875992986832439, "TEXT", "#/texts/67", "en", 0.9800000190734863], ["language", 11030869010407626539, "TEXT", "#/texts/68", "en", 0.949999988079071], ["language", 2142320548375900929, "TEXT", "#/texts/69", "en", 0.33000001311302185], ["language", 12747011194397783283, "TEXT", "#/texts/70", "en", 0.9599999785423279], ["language", 174789262945188010, "TEXT", "#/texts/71", "en", 0.6200000047683716], ["language", 7228893318503650455, "TEXT", "#/texts/72", "en", 0.9399999976158142], ["language", 9230667184712205690, "TEXT", "#/texts/73", "en", 0.9599999785423279], ["language", 17419815751432442882, "TEXT", "#/texts/74", "en", 0.8600000143051147], ["language", 11194226403360998426, "TEXT", "#/texts/75", "en", 0.8899999856948853], ["language", 9005324696118733701, "TEXT", "#/texts/76", "en", 0.8799999952316284], ["language", 8082547756621048511, "TEXT", "#/texts/77", "en", 0.800000011920929], ["language", 7791113385466815951, "TEXT", "#/texts/78", "en", 0.9200000166893005], ["language", 2845012065511066307, "TEXT", "#/texts/79", "en", 0.9599999785423279], ["language", 15072914837937068796, "TEXT", "#/texts/80", "en", 0.949999988079071], ["language", 15263283599394646155, "TEXT", "#/texts/81", "en", 0.9800000190734863], ["language", 11417717357379295278, "TEXT", "#/texts/82", "en", 0.8399999737739563], ["language", 9031137420247852045, "TEXT", "#/texts/83", "en", 0.8500000238418579], ["language", 18436578077535696718, "TEXT", "#/texts/84", "en", 0.9399999976158142], ["language", 11734907767490759865, "TEXT", "#/texts/85", "en", 0.9100000262260437], ["language", 7845460979782401889, "TEXT", "#/texts/86", "en", 0.9399999976158142], ["language", 17769988780693768120, "TEXT", "#/texts/87", "en", 0.38999998569488525], ["language", 12387489643011067991, "TEXT", "#/texts/88", "en", 0.9300000071525574], ["language", 10375772475809458895, "TEXT", "#/texts/89", "en", 0.9900000095367432], ["language", 7054726458191881751, "TEXT", "#/texts/90", "en", 0.9399999976158142], ["language", 7794115281016062068, "TEXT", "#/texts/91", "en", 0.38999998569488525], ["language", 7038163015905900647, "TEXT", "#/texts/92", "en", 0.9200000166893005], ["language", 1508626318915838319, "TEXT", "#/texts/93", "en", 0.949999988079071], ["language", 17247086344435786796, "TEXT", "#/texts/94", "en", 0.9300000071525574], ["language", 10287541089279789496, "TEXT", "#/texts/95", "en", 0.8299999833106995], ["language", 7819882792760965882, "TEXT", "#/texts/96", "en", 0.25], ["language", 15983582675278266440, "TEXT", "#/texts/97", "en", 0.949999988079071], ["language", 12711351442546714716, "TEXT", "#/texts/98", "en", 0.9300000071525574], ["language", 1225384713519841338, "TEXT", "#/texts/99", "en", 0.33000001311302185], ["language", 1712774266196702392, "TEXT", "#/texts/100", "en", 0.6499999761581421], ["language", 14718288547983000340, "TEXT", "#/texts/101", "en", 0.5799999833106995], ["language", 16943780574244090186, "TEXT", "#/texts/102", "en", 0.6700000166893005], ["language", 8004985786049140169, "TEXT", "#/texts/103", "en", 0.3400000035762787], ["language", 12744546813104546377, "TEXT", "#/texts/104", "en", 0.47999998927116394], ["language", 16061746189176848219, "TEXT", "#/texts/105", "en", 0.6299999952316284], ["language", 11872392946390819176, "TEXT", "#/texts/106", "en", 0.38999998569488525], ["language", 2956849475535726296, "TEXT", "#/texts/107", "en", 0.6299999952316284], ["language", 6623297047995432604, "TEXT", "#/texts/108", "en", 0.4399999976158142], ["language", 2507285765516108280, "TEXT", "#/texts/109", "en", 0.5899999737739563], ["language", 14905276480471286920, "TEXT", "#/texts/110", "en", 0.47999998927116394], ["language", 2942883588400666364, "DOCUMENT", "#", "en", 1.0], ["semantic", 7377574370756688828, "TEXT", "#/texts/0", "meta-data", 0.9800000190734863], ["semantic", 10227328696767902037, "TEXT", "#/texts/1", "header", 0.5400000214576721], ["semantic", 8770494724746327817, "TEXT", "#/texts/2", "meta-data", 0.9100000262260437], ["semantic", 18258237174351515285, "TEXT", "#/texts/3", "meta-data", 0.6200000047683716], ["semantic", 5704354110496947297, "TEXT", "#/texts/4", "meta-data", 1.0], ["semantic", 11056873211244709904, "TEXT", "#/texts/5", "meta-data", 1.0], ["semantic", 11788868678004267702, "TEXT", "#/texts/6", "header", 1.0], ["semantic", 3624246356859711021, "TEXT", "#/texts/7", "header", 1.0], ["semantic", 17999848460847860039, "TEXT", "#/texts/8", "text", 1.0], ["semantic", 14387482728083328702, "TEXT", "#/texts/9", "reference", 0.5099999904632568], ["semantic", 11222145795862225841, "TEXT", "#/texts/10", "reference", 0.9599999785423279], ["semantic", 16923207262044929933, "TEXT", "#/texts/11", "text", 0.9900000095367432], ["semantic", 3749305213430885773, "TEXT", "#/texts/12", "text", 1.0], ["semantic", 3409470577915009676, "TEXT", "#/texts/13", "text", 1.0], ["semantic", 17187299362680072378, "TEXT", "#/texts/14", "text", 0.9900000095367432], ["semantic", 697648145931166262, "TEXT", "#/texts/15", "header", 0.8199999928474426], ["semantic", 7935233310532930917, "TEXT", "#/texts/16", "text", 1.0], ["semantic", 2762070725424637531, "TEXT", "#/texts/17", "text", 1.0], ["semantic", 7536915191196259776, "TEXT", "#/texts/18", "text", 1.0], ["semantic", 11495493007651807568, "TEXT", "#/texts/19", "header", 0.949999988079071], ["semantic", 7650015170039242996, "TEXT", "#/texts/20", "text", 1.0], ["semantic", 14959508657858158650, "TEXT", "#/texts/21", "text", 1.0], ["semantic", 10379300903412882972, "TEXT", "#/texts/22", "text", 0.9700000286102295], ["semantic", 4994395008195818594, "TEXT", "#/texts/23", "text", 1.0], ["semantic", 4203835122307823579, "TEXT", "#/texts/24", "header", 1.0], ["semantic", 13520362244078084911, "TEXT", "#/texts/25", "text", 0.949999988079071], ["semantic", 1749622367305947670, "TEXT", "#/texts/26", "text", 0.9900000095367432], ["semantic", 11083736481641202939, "TEXT", "#/texts/27", "text", 1.0], ["semantic", 15403141463083979171, "TEXT", "#/texts/28", "header", 1.0], ["semantic", 12234429517419341922, "TEXT", "#/texts/29", "text", 1.0], ["semantic", 16957857111665886816, "TEXT", "#/texts/30", "text", 0.9900000095367432], ["semantic", 10390915169360946497, "TEXT", "#/texts/31", "text", 0.9800000190734863], ["semantic", 15254383206256494278, "TEXT", "#/texts/32", "text", 0.9800000190734863], ["semantic", 17759618186065566858, "TEXT", "#/texts/33", "header", 0.9300000071525574], ["semantic", 11638821473906997927, "TEXT", "#/texts/34", "text", 1.0], ["semantic", 13020065077657899116, "TEXT", "#/texts/35", "text", 0.9900000095367432], ["semantic", 10103841011442966464, "TEXT", "#/texts/36", "text", 1.0], ["semantic", 10982401368140758581, "TEXT", "#/texts/37", "text", 0.9700000286102295], ["semantic", 887751753527930563, "TEXT", "#/texts/38", "text", 0.9900000095367432], ["semantic", 4695688617288377564, "TEXT", "#/texts/39", "header", 0.9300000071525574], ["semantic", 3275001812318455279, "TEXT", "#/texts/40", "text", 1.0], ["semantic", 15354930767839681193, "TEXT", "#/texts/41", "text", 1.0], ["semantic", 6337233386759158728, "TEXT", "#/texts/42", "text", 1.0], ["semantic", 2249972239307071508, "TEXT", "#/texts/43", "text", 1.0], ["semantic", 12383805870947794174, "TEXT", "#/texts/44", "text", 1.0], ["semantic", 7053654953998543393, "TEXT", "#/texts/45", "text", 0.9900000095367432], ["semantic", 15921044595687116426, "TEXT", "#/texts/46", "text", 0.9700000286102295], ["semantic", 12234068400463628788, "TEXT", "#/texts/47", "text", 1.0], ["semantic", 4628466594790006384, "TEXT", "#/texts/48", "text", 0.949999988079071], ["semantic", 9651706913678711778, "TEXT", "#/texts/49", "text", 1.0], ["semantic", 1363251178266051349, "TEXT", "#/texts/50", "text", 1.0], ["semantic", 18259197018396996238, "TEXT", "#/texts/51", "text", 1.0], ["semantic", 14663676516964431047, "TEXT", "#/texts/52", "text", 1.0], ["semantic", 4577067829072175096, "TEXT", "#/texts/53", "text", 0.9200000166893005], ["semantic", 2569392033451362672, "TEXT", "#/texts/54", "text", 1.0], ["semantic", 14539041145469267811, "TEXT", "#/texts/55", "text", 1.0], ["semantic", 8607014065143641201, "TEXT", "#/texts/56", "text", 1.0], ["semantic", 1994904537764312371, "TEXT", "#/texts/57", "text", 1.0], ["semantic", 7742256726079628058, "TEXT", "#/texts/58", "text", 1.0], ["semantic", 8810233123818174294, "TEXT", "#/texts/59", "text", 1.0], ["semantic", 16446711449286912460, "TEXT", "#/texts/60", "text", 1.0], ["semantic", 9558434107504657973, "TEXT", "#/texts/61", "text", 1.0], ["semantic", 18349896906192842040, "TEXT", "#/texts/62", "text", 1.0], ["semantic", 10082834006373808153, "TEXT", "#/texts/63", "header", 1.0], ["semantic", 15253541252152665681, "TEXT", "#/texts/64", "text", 0.9900000095367432], ["semantic", 3904142170608486950, "TEXT", "#/texts/65", "text", 0.9100000262260437], ["semantic", 6410818076508661508, "TEXT", "#/texts/66", "reference", 0.7200000286102295], ["semantic", 12813875992986832439, "TEXT", "#/texts/67", "text", 1.0], ["semantic", 11030869010407626539, "TEXT", "#/texts/68", "text", 1.0], ["semantic", 2142320548375900929, "TEXT", "#/texts/69", "header", 0.9800000190734863], ["semantic", 12747011194397783283, "TEXT", "#/texts/70", "text", 1.0], ["semantic", 174789262945188010, "TEXT", "#/texts/71", "header", 1.0], ["semantic", 7228893318503650455, "TEXT", "#/texts/72", "text", 1.0], ["semantic", 9230667184712205690, "TEXT", "#/texts/73", "text", 1.0], ["semantic", 17419815751432442882, "TEXT", "#/texts/74", "text", 0.9800000190734863], ["semantic", 11194226403360998426, "TEXT", "#/texts/75", "text", 1.0], ["semantic", 9005324696118733701, "TEXT", "#/texts/76", "text", 1.0], ["semantic", 8082547756621048511, "TEXT", "#/texts/77", "text", 0.9700000286102295], ["semantic", 7791113385466815951, "TEXT", "#/texts/78", "text", 1.0], ["semantic", 2845012065511066307, "TEXT", "#/texts/79", "text", 1.0], ["semantic", 15072914837937068796, "TEXT", "#/texts/80", "text", 1.0], ["semantic", 15263283599394646155, "TEXT", "#/texts/81", "text", 0.9900000095367432], ["semantic", 11417717357379295278, "TEXT", "#/texts/82", "header", 1.0], ["semantic", 9031137420247852045, "TEXT", "#/texts/83", "text", 0.9800000190734863], ["semantic", 18436578077535696718, "TEXT", "#/texts/84", "text", 1.0], ["semantic", 11734907767490759865, "TEXT", "#/texts/85", "text", 1.0], ["semantic", 7845460979782401889, "TEXT", "#/texts/86", "text", 0.9599999785423279], ["semantic", 17769988780693768120, "TEXT", "#/texts/87", "header", 1.0], ["semantic", 12387489643011067991, "TEXT", "#/texts/88", "text", 1.0], ["semantic", 10375772475809458895, "TEXT", "#/texts/89", "text", 1.0], ["semantic", 7054726458191881751, "TEXT", "#/texts/90", "text", 1.0], ["semantic", 7794115281016062068, "TEXT", "#/texts/91", "header", 1.0], ["semantic", 7038163015905900647, "TEXT", "#/texts/92", "text", 0.9900000095367432], ["semantic", 1508626318915838319, "TEXT", "#/texts/93", "text", 1.0], ["semantic", 17247086344435786796, "TEXT", "#/texts/94", "text", 1.0], ["semantic", 10287541089279789496, "TEXT", "#/texts/95", "text", 0.8299999833106995], ["semantic", 7819882792760965882, "TEXT", "#/texts/96", "header", 1.0], ["semantic", 15983582675278266440, "TEXT", "#/texts/97", "text", 0.9399999976158142], ["semantic", 12711351442546714716, "TEXT", "#/texts/98", "text", 0.9900000095367432], ["semantic", 1225384713519841338, "TEXT", "#/texts/99", "reference", 1.0], ["semantic", 1712774266196702392, "TEXT", "#/texts/100", "reference", 1.0], ["semantic", 14718288547983000340, "TEXT", "#/texts/101", "reference", 1.0], ["semantic", 16943780574244090186, "TEXT", "#/texts/102", "reference", 1.0], ["semantic", 8004985786049140169, "TEXT", "#/texts/103", "reference", 0.9900000095367432], ["semantic", 12744546813104546377, "TEXT", "#/texts/104", "reference", 1.0], ["semantic", 16061746189176848219, "TEXT", "#/texts/105", "reference", 1.0], ["semantic", 11872392946390819176, "TEXT", "#/texts/106", "reference", 1.0], ["semantic", 2956849475535726296, "TEXT", "#/texts/107", "reference", 1.0], ["semantic", 6623297047995432604, "TEXT", "#/texts/108", "reference", 1.0], ["semantic", 2507285765516108280, "TEXT", "#/texts/109", "reference", 0.9800000190734863], ["semantic", 14905276480471286920, "TEXT", "#/texts/110", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "tables": [{"#-cols": 5, "#-rows": 4, "captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/54", "hash": 9160199179916979172, "orig": "Table 1: Time-to-solution and performance results for the Faster RCNN and YOLOv2 models. The training of the models was done on 25000 PDF pages. The prediction (per page) and performance numbers (Recall= \u211b and Precision= \ud835\udcab) were obtained on 5000 page images, where the prediction confidence cutoff was tuned to yield the maximum F1 score for each. All time-to-solution measurements for training were obtained on a POWER8 node with a single Pascal P100 GPU.", "prov": [{"$ref": "#/page-elements/74"}], "text": "Table 1: Time-to-solution and performance results for the Faster RCNN and YOLOv2 models. The training of the models was done on 25000 PDF pages. The prediction (per page) and performance numbers (Recall= \u211b and Precision= \ud835\udcab) were obtained on 5000 page images, where the prediction confidence cutoff was tuned to yield the maximum F1 score for each. All time-to-solution measurements for training were obtained on a POWER8 node with a single Pascal P100 GPU.", "text-hash": 17279509228359814482, "type": "paragraph"}], "data": [[{"col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 0]], "text": ""}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 1]], "text": "Time to solution"}, {"col": 2, "col-header": false, "col-span": [1, 3], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 1], [0, 2]], "text": "Time to solution"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 3]], "text": "Performance"}, {"col": 4, "col-header": false, "col-span": [3, 5], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 3], [0, 4]], "text": "Performance"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 0]], "text": ""}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 1]], "text": "Training"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 2]], "text": "Prediction"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 3]], "text": "\ud835\udcab"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 4]], "text": "\u211b"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 0]], "text": "Faster-RCNN"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 1]], "text": "72 hours"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 2]], "text": "4 sec"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 3]], "text": "0.97"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 4]], "text": "0.98"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 0]], "text": "YOLOv2"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 1]], "text": "9 hours"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 2]], "text": "0.1 sec"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 3]], "text": "0 . 99"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 4]], "text": "0 . 98"}]], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/tables/0", "footnotes": [], "hash": 16709517892596982787, "mentions": [], "prov": [{"$ref": "#/page-elements/75"}], "type": "table"}, {"#-cols": 8, "#-rows": 10, "captions": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/64", "hash": 18354136439820865774, "orig": "Table 3: Comparison for two different journal templates showing the aggregated precision and recall averaged over all labels. Each model has been independently trained on a dataset of 400 pages each. The results show that the ML algorithm proves to perform very well for the multiple document templates, simply by providing a different dataset to train on.", "prov": [{"$ref": "#/page-elements/86"}], "text": "Table 3: Comparison for two different journal templates showing the aggregated precision and recall averaged over all labels. Each model has been independently trained on a dataset of 400 pages each. The results show that the ML algorithm proves to perform very well for the multiple document templates, simply by providing a different dataset to train on.", "text-hash": 8085176655901164108, "type": "paragraph"}], "data": [[{"col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 0]], "text": ""}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 1]], "text": ""}, {"col": 2, "col-header": false, "col-span": [2, 7], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6]], "text": "predicted label"}, {"col": 3, "col-header": false, "col-span": [2, 7], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6]], "text": "predicted label"}, {"col": 4, "col-header": false, "col-span": [2, 7], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6]], "text": "predicted label"}, {"col": 5, "col-header": false, "col-span": [2, 7], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6]], "text": "predicted label"}, {"col": 6, "col-header": false, "col-span": [2, 7], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6]], "text": "predicted label"}, {"col": 7, "col-header": false, "col-span": [2, 8], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2], [0, 3], [0, 4], [0, 5], [0, 6], [0, 7]], "text": "predicted label"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 0]], "text": ""}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 1]], "text": "T itle"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 2]], "text": ""}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 3]], "text": "Author"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 4]], "text": "Subtitle"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 5]], "text": "Te xt"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 6]], "text": "Picture"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 7]], "text": "T able"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 1]], "text": "Title"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 2]], "text": "75"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 3]], "text": "0"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 4]], "text": "0"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 5]], "text": "0"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 6]], "text": "0"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 7]], "text": "0"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 3, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Author"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 1]], "text": "1"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 2]], "text": "670"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 3]], "text": "0"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 4]], "text": "0"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 5]], "text": "0"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 6]], "text": "0"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 3, "row-header": false, "row-span": [3, 4], "spans": [[3, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 4, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Subtitle"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 1]], "text": "0"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 2]], "text": "0"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 3]], "text": "325"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 4]], "text": "0"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 5]], "text": "0"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 6]], "text": "0"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 4, "row-header": false, "row-span": [4, 5], "spans": [[4, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 5, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Text"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 1]], "text": "1"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 2]], "text": "17"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 3]], "text": "0"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 4]], "text": "56460"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 5]], "text": "14"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 6]], "text": "0"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 5, "row-header": false, "row-span": [5, 6], "spans": [[5, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 6, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Picture"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 1]], "text": "0"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 2]], "text": "0"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 3]], "text": "0"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 4]], "text": "4"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 5]], "text": "4223"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 6]], "text": "26"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 6, "row-header": false, "row-span": [6, 7], "spans": [[6, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 7, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Table"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 1]], "text": "0"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 2]], "text": "0"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 3]], "text": "0"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 4]], "text": "0"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 5]], "text": "1"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 6]], "text": "3418"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 7, "row-header": false, "row-span": [7, 8], "spans": [[7, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 8, "row-header": false, "row-span": [2, 9], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0]], "text": "true label Recall"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 1]], "text": "100"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 2]], "text": "99.85"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 3]], "text": "100"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 4]], "text": "99.94"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 5]], "text": "99.24"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 6]], "text": "99.97"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 8, "row-header": false, "row-span": [8, 9], "spans": [[8, 7]], "text": ""}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 9, "row-header": false, "row-span": [2, 10], "spans": [[2, 0], [3, 0], [4, 0], [5, 0], [6, 0], [7, 0], [8, 0], [9, 0]], "text": "true label Precision"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 1]], "text": "97.40"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 2]], "text": "97.52"}, {"col": 3, "col-header": false, "col-span": [3, 4], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 3]], "text": "100"}, {"col": 4, "col-header": false, "col-span": [4, 5], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 4]], "text": "99.99"}, {"col": 5, "col-header": false, "col-span": [5, 6], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 5]], "text": "99.64"}, {"col": 6, "col-header": false, "col-span": [6, 7], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 6]], "text": "99.24"}, {"col": 7, "col-header": false, "col-span": [7, 8], "row": 9, "row-header": false, "row-span": [9, 10], "spans": [[9, 7]], "text": ""}]], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/tables/1", "footnotes": [], "hash": 16041588621504517180, "mentions": [], "prov": [{"$ref": "#/page-elements/81"}], "type": "table"}, {"#-cols": 3, "#-rows": 3, "captions": [], "data": [[{"col": 0, "col-header": false, "col-span": [0, 1], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 0]], "text": "Journal template"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 1]], "text": "\ud835\udcab"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 0, "row-header": false, "row-span": [0, 1], "spans": [[0, 2]], "text": "\u211b"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 0]], "text": "Physical Review B"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 1]], "text": "98 . 96"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 1, "row-header": false, "row-span": [1, 2], "spans": [[1, 2]], "text": "99 . 83"}], [{"col": 0, "col-header": false, "col-span": [0, 1], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 0]], "text": "Elsevier"}, {"col": 1, "col-header": false, "col-span": [1, 2], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 1]], "text": "99 . 46"}, {"col": 2, "col-header": false, "col-span": [2, 3], "row": 2, "row-header": false, "row-span": [2, 3], "spans": [[2, 2]], "text": "99 . 58"}]], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/tables/2", "footnotes": [], "hash": 14817357053216629605, "mentions": [], "prov": [{"$ref": "#/page-elements/87"}], "type": "table"}], "texts": [{"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/0", "hash": 7377574370756688828, "orig": "arXiv:1806.02284v1 [cs.DL] 24 May 2018", "properties": {"data": [["language", 7377574370756688828, "TEXT", "#/texts/0", "en", 0.7799999713897705], ["semantic", 7377574370756688828, "TEXT", "#/texts/0", "meta-data", 0.9800000190734863]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/0"}], "text": "arXiv:1806.02284v1 [cs.DL] 24 May 2018", "text-hash": 605943372629925146, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/1", "hash": 10227328696767902037, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["language", 10227328696767902037, "TEXT", "#/texts/1", "en", 0.699999988079071], ["semantic", 10227328696767902037, "TEXT", "#/texts/1", "header", 0.5400000214576721]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/1"}], "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "title"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/2", "hash": 8770494724746327817, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["language", 8770494724746327817, "TEXT", "#/texts/2", "en", 0.25999999046325684], ["semantic", 8770494724746327817, "TEXT", "#/texts/2", "meta-data", 0.9100000262260437]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/2"}], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/3", "hash": 18258237174351515285, "orig": "taa,dol,cau,bek@zurich.ibm.com", "properties": {"data": [["language", 18258237174351515285, "TEXT", "#/texts/3", "zh", 0.09000000357627869], ["semantic", 18258237174351515285, "TEXT", "#/texts/3", "meta-data", 0.6200000047683716]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/3"}], "text": "taa,dol,cau,bek@zurich.ibm.com", "text-hash": 7883794643982446593, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/4", "hash": 5704354110496947297, "orig": "IBM Research", "properties": {"data": [["language", 5704354110496947297, "TEXT", "#/texts/4", "en", 0.5299999713897705], ["semantic", 5704354110496947297, "TEXT", "#/texts/4", "meta-data", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/4"}], "text": "IBM Research", "text-hash": 16114797969310195405, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/5", "hash": 11056873211244709904, "orig": "Rueschlikon, Switzerland", "properties": {"data": [["language", 11056873211244709904, "TEXT", "#/texts/5", "en", 0.49000000953674316], ["semantic", 11056873211244709904, "TEXT", "#/texts/5", "meta-data", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/5"}], "text": "Rueschlikon, Switzerland", "text-hash": 10483037511456664190, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/6", "hash": 11788868678004267702, "orig": "ABSTRACT", "properties": {"data": [["language", 11788868678004267702, "TEXT", "#/texts/6", "en", 0.6499999761581421], ["semantic", 11788868678004267702, "TEXT", "#/texts/6", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/6"}], "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/7", "hash": 3624246356859711021, "orig": "1 INTRODUCTION", "properties": {"data": [["language", 3624246356859711021, "TEXT", "#/texts/7", "en", 0.550000011920929], ["semantic", 3624246356859711021, "TEXT", "#/texts/7", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/7"}], "text": "1 INTRODUCTION", "text-hash": 4359834464932974729, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/8", "hash": 17999848460847860039, "orig": "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size. Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable. Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging. In this paper, we present a modular, cloud-based platform to ingest documents at scale. This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format. We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["language", 17999848460847860039, "TEXT", "#/texts/8", "en", 0.9200000166893005], ["semantic", 17999848460847860039, "TEXT", "#/texts/8", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/8"}], "text": "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size. Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable. Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging. In this paper, we present a modular, cloud-based platform to ingest documents at scale. This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format. We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 8142196169563728819, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/9", "hash": 14387482728083328702, "orig": "ACM Reference Format:", "properties": {"data": [["language", 14387482728083328702, "TEXT", "#/texts/9", "en", 0.20999999344348907], ["semantic", 14387482728083328702, "TEXT", "#/texts/9", "reference", 0.5099999904632568]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/9"}], "text": "ACM Reference Format:", "text-hash": 7430992009485070364, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/10", "hash": 11222145795862225841, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas. 2018. Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.. In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom. ACM, New York, NY, USA, 9 pages. https://doi.org/10. 1145/3219819.3219834", "properties": {"data": [["language", 11222145795862225841, "TEXT", "#/texts/10", "en", 0.49000000953674316], ["semantic", 11222145795862225841, "TEXT", "#/texts/10", "reference", 0.9599999785423279]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/10"}], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas. 2018. Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.. In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom. ACM, New York, NY, USA, 9 pages. https://doi.org/10. 1145/3219819.3219834", "text-hash": 10605881125688857885, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/11", "hash": 16923207262044929933, "orig": "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$. These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery. It is needless to say that valuable qualitative and quantitative information is contained in many of them. However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout. Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery. In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$. This poses a real problem, since more and more information published in the PDF documents is going dark. In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components. First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML. Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", "properties": {"data": [["language", 16923207262044929933, "TEXT", "#/texts/11", "en", 0.9399999976158142], ["semantic", 16923207262044929933, "TEXT", "#/texts/11", "text", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/15"}], "text": "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$. These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery. It is needless to say that valuable qualitative and quantitative information is contained in many of them. However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout. Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery. In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$. This poses a real problem, since more and more information published in the PDF documents is going dark. In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components. First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML. Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", "text-hash": 9516638039579926761, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/12", "hash": 3749305213430885773, "orig": "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files. The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms. This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components. Each of these microservices can be consumed by its own REST API. This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform. In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", "properties": {"data": [["language", 3749305213430885773, "TEXT", "#/texts/12", "en", 0.949999988079071], ["semantic", 3749305213430885773, "TEXT", "#/texts/12", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/16"}], "text": "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files. The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms. This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components. Each of these microservices can be consumed by its own REST API. This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform. In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", "text-hash": 3945867624210419433, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/13", "hash": 3409470577915009676, "orig": "To obtain a thorough understanding of what our platform can do and how well it performs, we have structured this paper as follows: In Section 2, we briefly review the current state-of-the-art document", "properties": {"data": [["language", 3409470577915009676, "TEXT", "#/texts/13", "en", 0.949999988079071], ["semantic", 3409470577915009676, "TEXT", "#/texts/13", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/17"}], "text": "To obtain a thorough understanding of what our platform can do and how well it performs, we have structured this paper as follows: In Section 2, we briefly review the current state-of-the-art document", "text-hash": 4583103017707584490, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/15", "hash": 17187299362680072378, "orig": "processing solutions. In Section 3, we present the design of the platform and its components. In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively. Finally, in Section 5, we discuss the open questions w.r.t. research and possible next steps in the development of the platform.", "properties": {"data": [["language", 17187299362680072378, "TEXT", "#/texts/14", "en", 0.9200000166893005], ["semantic", 17187299362680072378, "TEXT", "#/texts/14", "text", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/22"}], "text": "processing solutions. In Section 3, we present the design of the platform and its components. In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively. Finally, in Section 5, we discuss the open questions w.r.t. research and possible next steps in the development of the platform.", "text-hash": 9243393324994873880, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/16", "hash": 697648145931166262, "orig": "2 STATE OF THE ART", "properties": {"data": [["language", 697648145931166262, "TEXT", "#/texts/15", "en", 0.47999998927116394], ["semantic", 697648145931166262, "TEXT", "#/texts/15", "header", 0.8199999928474426]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/23"}], "text": "2 STATE OF THE ART", "text-hash": 2385816824895853732, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/17", "hash": 7935233310532930917, "orig": "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]. Broadly speaking, there are two types of approaches to this problem. In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document. This can be done through a conversion from PDF towards HTML or MS Word for example. The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format. For example, this could be a JSON/XML file with a particular schema. Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", "properties": {"data": [["language", 7935233310532930917, "TEXT", "#/texts/16", "en", 0.9200000166893005], ["semantic", 7935233310532930917, "TEXT", "#/texts/16", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/24"}], "text": "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]. Broadly speaking, there are two types of approaches to this problem. In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document. This can be done through a conversion from PDF towards HTML or MS Word for example. The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format. For example, this could be a JSON/XML file with a particular schema. Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", "text-hash": 57757550267838417, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/18", "hash": 2762070725424637531, "orig": "Many solutions have already been developed that tackle the problem of document conversion. There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$. There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$. In contrast to the open-source solutions, all three proprietary solutions support also", "properties": {"data": [["language", 2762070725424637531, "TEXT", "#/texts/17", "en", 0.9700000286102295], ["semantic", 2762070725424637531, "TEXT", "#/texts/17", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/25"}], "text": "Many solutions have already been developed that tackle the problem of document conversion. There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$. There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$. In contrast to the open-source solutions, all three proprietary solutions support also", "text-hash": 5230489225511983287, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/19", "hash": 7536915191196259776, "orig": "extraction from scanned documents. Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries. For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref. [1] and previous editions.", "properties": {"data": [["language", 7536915191196259776, "TEXT", "#/texts/18", "en", 0.9900000095367432], ["semantic", 7536915191196259776, "TEXT", "#/texts/18", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/31"}], "text": "extraction from scanned documents. Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries. For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref. [1] and previous editions.", "text-hash": 167221319977518894, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/20", "hash": 11495493007651807568, "orig": "3 PLATFORM DESIGN", "properties": {"data": [["language", 11495493007651807568, "TEXT", "#/texts/19", "en", 0.3100000023841858], ["semantic", 11495493007651807568, "TEXT", "#/texts/19", "header", 0.949999988079071]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/32"}], "text": "3 PLATFORM DESIGN", "text-hash": 10322960049580053438, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/21", "hash": 7650015170039242996, "orig": "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way.", "properties": {"data": [["language", 7650015170039242996, "TEXT", "#/texts/20", "en", 0.9399999976158142], ["semantic", 7650015170039242996, "TEXT", "#/texts/20", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/33"}], "text": "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way.", "text-hash": 333520156392116834, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/22", "hash": 14959508657858158650, "orig": "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation. This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms. This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased.", "properties": {"data": [["language", 14959508657858158650, "TEXT", "#/texts/21", "en", 0.9599999785423279], ["semantic", 14959508657858158650, "TEXT", "#/texts/21", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/34"}], "text": "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation. This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms. This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased.", "text-hash": 6868109665737773720, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/23", "hash": 10379300903412882972, "orig": "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design. First of all, one can not think anymore at the level of a single document. Rather, one should think at the level of a collection of documents (or a corpus of documents). A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is. This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format. Our solution can ingest an entire collection of documents and build machine learned models on top of that. Of course, once the the model is trained, one can convert documents one at a time, too.", "properties": {"data": [["language", 10379300903412882972, "TEXT", "#/texts/22", "en", 0.9399999976158142], ["semantic", 10379300903412882972, "TEXT", "#/texts/22", "text", 0.9700000286102295]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/35"}], "text": "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design. First of all, one can not think anymore at the level of a single document. Rather, one should think at the level of a collection of documents (or a corpus of documents). A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is. This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format. Our solution can ingest an entire collection of documents and build machine learned models on top of that. Of course, once the the model is trained, one can convert documents one at a time, too.", "text-hash": 11150916691880738938, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/25", "hash": 4994395008195818594, "orig": "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it. Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way. These annotations are then used as ground-truth data to train models. It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents. For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application. It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way.", "properties": {"data": [["language", 4994395008195818594, "TEXT", "#/texts/23", "en", 0.9599999785423279], ["semantic", 4994395008195818594, "TEXT", "#/texts/23", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/39"}], "text": "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it. Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way. These annotations are then used as ground-truth data to train models. It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents. For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application. It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way.", "text-hash": 16536368219630364368, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/26", "hash": 4203835122307823579, "orig": "3.1 Components", "properties": {"data": [["language", 4203835122307823579, "TEXT", "#/texts/24", "en", 0.23999999463558197], ["semantic", 4203835122307823579, "TEXT", "#/texts/24", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/40"}], "text": "3.1 Components", "text-hash": 3789103236857293111, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/27", "hash": 13520362244078084911, "orig": "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML).", "properties": {"data": [["language", 13520362244078084911, "TEXT", "#/texts/25", "en", 0.75], ["semantic", 13520362244078084911, "TEXT", "#/texts/25", "text", 0.949999988079071]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/41"}], "text": "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML).", "text-hash": 12910497814715733387, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/28", "hash": 1749622367305947670, "orig": "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format. If a trained model is available, only components 1, 4 and 5 are needed to convert the documents. If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models. It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional.", "properties": {"data": [["language", 1749622367305947670, "TEXT", "#/texts/26", "en", 0.8999999761581421], ["semantic", 1749622367305947670, "TEXT", "#/texts/26", "text", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/42"}], "text": "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format. If a trained model is available, only components 1, 4 and 5 are needed to convert the documents. If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models. It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional.", "text-hash": 1334541935326461060, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/30", "hash": 11083736481641202939, "orig": "Let us now elaborate on what each of the five components deliver in the rest of this section.", "properties": {"data": [["language", 11083736481641202939, "TEXT", "#/texts/27", "en", 0.9200000166893005], ["semantic", 11083736481641202939, "TEXT", "#/texts/27", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/44"}], "text": "Let us now elaborate on what each of the five components deliver in the rest of this section.", "text-hash": 10456209429844276823, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/31", "hash": 15403141463083979171, "orig": "3.2 Parsing of Documents", "properties": {"data": [["language", 15403141463083979171, "TEXT", "#/texts/28", "en", 0.6800000071525574], ["semantic", 15403141463083979171, "TEXT", "#/texts/28", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/45"}], "text": "3.2 Parsing of Documents", "text-hash": 6127225399482532623, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/32", "hash": 12234429517419341922, "orig": "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page. For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper. There are two reasons why we are interested in these cells. First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label. Second, the concept of a cell can be easily transferred to scanned documents. In Figure 2, we show the cells obtained from an example PDF page after the parsing stage.", "properties": {"data": [["language", 12234429517419341922, "TEXT", "#/texts/29", "en", 0.9300000071525574], ["semantic", 12234429517419341922, "TEXT", "#/texts/29", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/46"}], "text": "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page. For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper. There are two reasons why we are interested in these cells. First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label. Second, the concept of a cell can be easily transferred to scanned documents. In Figure 2, we show the cells obtained from an example PDF page after the parsing stage.", "text-hash": 13908173772261346000, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/33", "hash": 16957857111665886816, "orig": "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells. This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs. Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines. This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models. As a consequence, we reduce the variability of the geometric features as much as possible. The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions.", "properties": {"data": [["language", 16957857111665886816, "TEXT", "#/texts/30", "en", 0.9399999976158142], ["semantic", 16957857111665886816, "TEXT", "#/texts/30", "text", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/47"}], "text": "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells. This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs. Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines. This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models. As a consequence, we reduce the variability of the geometric features as much as possible. The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions.", "text-hash": 9481411723883903182, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/34", "hash": 10390915169360946497, "orig": "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document. This operation relies on the iterators provided by the QPDF library$^{9}$.", "properties": {"data": [["language", 10390915169360946497, "TEXT", "#/texts/31", "en", 0.8500000238418579], ["semantic", 10390915169360946497, "TEXT", "#/texts/31", "text", 0.9800000190734863]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/48"}], "text": "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document. This operation relies on the iterators provided by the QPDF library$^{9}$.", "text-hash": 11149022357700220845, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/35", "hash": 15254383206256494278, "orig": "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content. Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document. From this point, all further processing does not need to distinguish between scanned or programmatic sources.", "properties": {"data": [["language", 15254383206256494278, "TEXT", "#/texts/32", "en", 0.9399999976158142], ["semantic", 15254383206256494278, "TEXT", "#/texts/32", "text", 0.9800000190734863]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/51"}], "text": "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content. Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document. From this point, all further processing does not need to distinguish between scanned or programmatic sources.", "text-hash": 6573226034038831156, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/36", "hash": 17759618186065566858, "orig": "3.3 Ground-truth gathering through human-annotation", "properties": {"data": [["language", 17759618186065566858, "TEXT", "#/texts/33", "en", 0.8299999833106995], ["semantic", 17759618186065566858, "TEXT", "#/texts/33", "header", 0.9300000071525574]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/52"}], "text": "3.3 Ground-truth gathering through human-annotation", "text-hash": 8679681341332585960, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/37", "hash": 11638821473906997927, "orig": "In this component, we collect ground-truth for the custom machine learning models to be trained on. Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision. Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents. As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning. The purpose of these annotators is two-fold.", "properties": {"data": [["language", 11638821473906997927, "TEXT", "#/texts/34", "en", 0.9700000286102295], ["semantic", 11638821473906997927, "TEXT", "#/texts/34", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/53"}], "text": "In this component, we collect ground-truth for the custom machine learning models to be trained on. Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision. Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents. As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning. The purpose of these annotators is two-fold.", "text-hash": 14503768930839698451, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/38", "hash": 13020065077657899116, "orig": "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach. In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2). We then ask the (human) annotator to assign each cell a layout semantic label. Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$. In the annotator tool, each layout semantic label is visually represented by a colour. By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3. Since humans are very efficient in visual recognition, this task comes very natural to us. The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", "properties": {"data": [["language", 13020065077657899116, "TEXT", "#/texts/35", "en", 0.8899999856948853], ["semantic", 13020065077657899116, "TEXT", "#/texts/35", "text", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/54"}], "text": "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach. In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2). We then ask the (human) annotator to assign each cell a layout semantic label. Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$. In the annotator tool, each layout semantic label is visually represented by a colour. By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3. Since humans are very efficient in visual recognition, this task comes very natural to us. The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", "text-hash": 13130850271187616458, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/39", "hash": 10103841011442966464, "orig": "The second purpose of the annotators is to visually inspect the quality of our machine learned models. The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell. Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page. This allows the users to directly inspect the results of the models on unseen pages. A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels. Of course, as the models become better over time, the number of corrections needed to be made become less and less. This allows us to significantly reduce the annotation time per document. Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering.", "properties": {"data": [["language", 10103841011442966464, "TEXT", "#/texts/36", "en", 0.9399999976158142], ["semantic", 10103841011442966464, "TEXT", "#/texts/36", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/55"}], "text": "The second purpose of the annotators is to visually inspect the quality of our machine learned models. The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell. Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page. This allows the users to directly inspect the results of the models on unseen pages. A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels. Of course, as the models become better over time, the number of corrections needed to be made become less and less. This allows us to significantly reduce the annotation time per document. Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering.", "text-hash": 11435379797753757998, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/40", "hash": 10982401368140758581, "orig": "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute. The vertical red lines indicate that a training was performed on the annotated pages, and a new, improved model is", "properties": {"data": [["language", 10982401368140758581, "TEXT", "#/texts/37", "en", 0.9599999785423279], ["semantic", 10982401368140758581, "TEXT", "#/texts/37", "text", 0.9700000286102295]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/56"}], "text": "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute. The vertical red lines indicate that a training was performed on the annotated pages, and a new, improved model is", "text-hash": 10548529097098469537, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/42", "hash": 887751753527930563, "orig": "used from that point to predict the labels. Since the corrections become less and less, the rate of annotation goes up. It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice. The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", "properties": {"data": [["language", 887751753527930563, "TEXT", "#/texts/38", "en", 0.949999988079071], ["semantic", 887751753527930563, "TEXT", "#/texts/38", "text", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/60"}], "text": "used from that point to predict the labels. Since the corrections become less and less, the rate of annotation goes up. It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice. The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", "text-hash": 2205427981859754031, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/43", "hash": 4695688617288377564, "orig": "3.4 Machine Learning: Training models & Applying models", "properties": {"data": [["language", 4695688617288377564, "TEXT", "#/texts/39", "en", 0.800000011920929], ["semantic", 4695688617288377564, "TEXT", "#/texts/39", "header", 0.9300000071525574]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/61"}], "text": "3.4 Machine Learning: Training models & Applying models", "text-hash": 16834670239362291258, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/44", "hash": 3275001812318455279, "orig": "In the CCS, there are essentially two types of machine-learning models. On the one hand, we have default models, which are designed to be layout independent. They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall. They will classify each cell in the page with regard to their layout semantic label.", "properties": {"data": [["language", 3275001812318455279, "TEXT", "#/texts/40", "en", 0.9399999976158142], ["semantic", 3275001812318455279, "TEXT", "#/texts/40", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/62"}], "text": "In the CCS, there are essentially two types of machine-learning models. On the one hand, we have default models, which are designed to be layout independent. They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall. They will classify each cell in the page with regard to their layout semantic label.", "text-hash": 4429706140044408651, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/45", "hash": 15354930767839681193, "orig": "3.4.1 Metrics. Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results. The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label. The correctness of this label is what we aim to measure with the recall and precision metrics. The second observation is that we deal with a", "properties": {"data": [["language", 15354930767839681193, "TEXT", "#/texts/41", "en", 0.8999999761581421], ["semantic", 15354930767839681193, "TEXT", "#/texts/41", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/63"}], "text": "3.4.1 Metrics. Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results. The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label. The correctness of this label is what we aim to measure with the recall and precision metrics. The second observation is that we deal with a", "text-hash": 6184852591532473349, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/47", "hash": 6337233386759158728, "orig": "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label.", "properties": {"data": [["language", 6337233386759158728, "TEXT", "#/texts/42", "en", 0.8999999761581421], ["semantic", 6337233386759158728, "TEXT", "#/texts/42", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/66"}], "text": "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label.", "text-hash": 15490331838172880166, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/48", "hash": 2249972239307071508, "orig": "The recall (= \u211b) and precision (= \ud835\udcab) for a given label on a page is defined by the standard formulas", "properties": {"data": [["language", 2249972239307071508, "TEXT", "#/texts/43", "en", 0.8199999928474426], ["semantic", 2249972239307071508, "TEXT", "#/texts/43", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/67"}], "text": "The recall (= \u211b) and precision (= \ud835\udcab) for a given label on a page is defined by the standard formulas", "text-hash": 1131271437908497026, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/49", "hash": 12383805870947794174, "orig": "\u211b = t$_{p}$ t$_{p}$ + f$_{p}$ , \ud835\udcab = t$_{p}$ t$_{p}$ + f$_{n}$ , (1)", "properties": {"data": [["language", 12383805870947794174, "TEXT", "#/texts/44", "en", 0.27000001072883606], ["semantic", 12383805870947794174, "TEXT", "#/texts/44", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/68"}], "text": "\u211b = t$_{p}$ t$_{p}$ + f$_{p}$ , \ud835\udcab = t$_{p}$ t$_{p}$ + f$_{n}$ , (1)", "text-hash": 14055366495763095132, "type": "equation"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/50", "hash": 7053654953998543393, "orig": "where t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels.", "properties": {"data": [["language", 7053654953998543393, "TEXT", "#/texts/45", "en", 0.5799999833106995], ["semantic", 7053654953998543393, "TEXT", "#/texts/45", "text", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/69"}], "text": "where t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels.", "text-hash": 642098605774556301, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/51", "hash": 15921044595687116426, "orig": "3.4.2 Default Models. The aim of the default models is to identify specific, ubiquitous objects in documents. Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods. Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]. On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", "properties": {"data": [["language", 15921044595687116426, "TEXT", "#/texts/46", "en", 0.949999988079071], ["semantic", 15921044595687116426, "TEXT", "#/texts/46", "text", 0.9700000286102295]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/70"}], "text": "3.4.2 Default Models. The aim of the default models is to identify specific, ubiquitous objects in documents. Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods. Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]. On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", "text-hash": 5618307884355612648, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/52", "hash": 12234068400463628788, "orig": "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects.", "properties": {"data": [["language", 12234068400463628788, "TEXT", "#/texts/47", "en", 0.9700000286102295], ["semantic", 12234068400463628788, "TEXT", "#/texts/47", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/71"}], "text": "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects.", "text-hash": 13907813772802190178, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/53", "hash": 4628466594790006384, "orig": "The networks available on our platform have been trained on arXiv data$^{11}$. We have annotated 30000 PDF pages and know the", "properties": {"data": [["language", 4628466594790006384, "TEXT", "#/texts/48", "en", 0.9200000166893005], ["semantic", 4628466594790006384, "TEXT", "#/texts/48", "text", 0.949999988079071]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/72"}], "text": "The networks available on our platform have been trained on arXiv data$^{11}$. We have annotated 30000 PDF pages and know the", "text-hash": 16911352314006995166, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/55", "hash": 9651706913678711778, "orig": "location of at least one table on each page. From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation. Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", "properties": {"data": [["language", 9651706913678711778, "TEXT", "#/texts/49", "en", 0.9200000166893005], ["semantic", 9651706913678711778, "TEXT", "#/texts/49", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/76"}], "text": "location of at least one table on each page. From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation. Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", "text-hash": 11888191065829014864, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/56", "hash": 1363251178266051349, "orig": "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes. The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks. An example of such an image can be seen in Figure 5. The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts. Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition. This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet. We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page.", "properties": {"data": [["language", 1363251178266051349, "TEXT", "#/texts/50", "en", 0.9100000262260437], ["semantic", 1363251178266051349, "TEXT", "#/texts/50", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/77"}], "text": "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes. The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks. An example of such an image can be seen in Figure 5. The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts. Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition. This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet. We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page.", "text-hash": 2009046567395259777, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/57", "hash": 18259197018396996238, "orig": "Let us now discuss both deep neural network training microservices on the platform. In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision. In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times. We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied. We believe that this is the main origin for the discrepancy of time-to-solution for the training phase. The same holds true for the prediction. Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", "properties": {"data": [["language", 18259197018396996238, "TEXT", "#/texts/51", "en", 0.9599999785423279], ["semantic", 18259197018396996238, "TEXT", "#/texts/51", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/78"}], "text": "Let us now discuss both deep neural network training microservices on the platform. In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision. In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times. We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied. We believe that this is the main origin for the discrepancy of time-to-solution for the training phase. The same holds true for the prediction. Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", "text-hash": 7883278994224882668, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/58", "hash": 14663676516964431047, "orig": "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously. The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1. We use these bounding boxes to associate with each cell a label, which is in this particular case either Table or Not-Table, depending on whether they overlap", "properties": {"data": [["language", 14663676516964431047, "TEXT", "#/texts/52", "en", 0.949999988079071], ["semantic", 14663676516964431047, "TEXT", "#/texts/52", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/79"}], "text": "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously. The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1. We use these bounding boxes to associate with each cell a label, which is in this particular case either Table or Not-Table, depending on whether they overlap", "text-hash": 7164504172498806323, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/59", "hash": 4577067829072175096, "orig": "Table 2: Performance results for the template specific model of the Physical Review B journals. The confusion matrix highlights the huge imbalance between the number of text cells with different labels. The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", "properties": {"data": [["language", 4577067829072175096, "TEXT", "#/texts/53", "en", 0.8600000143051147], ["semantic", 4577067829072175096, "TEXT", "#/texts/53", "text", 0.9200000166893005]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/80"}], "text": "Table 2: Performance results for the template specific model of the Physical Review B journals. The confusion matrix highlights the huge imbalance between the number of text cells with different labels. The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", "text-hash": 3406859306294395222, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/60", "hash": 2569392033451362672, "orig": "with the predicted bounding box. The corresponding recall and precision are then computed for this dual-class classification problem. In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence. For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0. 5. The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers. We believe this originates from the selective search algorithm which is used to determine regions of interest. The images we feed it are not typical photographic images (made with a camera) but layout visualisations. The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", "properties": {"data": [["language", 2569392033451362672, "TEXT", "#/texts/54", "en", 0.9200000166893005], ["semantic", 2569392033451362672, "TEXT", "#/texts/54", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/82"}], "text": "with the predicted bounding box. The corresponding recall and precision are then computed for this dual-class classification problem. In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence. For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0. 5. The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers. We believe this originates from the selective search algorithm which is used to determine regions of interest. The images we feed it are not typical photographic images (made with a camera) but layout visualisations. The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", "text-hash": 5414143675771382750, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/61", "hash": 14539041145469267811, "orig": "3.4.3 Template specific Models. The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template. This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance. Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", "properties": {"data": [["language", 14539041145469267811, "TEXT", "#/texts/55", "en", 0.9200000166893005], ["semantic", 14539041145469267811, "TEXT", "#/texts/55", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/83"}], "text": "3.4.3 Template specific Models. The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template. This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance. Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", "text-hash": 6991735551340401103, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/62", "hash": 8607014065143641201, "orig": "For an algorithm to fit in the interactive platform design we identified a few key requirements. First, it is crucial that the model can generate good results with a limited set of pages. In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation. Second it must be robust against extreme imbalance of the labeled data. It is clear that cells of the label Title will be much more uncommon than cells with the label of Text. Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", "properties": {"data": [["language", 8607014065143641201, "TEXT", "#/texts/56", "en", 0.949999988079071], ["semantic", 8607014065143641201, "TEXT", "#/texts/56", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/84"}], "text": "For an algorithm to fit in the interactive platform design we identified a few key requirements. First, it is crucial that the model can generate good results with a limited set of pages. In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation. Second it must be robust against extreme imbalance of the labeled data. It is clear that cells of the label Title will be much more uncommon than cells with the label of Text. Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", "text-hash": 17832237182951286493, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/63", "hash": 1994904537764312371, "orig": "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models. Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data. In our case,", "properties": {"data": [["language", 1994904537764312371, "TEXT", "#/texts/57", "en", 0.949999988079071], ["semantic", 1994904537764312371, "TEXT", "#/texts/57", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/85"}], "text": "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models. Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data. In our case,", "text-hash": 1377511684573734815, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/65", "hash": 7742256726079628058, "orig": "this structure originates of course from the template. Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements. As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", "properties": {"data": [["language", 7742256726079628058, "TEXT", "#/texts/58", "en", 0.9200000166893005], ["semantic", 7742256726079628058, "TEXT", "#/texts/58", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/88"}], "text": "this structure originates of course from the template. Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements. As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", "text-hash": 250119056806139256, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/66", "hash": 8810233123818174294, "orig": "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties. For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells. Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters. We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell.", "properties": {"data": [["language", 8810233123818174294, "TEXT", "#/texts/59", "en", 0.9599999785423279], ["semantic", 8810233123818174294, "TEXT", "#/texts/59", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/89"}], "text": "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties. For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells. Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters. We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell.", "text-hash": 17619932035192809924, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/67", "hash": 16446711449286912460, "orig": "It is important to realize that almost all of these features are purely geometrical. This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents.", "properties": {"data": [["language", 16446711449286912460, "TEXT", "#/texts/60", "en", 0.9399999976158142], ["semantic", 16446711449286912460, "TEXT", "#/texts/60", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/90"}], "text": "It is important to realize that almost all of these features are purely geometrical. This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents.", "text-hash": 9704353849744984874, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/68", "hash": 9558434107504657973, "orig": "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$. We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels. Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label. We observe that the recall and precision numbers are excellent, with most of them above 99%. This is not surprising, since we are building models that specialise for a particular template.", "properties": {"data": [["language", 9558434107504657973, "TEXT", "#/texts/61", "en", 0.9100000262260437], ["semantic", 9558434107504657973, "TEXT", "#/texts/61", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/91"}], "text": "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$. We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels. Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label. We observe that the recall and precision numbers are excellent, with most of them above 99%. This is not surprising, since we are building models that specialise for a particular template.", "text-hash": 11971893452237256865, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/69", "hash": 18349896906192842040, "orig": "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on. The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform. We do not need to define rules and heuristics or update code in order to deal with new types of documents. We only need to gather more data.", "properties": {"data": [["language", 18349896906192842040, "TEXT", "#/texts/62", "en", 0.9399999976158142], ["semantic", 18349896906192842040, "TEXT", "#/texts/62", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/92"}], "text": "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on. The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform. We do not need to define rules and heuristics or update code in order to deal with new types of documents. We only need to gather more data.", "text-hash": 8080940474762743702, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/70", "hash": 10082834006373808153, "orig": "3.5 Assembly", "properties": {"data": [["language", 10082834006373808153, "TEXT", "#/texts/63", "en", 0.8199999928474426], ["semantic", 10082834006373808153, "TEXT", "#/texts/63", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/93"}], "text": "3.5 Assembly", "text-hash": 11736313095563614837, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/71", "hash": 15253541252152665681, "orig": "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics. This structured data file is constructed by assembling all the cells from the parsed file", "properties": {"data": [["language", 15253541252152665681, "TEXT", "#/texts/64", "en", 0.8899999856948853], ["semantic", 15253541252152665681, "TEXT", "#/texts/64", "text", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/94"}], "text": "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics. This structured data file is constructed by assembling all the cells from the parsed file", "text-hash": 6565628665194191037, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/72", "hash": 3904142170608486950, "orig": "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper.", "properties": {"data": [["language", 3904142170608486950, "TEXT", "#/texts/65", "en", 0.7799999713897705], ["semantic", 3904142170608486950, "TEXT", "#/texts/65", "text", 0.9100000262260437]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/96"}], "text": "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper.", "text-hash": 4079383948124449940, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/73", "hash": 6410818076508661508, "orig": "{ 'description ': { 'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale. ',, \u2192 'abstract ': 'Over the past few decades, the amount of scientific articles [...] ',, \u2192 'affiliations ': 'IBM Research Rueschlikon, Switzerland ', 'authors ': 'Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas ', \u2192 }, 'main-text ': [{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...], 'tables ': [{...},...], 'images ': [{...},...] }", "properties": {"data": [["language", 6410818076508661508, "TEXT", "#/texts/66", "en", 0.3499999940395355], ["semantic", 6410818076508661508, "TEXT", "#/texts/66", "reference", 0.7200000286102295]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/97"}], "text": "{ 'description ': { 'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale. ',, \u2192 'abstract ': 'Over the past few decades, the amount of scientific articles [...] ',, \u2192 'affiliations ': 'IBM Research Rueschlikon, Switzerland ', 'authors ': 'Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas ', \u2192 }, 'main-text ': [{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...], 'tables ': [{...},...], 'images ': [{...},...] }", "text-hash": 15129105844666734962, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/74", "hash": 12813875992986832439, "orig": "in combination with their associated predicted (or human-annotated) layout semantic labels. It should be noted that no machine learning is used in this component. It is purely rule based and therefore completely deterministic.", "properties": {"data": [["language", 12813875992986832439, "TEXT", "#/texts/67", "en", 0.9800000190734863], ["semantic", 12813875992986832439, "TEXT", "#/texts/67", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/98"}], "text": "in combination with their associated predicted (or human-annotated) layout semantic labels. It should be noted that no machine learning is used in this component. It is purely rule based and therefore completely deterministic.", "text-hash": 13337022012432085155, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/75", "hash": 11030869010407626539, "orig": "The assembly phase is a two step process. First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order. Then, the text of all cells that have the same label is contracted into a temporary document objects. Third, we build the internal structure of the temporary document objects, based on the information provided by the models. The latter is only applicable for internally structured objects, such as tables. An example of the generated JSON output is shown in Listing 1.", "properties": {"data": [["language", 11030869010407626539, "TEXT", "#/texts/68", "en", 0.949999988079071], ["semantic", 11030869010407626539, "TEXT", "#/texts/68", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/99"}], "text": "The assembly phase is a two step process. First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order. Then, the text of all cells that have the same label is contracted into a temporary document objects. Third, we build the internal structure of the temporary document objects, based on the information provided by the models. The latter is only applicable for internally structured objects, such as tables. An example of the generated JSON output is shown in Listing 1.", "text-hash": 10508897272021404039, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/76", "hash": 2142320548375900929, "orig": "4 ARCHITECTURE AND ORCHESTRATION OF CLOUD BASED MICROSERVICES", "properties": {"data": [["language", 2142320548375900929, "TEXT", "#/texts/69", "en", 0.33000001311302185], ["semantic", 2142320548375900929, "TEXT", "#/texts/69", "header", 0.9800000190734863]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/100"}], "text": "4 ARCHITECTURE AND ORCHESTRATION OF CLOUD BASED MICROSERVICES", "text-hash": 950718827856471405, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/77", "hash": 12747011194397783283, "orig": "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated. Before discussing the technical details, we would like to point out our requirements for the architecture of the platform. These requirements are all related to scaling. Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources. In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation. It is clear that the architecture of such a service is heavily influenced by these requirements.", "properties": {"data": [["language", 12747011194397783283, "TEXT", "#/texts/70", "en", 0.9599999785423279], ["semantic", 12747011194397783283, "TEXT", "#/texts/70", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/101"}], "text": "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated. Before discussing the technical details, we would like to point out our requirements for the architecture of the platform. These requirements are all related to scaling. Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources. In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation. It is clear that the architecture of such a service is heavily influenced by these requirements.", "text-hash": 13395059553653450335, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/78", "hash": 174789262945188010, "orig": "4.1 Platform layers", "properties": {"data": [["language", 174789262945188010, "TEXT", "#/texts/71", "en", 0.6200000047683716], ["semantic", 174789262945188010, "TEXT", "#/texts/71", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/102"}], "text": "4.1 Platform layers", "text-hash": 3197077882590976520, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/79", "hash": 7228893318503650455, "orig": "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents. In Figure 6, we show a sketch of its", "properties": {"data": [["language", 7228893318503650455, "TEXT", "#/texts/72", "en", 0.9399999976158142], ["semantic", 7228893318503650455, "TEXT", "#/texts/72", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/103"}], "text": "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents. In Figure 6, we show a sketch of its", "text-hash": 475277818666452483, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/81", "hash": 9230667184712205690, "orig": "architecture. As one can observe, we have grouped the service into four layers. These layers are:", "properties": {"data": [["language", 9230667184712205690, "TEXT", "#/texts/73", "en", 0.9599999785423279], ["semantic", 9230667184712205690, "TEXT", "#/texts/73", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/106"}], "text": "architecture. As one can observe, we have grouped the service into four layers. These layers are:", "text-hash": 12309253064221915096, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/82", "hash": 17419815751432442882, "orig": "(1) An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering. The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", "properties": {"data": [["language", 17419815751432442882, "TEXT", "#/texts/74", "en", 0.8600000143051147], ["semantic", 17419815751432442882, "TEXT", "#/texts/74", "text", 0.9800000190734863]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/107"}], "text": "(1) An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering. The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", "text-hash": 8731693174932948592, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/83", "hash": 11194226403360998426, "orig": "(2) An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result. The task scheduling is done with the Message Broker RabbitMQ$^{14}$. The results are stored in the in-memory data store Redis$^{15}$. In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully. This approach allows for a very robust, fault-tolerant service with very little downtime.", "properties": {"data": [["language", 11194226403360998426, "TEXT", "#/texts/75", "en", 0.8899999856948853], ["semantic", 11194226403360998426, "TEXT", "#/texts/75", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/108"}], "text": "(2) An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result. The task scheduling is done with the Message Broker RabbitMQ$^{14}$. The results are stored in the in-memory data store Redis$^{15}$. In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully. This approach allows for a very robust, fault-tolerant service with very little downtime.", "text-hash": 10633901501381588600, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/84", "hash": 9005324696118733701, "orig": "(3) A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc). In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$. This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker. The workers are not only consumers of tasks, but may also produce new ones. This is the case for the requests", "properties": {"data": [["language", 9005324696118733701, "TEXT", "#/texts/76", "en", 0.8799999952316284], ["semantic", 9005324696118733701, "TEXT", "#/texts/76", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/109"}], "text": "(3) A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc). In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$. This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker. The workers are not only consumers of tasks, but may also produce new ones. This is the case for the requests", "text-hash": 17146307233289309425, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/86", "hash": 8082547756621048511, "orig": "operating on the whole corpus. Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", "properties": {"data": [["language", 8082547756621048511, "TEXT", "#/texts/77", "en", 0.800000011920929], ["semantic", 8082547756621048511, "TEXT", "#/texts/77", "text", 0.9700000286102295]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/116"}], "text": "operating on the whole corpus. Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", "text-hash": 18059523399368641563, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/87", "hash": 7791113385466815951, "orig": "(4) A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store. The object-store allows us to easily scale the storage with regard to the number of processed documents. However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", "properties": {"data": [["language", 7791113385466815951, "TEXT", "#/texts/78", "en", 0.9200000166893005], ["semantic", 7791113385466815951, "TEXT", "#/texts/78", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/117"}], "text": "(4) A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store. The object-store allows us to easily scale the storage with regard to the number of processed documents. However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", "text-hash": 18360382746077681451, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/88", "hash": 2845012065511066307, "orig": "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it. This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ. Being a cloud-based platform, our solution allows for these software assets to be detached from the main deployment and to be served by specialised vendors services which are certified to the latest industry requirements such as data-at-rest encryption, high availability, etc.", "properties": {"data": [["language", 2845012065511066307, "TEXT", "#/texts/79", "en", 0.9599999785423279], ["semantic", 2845012065511066307, "TEXT", "#/texts/79", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/118"}], "text": "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it. This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ. Being a cloud-based platform, our solution allows for these software assets to be detached from the main deployment and to be served by specialised vendors services which are certified to the latest industry requirements such as data-at-rest encryption, high availability, etc.", "text-hash": 5147922161190726703, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/89", "hash": 15072914837937068796, "orig": "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform. From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services. During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks. For example other result-backends didn't offer the auto-cleaning functionality offered by Redis and, before opting for a custom solution mixing MongoDB with an object storage, we evaluated other solutions as", "properties": {"data": [["language", 15072914837937068796, "TEXT", "#/texts/80", "en", 0.949999988079071], ["semantic", 15072914837937068796, "TEXT", "#/texts/80", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/119"}], "text": "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform. From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services. During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks. For example other result-backends didn't offer the auto-cleaning functionality offered by Redis and, before opting for a custom solution mixing MongoDB with an object storage, we evaluated other solutions as", "text-hash": 6457975667604208730, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/91", "hash": 15263283599394646155, "orig": "the GridFS storage, but it didn't fit to the constraints of typical cloud environments.", "properties": {"data": [["language", 15263283599394646155, "TEXT", "#/texts/81", "en", 0.9800000190734863], ["semantic", 15263283599394646155, "TEXT", "#/texts/81", "text", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/123"}], "text": "the GridFS storage, but it didn't fit to the constraints of typical cloud environments.", "text-hash": 6564180200469858791, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/92", "hash": 11417717357379295278, "orig": "4.2 Deployment", "properties": {"data": [["language", 11417717357379295278, "TEXT", "#/texts/82", "en", 0.8399999737739563], ["semantic", 11417717357379295278, "TEXT", "#/texts/82", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/124"}], "text": "4.2 Deployment", "text-hash": 10410411375713696396, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/93", "hash": 9031137420247852045, "orig": "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution. Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", "properties": {"data": [["language", 9031137420247852045, "TEXT", "#/texts/83", "en", 0.8500000238418579], ["semantic", 9031137420247852045, "TEXT", "#/texts/83", "text", 0.9800000190734863]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/125"}], "text": "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution. Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", "text-hash": 17120327512656828009, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/94", "hash": 18436578077535696718, "orig": "The common parts of all deployments are the interface and the compute layer. The compute layer is designed for dynamically adapt the number of resources on the current load. For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", "properties": {"data": [["language", 18436578077535696718, "TEXT", "#/texts/84", "en", 0.9399999976158142], ["semantic", 18436578077535696718, "TEXT", "#/texts/84", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/126"}], "text": "The common parts of all deployments are the interface and the compute layer. The compute layer is designed for dynamically adapt the number of resources on the current load. For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", "text-hash": 8003240278028347820, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/95", "hash": 11734907767490759865, "orig": "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements. The parse component is indeed more demanding than the simple annotation components.", "properties": {"data": [["language", 11734907767490759865, "TEXT", "#/texts/85", "en", 0.9100000262260437], ["semantic", 11734907767490759865, "TEXT", "#/texts/85", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/127"}], "text": "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements. The parse component is indeed more demanding than the simple annotation components.", "text-hash": 14704352826439757333, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/96", "hash": 7845460979782401889, "orig": "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks. Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment. Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", "properties": {"data": [["language", 7845460979782401889, "TEXT", "#/texts/86", "en", 0.9399999976158142], ["semantic", 7845460979782401889, "TEXT", "#/texts/86", "text", 0.9599999785423279]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/128"}], "text": "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks. Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment. Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", "text-hash": 18296438351865061837, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/97", "hash": 17769988780693768120, "orig": "4.3 Scaling benchmarks", "properties": {"data": [["language", 17769988780693768120, "TEXT", "#/texts/87", "en", 0.38999998569488525], ["semantic", 17769988780693768120, "TEXT", "#/texts/87", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/129"}], "text": "4.3 Scaling benchmarks", "text-hash": 8669715371308316950, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/98", "hash": 12387489643011067991, "orig": "Let us now discuss some scaling results on our platform. As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources. In Figure 7, we show the number of users and the number of processed PDF", "properties": {"data": [["language", 12387489643011067991, "TEXT", "#/texts/88", "en", 0.9300000071525574], ["semantic", 12387489643011067991, "TEXT", "#/texts/88", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/130"}], "text": "Let us now discuss some scaling results on our platform. As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources. In Figure 7, we show the number of users and the number of processed PDF", "text-hash": 14043220598855238339, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/99", "hash": 10375772475809458895, "orig": "pages 20 as a function of time. As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017. It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time. Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", "properties": {"data": [["language", 10375772475809458895, "TEXT", "#/texts/89", "en", 0.9900000095367432], ["semantic", 10375772475809458895, "TEXT", "#/texts/89", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/133"}], "text": "pages 20 as a function of time. As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017. It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time. Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", "text-hash": 11451664978555915307, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/100", "hash": 7054726458191881751, "orig": "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources. We show this scaling by displaying the speedup versus the number of worker nodes available. Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores. As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes. Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker. The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level. The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes. Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", "properties": {"data": [["language", 7054726458191881751, "TEXT", "#/texts/90", "en", 0.9399999976158142], ["semantic", 7054726458191881751, "TEXT", "#/texts/90", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/134"}], "text": "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources. We show this scaling by displaying the speedup versus the number of worker nodes available. Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores. As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes. Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker. The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level. The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes. Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", "text-hash": 641132783909312643, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/101", "hash": 7794115281016062068, "orig": "5 CONCLUSION", "properties": {"data": [["language", 7794115281016062068, "TEXT", "#/texts/91", "en", 0.38999998569488525], ["semantic", 7794115281016062068, "TEXT", "#/texts/91", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/135"}], "text": "5 CONCLUSION", "text-hash": 18347902420476900066, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/102", "hash": 7038163015905900647, "orig": "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", "properties": {"data": [["language", 7038163015905900647, "TEXT", "#/texts/92", "en", 0.9200000166893005], ["semantic", 7038163015905900647, "TEXT", "#/texts/92", "text", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/136"}], "text": "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", "text-hash": 657005981473069779, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/103", "hash": 1508626318915838319, "orig": "The fundamental design choices in our solution have proven to enable scaling in three elementary ways. First, it can service multiple users concurrently. Second, it can ingest, parse and apply machine learned models on many documents at the same time. Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", "properties": {"data": [["language", 1508626318915838319, "TEXT", "#/texts/93", "en", 0.949999988079071], ["semantic", 1508626318915838319, "TEXT", "#/texts/93", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/137"}], "text": "The fundamental design choices in our solution have proven to enable scaling in three elementary ways. First, it can service multiple users concurrently. Second, it can ingest, parse and apply machine learned models on many documents at the same time. Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", "text-hash": 1575427749670982603, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/104", "hash": 17247086344435786796, "orig": "In the future, we plan to extend the platform in two major areas. First, we would like to extend the number of microservices, especially with regard to image understanding. The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc). The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier. Second, we would like to improve the quality and performance of our default models. We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5). To leverage this growing use of deep learning models, we will additionally introduce", "properties": {"data": [["language", 17247086344435786796, "TEXT", "#/texts/94", "en", 0.9300000071525574], ["semantic", 17247086344435786796, "TEXT", "#/texts/94", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/138"}], "text": "In the future, we plan to extend the platform in two major areas. First, we would like to extend the number of microservices, especially with regard to image understanding. The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc). The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier. Second, we would like to improve the quality and performance of our default models. We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5). To leverage this growing use of deep learning models, we will additionally introduce", "text-hash": 9192771730962863754, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/105", "hash": 10287541089279789496, "orig": "specialised data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", "properties": {"data": [["language", 10287541089279789496, "TEXT", "#/texts/95", "en", 0.8299999833106995], ["semantic", 10287541089279789496, "TEXT", "#/texts/95", "text", 0.8299999833106995]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/140"}], "text": "specialised data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", "text-hash": 11530911151361059606, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/106", "hash": 7819882792760965882, "orig": "ACKNOWLEDGMENTS", "properties": {"data": [["language", 7819882792760965882, "TEXT", "#/texts/96", "en", 0.25], ["semantic", 7819882792760965882, "TEXT", "#/texts/96", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/141"}], "text": "ACKNOWLEDGMENTS", "text-hash": 18322720810464861272, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/107", "hash": 15983582675278266440, "orig": "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", "properties": {"data": [["language", 15983582675278266440, "TEXT", "#/texts/97", "en", 0.949999988079071], ["semantic", 15983582675278266440, "TEXT", "#/texts/97", "text", 0.9399999976158142]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/142"}], "text": "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", "text-hash": 5556222901900980902, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/108", "hash": 12711351442546714716, "orig": "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation. MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", "properties": {"data": [["language", 12711351442546714716, "TEXT", "#/texts/98", "en", 0.9300000071525574], ["semantic", 12711351442546714716, "TEXT", "#/texts/98", "text", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/143"}], "text": "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation. MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", "text-hash": 13431247303555599034, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/109", "hash": 1225384713519841338, "orig": "REFERENCES", "properties": {"data": [["language", 1225384713519841338, "TEXT", "#/texts/99", "en", 0.33000001311302185], ["semantic", 1225384713519841338, "TEXT", "#/texts/99", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/144"}], "text": "REFERENCES", "text-hash": 1858797456585454232, "type": "subtitle-level-1"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/110", "hash": 1712774266196702392, "orig": "[1] A. Antonacopoulos, C. Clausner, C. Papadopoulos, and S. Pletschacher. 2015. ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015. In Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR2015). Nancy, 1151-1155.", "properties": {"data": [["language", 1712774266196702392, "TEXT", "#/texts/100", "en", 0.6499999761581421], ["semantic", 1712774266196702392, "TEXT", "#/texts/100", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/145"}], "text": "[1] A. Antonacopoulos, C. Clausner, C. Papadopoulos, and S. Pletschacher. 2015. ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015. In Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR2015). Nancy, 1151-1155.", "text-hash": 1659105420801451542, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/111", "hash": 14718288547983000340, "orig": "[2] Leo Breiman. 2001. Random Forests. Machine Learning 45, 1 (01 Oct 2001), 5-32. https://doi.org/10.1023/A:1010933404324", "properties": {"data": [["language", 14718288547983000340, "TEXT", "#/texts/101", "en", 0.5799999833106995], ["semantic", 14718288547983000340, "TEXT", "#/texts/101", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/146"}], "text": "[2] Leo Breiman. 2001. Random Forests. Machine Learning 45, 1 (01 Oct 2001), 5-32. https://doi.org/10.1023/A:1010933404324", "text-hash": 6812664208788567426, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/112", "hash": 16943780574244090186, "orig": "[3] R. Cattoni, T. Coianiz, S. Messelodi, and C. M. Modena. 1998. Geometric layout analysis techniques for document image understanding: a review. Technical Report.", "properties": {"data": [["language", 16943780574244090186, "TEXT", "#/texts/102", "en", 0.6700000166893005], ["semantic", 16943780574244090186, "TEXT", "#/texts/102", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/147"}], "text": "[3] R. Cattoni, T. Coianiz, S. Messelodi, and C. M. Modena. 1998. Geometric layout analysis techniques for document image understanding: a review. Technical Report.", "text-hash": 9486476535199015848, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/113", "hash": 8004985786049140169, "orig": "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier. 2005. From Legacy Documents to XML: A Conversion Framework. Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103. https://doi.org/10.1007/11551362_9", "properties": {"data": [["language", 8004985786049140169, "TEXT", "#/texts/103", "en", 0.3400000035762787], ["semantic", 8004985786049140169, "TEXT", "#/texts/103", "reference", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/148"}], "text": "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier. 2005. From Legacy Documents to XML: A Conversion Framework. Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103. https://doi.org/10.1007/11551362_9", "text-hash": 18434854666592634661, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/114", "hash": 12744546813104546377, "orig": "[5] Ross Girshick. 2015. Fast R-CNN. In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15). IEEE Computer Society, Washington, DC, USA, 1440-1448. https://doi.org/10.1109/ICCV.2015.169", "properties": {"data": [["language", 12744546813104546377, "TEXT", "#/texts/104", "en", 0.47999998927116394], ["semantic", 12744546813104546377, "TEXT", "#/texts/104", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/149"}], "text": "[5] Ross Girshick. 2015. Fast R-CNN. In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15). IEEE Computer Society, Washington, DC, USA, 1440-1448. https://doi.org/10.1109/ICCV.2015.169", "text-hash": 13406949228208477349, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/115", "hash": 16061746189176848219, "orig": "[6] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. 2013. Rich feature hierarchies for accurate object detection and semantic segmentation. CoRR abs/1311.2524 (2013). arXiv:1311.2524 http://arxiv.org/abs/1311.2524", "properties": {"data": [["language", 16061746189176848219, "TEXT", "#/texts/105", "en", 0.6299999952316284], ["semantic", 16061746189176848219, "TEXT", "#/texts/105", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/150"}], "text": "[6] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. 2013. Rich feature hierarchies for accurate object detection and semantic segmentation. CoRR abs/1311.2524 (2013). arXiv:1311.2524 http://arxiv.org/abs/1311.2524", "text-hash": 5756829059313082807, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/116", "hash": 11872392946390819176, "orig": "[7] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. 2016. SSD: Single Shot MultiBox Detector. Springer International Publishing, Cham, 21-37. https://doi.org/10. 1007/978-3-319-46448-0_2", "properties": {"data": [["language", 11872392946390819176, "TEXT", "#/texts/106", "en", 0.38999998569488525], ["semantic", 11872392946390819176, "TEXT", "#/texts/106", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/151"}], "text": "[7] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. 2016. SSD: Single Shot MultiBox Detector. Springer International Publishing, Cham, 21-37. https://doi.org/10. 1007/978-3-319-46448-0_2", "text-hash": 14270091870781297606, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/117", "hash": 2956849475535726296, "orig": "[8] Joseph Redmon, Santosh Kumar Divvala, Ross B. Girshick, and Ali Farhadi. 2016. You Only Look Once: Unified, Real-Time Object Detection. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016), 779-788.", "properties": {"data": [["language", 2956849475535726296, "TEXT", "#/texts/107", "en", 0.6299999952316284], ["semantic", 2956849475535726296, "TEXT", "#/texts/107", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/152"}], "text": "[8] Joseph Redmon, Santosh Kumar Divvala, Ross B. Girshick, and Ali Farhadi. 2016. You Only Look Once: Unified, Real-Time Object Detection. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016), 779-788.", "text-hash": 4738468948628789302, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/118", "hash": 6623297047995432604, "orig": "[9] Joseph Redmon and Ali Farhadi. 2016. YOLO9000: Better, Faster, Stronger. arXiv preprint arXiv:1612.08242 (2016).", "properties": {"data": [["language", 6623297047995432604, "TEXT", "#/texts/108", "en", 0.4399999976158142], ["semantic", 6623297047995432604, "TEXT", "#/texts/108", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/153"}], "text": "[9] Joseph Redmon and Ali Farhadi. 2016. YOLO9000: Better, Faster, Stronger. arXiv preprint arXiv:1612.08242 (2016).", "text-hash": 15195146357792776186, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/119", "hash": 2507285765516108280, "orig": "[10] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Advances in Neural Information Processing Systems 28, C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett (Eds.). Curran Associates, Inc., 91-99. http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks. pdf", "properties": {"data": [["language", 2507285765516108280, "TEXT", "#/texts/109", "en", 0.5899999737739563], ["semantic", 2507285765516108280, "TEXT", "#/texts/109", "reference", 0.9800000190734863]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/154"}], "text": "[10] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Advances in Neural Information Processing Systems 28, C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett (Eds.). Curran Associates, Inc., 91-99. http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks. pdf", "text-hash": 5476658171803931478, "type": "paragraph"}, {"dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/120", "hash": 14905276480471286920, "orig": "[11] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. 2018. Corpus Conversion Service poster at the SysML conference. http://www.sysml.cc/doc/ 76.pdf", "properties": {"data": [["language", 14905276480471286920, "TEXT", "#/texts/110", "en", 0.47999998927116394], ["semantic", 14905276480471286920, "TEXT", "#/texts/110", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [{"$ref": "#/page-elements/155"}], "text": "[11] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. 2018. Corpus Conversion Service poster at the SysML conference. http://www.sysml.cc/doc/ 76.pdf", "text-hash": 6922174983558886886, "type": "paragraph"}]} diff --git a/tests/data/texts/references.nlp.jsonl b/tests/data/texts/references.nlp.jsonl index e43011fb..5b3f1000 100644 --- a/tests/data/texts/references.nlp.jsonl +++ b/tests/data/texts/references.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, 18446744073709551615, 18446744073709551615, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, 18446744073709551615, 18446744073709551615, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, 18446744073709551615, 18446744073709551615, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, 18446744073709551615, 18446744073709551615, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, 18446744073709551615, 18446744073709551615, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, 18446744073709551615, 18446744073709551615, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "volume", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["reference", "pages", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", "reference", 0.8899999856948853]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, 18446744073709551615, 18446744073709551615, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, 18446744073709551615, 18446744073709551615, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, 18446744073709551615, 18446744073709551615, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, 18446744073709551615, 18446744073709551615, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, 18446744073709551615, 18446744073709551615, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, 18446744073709551615, 18446744073709551615, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, 18446744073709551615, 18446744073709551615, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, 18446744073709551615, 18446744073709551615, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, 18446744073709551615, 18446744073709551615, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, 18446744073709551615, 18446744073709551615, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, 18446744073709551615, 18446744073709551615, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "volume", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["reference", "pages", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", "reference", 0.9399999976158142]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, 18446744073709551615, 18446744073709551615, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, 18446744073709551615, 18446744073709551615, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, 18446744073709551615, 18446744073709551615, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, 18446744073709551615, 18446744073709551615, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, 18446744073709551615, 18446744073709551615, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, 18446744073709551615, 18446744073709551615, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, 18446744073709551615, 18446744073709551615, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, 18446744073709551615, 18446744073709551615, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, 18446744073709551615, 18446744073709551615, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, 18446744073709551615, 18446744073709551615, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, 18446744073709551615, 18446744073709551615, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, 18446744073709551615, 18446744073709551615, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, 18446744073709551615, 18446744073709551615, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, 18446744073709551615, 18446744073709551615, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, 18446744073709551615, 18446744073709551615, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, 18446744073709551615, 18446744073709551615, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, 18446744073709551615, 18446744073709551615, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/semantics.nlp.jsonl b/tests/data/texts/semantics.nlp.jsonl index 821624cb..fdda794a 100644 --- a/tests/data/texts/semantics.nlp.jsonl +++ b/tests/data/texts/semantics.nlp.jsonl @@ -1,7 +1,7 @@ -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 7759316032128614217, "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", "header", 0.7099999785423279]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14339562343989983509, "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", "meta-data", 0.800000011920929]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 18143996061359107703, "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", "meta-data", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} -{"applied-models": ["link", "numval"], "dloc": "", "hash": 11035282656876697300, "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", "meta-data", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14993488697470108654, "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, 18446744073709551615, 18446744073709551615, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, 18446744073709551615, 18446744073709551615, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", "text", 0.9599999785423279]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, 18446744073709551615, 18446744073709551615, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", "reference", 0.8899999856948853]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, 18446744073709551615, 18446744073709551615, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", "reference", 0.9399999976158142]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 7759316032128614217, "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", 7759316032128614217, "TEXT", "#", "header", 0.5400000214576721]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14339562343989983509, "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", 14339562343989983509, "TEXT", "#", "meta-data", 0.9100000262260437]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 18143996061359107703, "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", 18143996061359107703, "TEXT", "#", "meta-data", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} +{"applied-models": ["link", "numval"], "dloc": "", "hash": 11035282656876697300, "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", 11035282656876697300, "TEXT", "#", "meta-data", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14993488697470108654, "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, 18446744073709551615, 18446744073709551615, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, 18446744073709551615, 18446744073709551615, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", 14993488697470108654, "TEXT", "#", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, 18446744073709551615, 18446744073709551615, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, 18446744073709551615, 18446744073709551615, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/terms.nlp.jsonl b/tests/data/texts/terms.nlp.jsonl index c42f0ffa..2a927824 100644 --- a/tests/data/texts/terms.nlp.jsonl +++ b/tests/data/texts/terms.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, 18446744073709551615, 18446744073709551615, 0, 177, 0, 164, 0, 35, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, 18446744073709551615, 18446744073709551615, 7, 31, 7, 26, 1, 7, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, 18446744073709551615, 18446744073709551615, 27, 30, 24, 25, 5, 6, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, 18446744073709551615, 18446744073709551615, 48, 63, 43, 58, 10, 12, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, 18446744073709551615, 18446744073709551615, 64, 122, 59, 109, 12, 22, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, 18446744073709551615, 18446744073709551615, 73, 95, 68, 88, 15, 17, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, 18446744073709551615, 18446744073709551615, 96, 106, 89, 98, 17, 19, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, 18446744073709551615, 18446744073709551615, 107, 121, 99, 108, 19, 21, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, 18446744073709551615, 18446744073709551615, 123, 127, 110, 114, 23, 26, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, 18446744073709551615, 18446744073709551615, 124, 126, 111, 113, 24, 25, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, 18446744073709551615, 18446744073709551615, 128, 130, 115, 117, 26, 27, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, 18446744073709551615, 18446744073709551615, 133, 140, 120, 127, 28, 29, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, 18446744073709551615, 18446744073709551615, 141, 158, 128, 145, 29, 31, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, 18446744073709551615, 18446744073709551615, 159, 161, 146, 148, 31, 32, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, 18446744073709551615, 18446744073709551615, 162, 176, 149, 163, 32, 34, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, 18446744073709551615, 18446744073709551615, 170, 176, 157, 163, 33, 34, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, 18446744073709551615, 18446744073709551615, 178, 375, 165, 362, 35, 67, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, 18446744073709551615, 18446744073709551615, 186, 194, 173, 181, 37, 38, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, 18446744073709551615, 18446744073709551615, 195, 211, 182, 198, 38, 40, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, 18446744073709551615, 18446744073709551615, 204, 227, 191, 214, 39, 42, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, 18446744073709551615, 18446744073709551615, 216, 227, 203, 214, 41, 42, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, 18446744073709551615, 18446744073709551615, 228, 234, 215, 221, 42, 44, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, 18446744073709551615, 18446744073709551615, 235, 243, 222, 230, 44, 45, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16827399947339178045, 496128657873109341, 18446744073709551615, 18446744073709551615, 252, 293, 239, 280, 47, 53, true, "Atlantic, Pacific and Indian oceans,[XII]", "Atlantic, Pacific and Indian oceans,[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, 18446744073709551615, 18446744073709551615, 252, 260, 239, 247, 47, 48, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, 18446744073709551615, 18446744073709551615, 262, 269, 249, 256, 49, 50, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 3575373331357445963, 1702692810903063225, 18446744073709551615, 18446744073709551615, 274, 293, 261, 280, 51, 53, true, "Indian oceans,[XII]", "Indian oceans,[XII]"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, 18446744073709551615, 18446744073709551615, 281, 293, 268, 280, 52, 53, true, "oceans,[XII]", "oceans,[XII]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 16381206561323757770, 14007677850696664277, 18446744073709551615, 18446744073709551615, 294, 300, 281, 287, 53, 54, true, "giving", "giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, 18446744073709551615, 18446744073709551615, 308, 314, 295, 301, 56, 58, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, 18446744073709551615, 18446744073709551615, 315, 361, 302, 348, 58, 63, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, 18446744073709551615, 18446744073709551615, 362, 368, 349, 355, 63, 65, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, 18446744073709551615, 18446744073709551615, 369, 374, 356, 361, 65, 66, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, 18446744073709551615, 18446744073709551615, 376, 637, 363, 624, 67, 118, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, 18446744073709551615, 18446744073709551615, 376, 410, 363, 397, 67, 71, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, 18446744073709551615, 18446744073709551615, 389, 395, 376, 382, 68, 69, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, 18446744073709551615, 18446744073709551615, 411, 415, 398, 402, 71, 72, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, 18446744073709551615, 18446744073709551615, 416, 438, 403, 425, 72, 75, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, 18446744073709551615, 18446744073709551615, 439, 445, 426, 432, 75, 77, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, 18446744073709551615, 18446744073709551615, 446, 451, 433, 438, 77, 78, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, 18446744073709551615, 18446744073709551615, 461, 467, 448, 454, 80, 82, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, 18446744073709551615, 18446744073709551615, 492, 498, 479, 485, 86, 88, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, 18446744073709551615, 18446744073709551615, 505, 521, 492, 508, 90, 93, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, 18446744073709551615, 18446744073709551615, 515, 521, 502, 508, 92, 93, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, 18446744073709551615, 18446744073709551615, 522, 528, 509, 515, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, 18446744073709551615, 18446744073709551615, 541, 558, 528, 545, 98, 101, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, 18446744073709551615, 18446744073709551615, 559, 565, 546, 552, 101, 103, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, 18446744073709551615, 18446744073709551615, 566, 571, 553, 558, 103, 104, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, 18446744073709551615, 18446744073709551615, 579, 594, 566, 581, 107, 109, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, 18446744073709551615, 18446744073709551615, 595, 603, 582, 590, 109, 111, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, 18446744073709551615, 18446744073709551615, 619, 625, 606, 612, 113, 115, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, 18446744073709551615, 18446744073709551615, 626, 636, 613, 623, 115, 117, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, 18446744073709551615, 18446744073709551615, 638, 961, 625, 948, 118, 176, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, 18446744073709551615, 18446744073709551615, 642, 659, 629, 646, 119, 121, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, 18446744073709551615, 18446744073709551615, 660, 667, 647, 654, 121, 122, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, 18446744073709551615, 18446744073709551615, 668, 676, 655, 663, 122, 124, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, 18446744073709551615, 18446744073709551615, 677, 682, 664, 669, 124, 125, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, 18446744073709551615, 18446744073709551615, 683, 689, 670, 676, 125, 127, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, 18446744073709551615, 18446744073709551615, 709, 717, 696, 704, 130, 132, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, 18446744073709551615, 18446744073709551615, 736, 742, 723, 729, 134, 136, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, 18446744073709551615, 18446744073709551615, 778, 798, 765, 785, 143, 145, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, 18446744073709551615, 18446744073709551615, 799, 806, 786, 793, 145, 146, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, 18446744073709551615, 18446744073709551615, 807, 820, 794, 807, 146, 148, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, 18446744073709551615, 18446744073709551615, 821, 823, 808, 810, 148, 149, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, 18446744073709551615, 18446744073709551615, 824, 864, 811, 851, 149, 156, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, 18446744073709551615, 18446744073709551615, 839, 851, 826, 838, 152, 154, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, 18446744073709551615, 18446744073709551615, 856, 864, 843, 851, 155, 156, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, 18446744073709551615, 18446744073709551615, 865, 871, 852, 858, 156, 158, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, 18446744073709551615, 18446744073709551615, 872, 886, 859, 873, 158, 160, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, 18446744073709551615, 18446744073709551615, 892, 910, 879, 897, 162, 165, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, 18446744073709551615, 18446744073709551615, 916, 928, 903, 915, 167, 169, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, 18446744073709551615, 18446744073709551615, 929, 931, 916, 918, 169, 170, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, 18446744073709551615, 18446744073709551615, 962, 1384, 949, 1371, 176, 254, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, 18446744073709551615, 18446744073709551615, 966, 991, 953, 978, 177, 180, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, 18446744073709551615, 18446744073709551615, 992, 1020, 979, 1007, 180, 187, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, 18446744073709551615, 18446744073709551615, 998, 1000, 985, 987, 182, 183, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, 18446744073709551615, 18446744073709551615, 1007, 1019, 994, 1006, 184, 186, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, 18446744073709551615, 18446744073709551615, 1021, 1025, 1008, 1012, 187, 188, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, 18446744073709551615, 18446744073709551615, 1028, 1036, 1015, 1023, 189, 190, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, 18446744073709551615, 18446744073709551615, 1037, 1041, 1024, 1028, 190, 191, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, 18446744073709551615, 18446744073709551615, 1042, 1044, 1029, 1031, 191, 192, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, 18446744073709551615, 18446744073709551615, 1045, 1052, 1032, 1039, 192, 193, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, 18446744073709551615, 18446744073709551615, 1057, 1072, 1044, 1059, 194, 199, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, 18446744073709551615, 18446744073709551615, 1058, 1065, 1045, 1052, 195, 196, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, 18446744073709551615, 18446744073709551615, 1066, 1071, 1053, 1058, 196, 198, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, 18446744073709551615, 18446744073709551615, 1077, 1081, 1064, 1068, 200, 201, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, 18446744073709551615, 18446744073709551615, 1084, 1100, 1071, 1087, 202, 204, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, 18446744073709551615, 18446744073709551615, 1101, 1103, 1088, 1090, 204, 205, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, 18446744073709551615, 18446744073709551615, 1104, 1108, 1091, 1095, 205, 206, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, 18446744073709551615, 18446744073709551615, 1109, 1111, 1096, 1098, 206, 207, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, 18446744073709551615, 18446744073709551615, 1112, 1119, 1099, 1106, 207, 208, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, 18446744073709551615, 18446744073709551615, 1120, 1122, 1107, 1109, 208, 209, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, 18446744073709551615, 18446744073709551615, 1123, 1125, 1110, 1112, 209, 210, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, 18446744073709551615, 18446744073709551615, 1126, 1133, 1113, 1120, 210, 211, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, 18446744073709551615, 18446744073709551615, 1134, 1145, 1121, 1132, 211, 212, true, "2023.[5][8]", "2023.[5][8]"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, 18446744073709551615, 18446744073709551615, 1153, 1155, 1140, 1142, 213, 214, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 101756270285429158, 6309445736017161690, 18446744073709551615, 18446744073709551615, 1158, 1192, 1145, 1179, 215, 218, true, "unitary semi-presidential republic", "unitary semi-presidential republic"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, 18446744073709551615, 18446744073709551615, 1166, 1183, 1153, 1170, 216, 217, true, "semi-presidential", "semi-presidential"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, 18446744073709551615, 18446744073709551615, 1193, 1197, 1180, 1184, 218, 219, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, 18446744073709551615, 18446744073709551615, 1202, 1209, 1189, 1196, 220, 221, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, 18446744073709551615, 18446744073709551615, 1210, 1212, 1197, 1199, 221, 222, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, 18446744073709551615, 18446744073709551615, 1213, 1218, 1200, 1205, 222, 223, true, "Paris", "Paris"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7060736712546470087, 14254659311922306724, 18446744073709551615, 18446744073709551615, 1224, 1246, 1211, 1233, 225, 228, true, "countrys largest city", "country's largest city"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, 18446744073709551615, 18446744073709551615, 1224, 1233, 1211, 1220, 225, 226, true, "countrys", "country's"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, 18446744073709551615, 18446744073709551615, 1251, 1286, 1238, 1273, 229, 234, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, 18446744073709551615, 18446744073709551615, 1269, 1286, 1256, 1273, 232, 234, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, 18446744073709551615, 18446744073709551615, 1288, 1311, 1275, 1298, 235, 239, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, 18446744073709551615, 18446744073709551615, 1312, 1319, 1299, 1306, 239, 240, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, 18446744073709551615, 18446744073709551615, 1320, 1383, 1307, 1370, 240, 253, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, 18446744073709551615, 18446744073709551615, 1320, 1329, 1307, 1316, 240, 241, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, 18446744073709551615, 18446744073709551615, 1331, 1335, 1318, 1322, 242, 243, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, 18446744073709551615, 18446744073709551615, 1337, 1345, 1324, 1332, 244, 245, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, 18446744073709551615, 18446744073709551615, 1347, 1352, 1334, 1339, 246, 247, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, 18446744073709551615, 18446744073709551615, 1354, 1362, 1341, 1349, 248, 249, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, 18446744073709551615, 18446744073709551615, 1364, 1374, 1351, 1361, 250, 251, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, 18446744073709551615, 18446744073709551615, 1379, 1383, 1366, 1370, 252, 253, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", "en", 0.9300000071525574], ["semantic", "text", 0.9599999785423279]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, 18446744073709551615, 18446744073709551615, 0, 188, 0, 188, 0, 28, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, 18446744073709551615, 18446744073709551615, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, 18446744073709551615, 18446744073709551615, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004679635976, 18446744073709551615, 18446744073709551615, 24, 41, 24, 41, 5, 7, true, "interband pairing", "interband pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, 18446744073709551615, 18446744073709551615, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["term", "enum-term-mark-1", 4522339299074192207, "TEXT", "#", 1.0, 18178792033664231045, 5215905145529509301, 18446744073709551615, 18446744073709551615, 45, 87, 45, 87, 8, 13, true, "two-band s-wave and d-wave superconductors", "two-band s-wave and d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, 18446744073709551615, 18446744073709551615, 45, 53, 45, 53, 8, 9, true, "two-band", "two-band"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, 18446744073709551615, 18446744073709551615, 54, 60, 54, 60, 9, 10, true, "s-wave", "s-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15865120430118694837, 607662791561950043, 18446744073709551615, 18446744073709551615, 65, 87, 65, 87, 11, 13, true, "d-wave superconductors", "d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, 18446744073709551615, 18446744073709551615, 65, 71, 65, 71, 11, 12, true, "d-wave", "d-wave"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, 18446744073709551615, 18446744073709551615, 88, 92, 88, 92, 13, 14, true, "with", "with"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7066208506210013514, 1315102098090612032, 18446744073709551615, 18446744073709551615, 93, 105, 93, 105, 14, 16, true, "D4h symmetry", "D4h symmetry"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, 18446744073709551615, 18446744073709551615, 93, 96, 93, 96, 14, 15, true, "D4h", "D4h"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, 18446744073709551615, 18446744073709551615, 106, 113, 106, 113, 16, 18, true, "in both", "in both"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5172475826427571765, 16752879714615995236, 18446744073709551615, 18446744073709551615, 114, 137, 114, 137, 18, 20, true, "time-reversal invariant", "time-reversal invariant"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, 18446744073709551615, 18446744073709551615, 114, 127, 114, 127, 18, 19, true, "time-reversal", "time-reversal"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, 18446744073709551615, 18446744073709551615, 146, 148, 146, 148, 22, 23, true, "as", "as"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 10193294999568911218, 6331719907444433820, 18446744073709551615, 18446744073709551615, 149, 171, 149, 171, 23, 25, true, "time-reversal symmetry", "time-reversal symmetry"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, 18446744073709551615, 18446744073709551615, 149, 162, 149, 162, 23, 24, true, "time-reversal", "time-reversal"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 25, 26, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, 18446744073709551615, 18446744073709551615, 181, 187, 181, 187, 26, 27, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, 18446744073709551615, 18446744073709551615, 189, 384, 189, 384, 28, 58, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, 18446744073709551615, 18446744073709551615, 193, 201, 193, 201, 29, 30, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 30, 31, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, 18446744073709551615, 18446744073709551615, 205, 214, 205, 214, 31, 32, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, 18446744073709551615, 18446744073709551615, 215, 244, 215, 244, 32, 35, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, 18446744073709551615, 18446744073709551615, 249, 264, 249, 264, 36, 38, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, 18446744073709551615, 18446744073709551615, 265, 271, 265, 271, 38, 40, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, 18446744073709551615, 18446744073709551615, 272, 286, 272, 286, 40, 41, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, 18446744073709551615, 18446744073709551615, 288, 293, 288, 293, 42, 43, true, "nodes", "nodes"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 3766089650286616147, 5895288868427388531, 18446744073709551615, 18446744073709551615, 294, 309, 294, 309, 43, 45, true, "can (dis)appear", "can (dis)appear"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, 18446744073709551615, 18446744073709551615, 298, 309, 298, 309, 44, 45, true, "(dis)appear", "(dis)appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, 18446744073709551615, 18446744073709551615, 311, 316, 311, 316, 46, 47, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, 18446744073709551615, 18446744073709551615, 322, 327, 322, 327, 49, 50, true, "leave", "leave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106670696871780136, 17807492235586576248, 18446744073709551615, 18446744073709551615, 328, 351, 328, 351, 50, 52, true, "high-symmetry locations", "high-symmetry locations"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, 18446744073709551615, 18446744073709551615, 328, 341, 328, 341, 50, 51, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, 18446744073709551615, 18446744073709551615, 357, 374, 357, 374, 53, 55, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, 18446744073709551615, 18446744073709551615, 375, 383, 375, 383, 55, 57, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, 18446744073709551615, 18446744073709551615, 385, 594, 385, 594, 58, 93, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, 18446744073709551615, 18446744073709551615, 398, 404, 398, 404, 60, 62, true, "in the", "in the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15559186615879240368, 12910915472651789195, 18446744073709551615, 18446744073709551615, 405, 416, 405, 416, 62, 64, true, "d-wave case", "d-wave case"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, 18446744073709551615, 18446744073709551615, 405, 411, 405, 411, 62, 63, true, "d-wave", "d-wave"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, 18446744073709551615, 18446744073709551615, 421, 425, 421, 425, 66, 67, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, 18446744073709551615, 18446744073709551615, 426, 430, 426, 430, 67, 68, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, 18446744073709551615, 18446744073709551615, 440, 454, 440, 454, 70, 72, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, 18446744073709551615, 18446744073709551615, 455, 475, 455, 475, 72, 74, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, 18446744073709551615, 18446744073709551615, 481, 490, 481, 490, 75, 76, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, 18446744073709551615, 18446744073709551615, 491, 498, 491, 498, 76, 77, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, 18446744073709551615, 18446744073709551615, 499, 508, 499, 508, 77, 78, true, "increases", "increases"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 18352755674675419019, 8051640294707098683, 18446744073709551615, 18446744073709551615, 510, 547, 510, 547, 79, 84, true, "flat zero-energy Andreev bound states", "flat zero-energy Andreev bound states"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, 18446744073709551615, 18446744073709551615, 515, 526, 515, 526, 80, 81, true, "zero-energy", "zero-energy"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, 18446744073709551615, 18446744073709551615, 548, 555, 548, 555, 84, 86, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, 18446744073709551615, 18446744073709551615, 560, 570, 560, 570, 87, 88, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, 18446744073709551615, 18446744073709551615, 571, 573, 571, 573, 88, 89, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, 18446744073709551615, 18446744073709551615, 574, 593, 574, 593, 89, 92, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", "en", 0.8799999952316284], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} +{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, 18446744073709551615, 18446744073709551615, 0, 177, 0, 164, 0, 35, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, 18446744073709551615, 18446744073709551615, 7, 31, 7, 26, 1, 7, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, 18446744073709551615, 18446744073709551615, 27, 30, 24, 25, 5, 6, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, 18446744073709551615, 18446744073709551615, 48, 63, 43, 58, 10, 12, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, 18446744073709551615, 18446744073709551615, 64, 122, 59, 109, 12, 22, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, 18446744073709551615, 18446744073709551615, 73, 95, 68, 88, 15, 17, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, 18446744073709551615, 18446744073709551615, 96, 106, 89, 98, 17, 19, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, 18446744073709551615, 18446744073709551615, 107, 121, 99, 108, 19, 21, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, 18446744073709551615, 18446744073709551615, 123, 127, 110, 114, 23, 26, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, 18446744073709551615, 18446744073709551615, 124, 126, 111, 113, 24, 25, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, 18446744073709551615, 18446744073709551615, 128, 130, 115, 117, 26, 27, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, 18446744073709551615, 18446744073709551615, 133, 140, 120, 127, 28, 29, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, 18446744073709551615, 18446744073709551615, 141, 158, 128, 145, 29, 31, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, 18446744073709551615, 18446744073709551615, 159, 161, 146, 148, 31, 32, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, 18446744073709551615, 18446744073709551615, 162, 176, 149, 163, 32, 34, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, 18446744073709551615, 18446744073709551615, 170, 176, 157, 163, 33, 34, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, 18446744073709551615, 18446744073709551615, 178, 375, 165, 362, 35, 67, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, 18446744073709551615, 18446744073709551615, 186, 194, 173, 181, 37, 38, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, 18446744073709551615, 18446744073709551615, 195, 211, 182, 198, 38, 40, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, 18446744073709551615, 18446744073709551615, 204, 227, 191, 214, 39, 42, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, 18446744073709551615, 18446744073709551615, 216, 227, 203, 214, 41, 42, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, 18446744073709551615, 18446744073709551615, 228, 234, 215, 221, 42, 44, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, 18446744073709551615, 18446744073709551615, 235, 243, 222, 230, 44, 45, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16827399947339178045, 496128657873109341, 18446744073709551615, 18446744073709551615, 252, 293, 239, 280, 47, 53, true, "Atlantic, Pacific and Indian oceans,[XII]", "Atlantic, Pacific and Indian oceans,[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, 18446744073709551615, 18446744073709551615, 252, 260, 239, 247, 47, 48, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, 18446744073709551615, 18446744073709551615, 262, 269, 249, 256, 49, 50, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 3575373331357445963, 1702692810903063225, 18446744073709551615, 18446744073709551615, 274, 293, 261, 280, 51, 53, true, "Indian oceans,[XII]", "Indian oceans,[XII]"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, 18446744073709551615, 18446744073709551615, 281, 293, 268, 280, 52, 53, true, "oceans,[XII]", "oceans,[XII]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 16381206561323757770, 14007677850696664277, 18446744073709551615, 18446744073709551615, 294, 300, 281, 287, 53, 54, true, "giving", "giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, 18446744073709551615, 18446744073709551615, 308, 314, 295, 301, 56, 58, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, 18446744073709551615, 18446744073709551615, 315, 361, 302, 348, 58, 63, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, 18446744073709551615, 18446744073709551615, 362, 368, 349, 355, 63, 65, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, 18446744073709551615, 18446744073709551615, 369, 374, 356, 361, 65, 66, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, 18446744073709551615, 18446744073709551615, 376, 637, 363, 624, 67, 118, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, 18446744073709551615, 18446744073709551615, 376, 410, 363, 397, 67, 71, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, 18446744073709551615, 18446744073709551615, 389, 395, 376, 382, 68, 69, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, 18446744073709551615, 18446744073709551615, 411, 415, 398, 402, 71, 72, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, 18446744073709551615, 18446744073709551615, 416, 438, 403, 425, 72, 75, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, 18446744073709551615, 18446744073709551615, 439, 445, 426, 432, 75, 77, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, 18446744073709551615, 18446744073709551615, 446, 451, 433, 438, 77, 78, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, 18446744073709551615, 18446744073709551615, 461, 467, 448, 454, 80, 82, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, 18446744073709551615, 18446744073709551615, 492, 498, 479, 485, 86, 88, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, 18446744073709551615, 18446744073709551615, 505, 521, 492, 508, 90, 93, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, 18446744073709551615, 18446744073709551615, 515, 521, 502, 508, 92, 93, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, 18446744073709551615, 18446744073709551615, 522, 528, 509, 515, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, 18446744073709551615, 18446744073709551615, 541, 558, 528, 545, 98, 101, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, 18446744073709551615, 18446744073709551615, 559, 565, 546, 552, 101, 103, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, 18446744073709551615, 18446744073709551615, 566, 571, 553, 558, 103, 104, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, 18446744073709551615, 18446744073709551615, 579, 594, 566, 581, 107, 109, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, 18446744073709551615, 18446744073709551615, 595, 603, 582, 590, 109, 111, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, 18446744073709551615, 18446744073709551615, 619, 625, 606, 612, 113, 115, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, 18446744073709551615, 18446744073709551615, 626, 636, 613, 623, 115, 117, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, 18446744073709551615, 18446744073709551615, 638, 961, 625, 948, 118, 176, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, 18446744073709551615, 18446744073709551615, 642, 659, 629, 646, 119, 121, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, 18446744073709551615, 18446744073709551615, 660, 667, 647, 654, 121, 122, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, 18446744073709551615, 18446744073709551615, 668, 676, 655, 663, 122, 124, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, 18446744073709551615, 18446744073709551615, 677, 682, 664, 669, 124, 125, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, 18446744073709551615, 18446744073709551615, 683, 689, 670, 676, 125, 127, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, 18446744073709551615, 18446744073709551615, 709, 717, 696, 704, 130, 132, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, 18446744073709551615, 18446744073709551615, 736, 742, 723, 729, 134, 136, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, 18446744073709551615, 18446744073709551615, 778, 798, 765, 785, 143, 145, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, 18446744073709551615, 18446744073709551615, 799, 806, 786, 793, 145, 146, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, 18446744073709551615, 18446744073709551615, 807, 820, 794, 807, 146, 148, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, 18446744073709551615, 18446744073709551615, 821, 823, 808, 810, 148, 149, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, 18446744073709551615, 18446744073709551615, 824, 864, 811, 851, 149, 156, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, 18446744073709551615, 18446744073709551615, 839, 851, 826, 838, 152, 154, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, 18446744073709551615, 18446744073709551615, 856, 864, 843, 851, 155, 156, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, 18446744073709551615, 18446744073709551615, 865, 871, 852, 858, 156, 158, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, 18446744073709551615, 18446744073709551615, 872, 886, 859, 873, 158, 160, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, 18446744073709551615, 18446744073709551615, 892, 910, 879, 897, 162, 165, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, 18446744073709551615, 18446744073709551615, 916, 928, 903, 915, 167, 169, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, 18446744073709551615, 18446744073709551615, 929, 931, 916, 918, 169, 170, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, 18446744073709551615, 18446744073709551615, 962, 1384, 949, 1371, 176, 254, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, 18446744073709551615, 18446744073709551615, 966, 991, 953, 978, 177, 180, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, 18446744073709551615, 18446744073709551615, 992, 1020, 979, 1007, 180, 187, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, 18446744073709551615, 18446744073709551615, 998, 1000, 985, 987, 182, 183, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, 18446744073709551615, 18446744073709551615, 1007, 1019, 994, 1006, 184, 186, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, 18446744073709551615, 18446744073709551615, 1021, 1025, 1008, 1012, 187, 188, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, 18446744073709551615, 18446744073709551615, 1028, 1036, 1015, 1023, 189, 190, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, 18446744073709551615, 18446744073709551615, 1037, 1041, 1024, 1028, 190, 191, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, 18446744073709551615, 18446744073709551615, 1042, 1044, 1029, 1031, 191, 192, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, 18446744073709551615, 18446744073709551615, 1045, 1052, 1032, 1039, 192, 193, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, 18446744073709551615, 18446744073709551615, 1057, 1072, 1044, 1059, 194, 199, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, 18446744073709551615, 18446744073709551615, 1058, 1065, 1045, 1052, 195, 196, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, 18446744073709551615, 18446744073709551615, 1066, 1071, 1053, 1058, 196, 198, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, 18446744073709551615, 18446744073709551615, 1077, 1081, 1064, 1068, 200, 201, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, 18446744073709551615, 18446744073709551615, 1084, 1100, 1071, 1087, 202, 204, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, 18446744073709551615, 18446744073709551615, 1101, 1103, 1088, 1090, 204, 205, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, 18446744073709551615, 18446744073709551615, 1104, 1108, 1091, 1095, 205, 206, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, 18446744073709551615, 18446744073709551615, 1109, 1111, 1096, 1098, 206, 207, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, 18446744073709551615, 18446744073709551615, 1112, 1119, 1099, 1106, 207, 208, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, 18446744073709551615, 18446744073709551615, 1120, 1122, 1107, 1109, 208, 209, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, 18446744073709551615, 18446744073709551615, 1123, 1125, 1110, 1112, 209, 210, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, 18446744073709551615, 18446744073709551615, 1126, 1133, 1113, 1120, 210, 211, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, 18446744073709551615, 18446744073709551615, 1134, 1145, 1121, 1132, 211, 212, true, "2023.[5][8]", "2023.[5][8]"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, 18446744073709551615, 18446744073709551615, 1153, 1155, 1140, 1142, 213, 214, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 101756270285429158, 6309445736017161690, 18446744073709551615, 18446744073709551615, 1158, 1192, 1145, 1179, 215, 218, true, "unitary semi-presidential republic", "unitary semi-presidential republic"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, 18446744073709551615, 18446744073709551615, 1166, 1183, 1153, 1170, 216, 217, true, "semi-presidential", "semi-presidential"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, 18446744073709551615, 18446744073709551615, 1193, 1197, 1180, 1184, 218, 219, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, 18446744073709551615, 18446744073709551615, 1202, 1209, 1189, 1196, 220, 221, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, 18446744073709551615, 18446744073709551615, 1210, 1212, 1197, 1199, 221, 222, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, 18446744073709551615, 18446744073709551615, 1213, 1218, 1200, 1205, 222, 223, true, "Paris", "Paris"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7060736712546470087, 14254659311922306724, 18446744073709551615, 18446744073709551615, 1224, 1246, 1211, 1233, 225, 228, true, "countrys largest city", "country's largest city"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, 18446744073709551615, 18446744073709551615, 1224, 1233, 1211, 1220, 225, 226, true, "countrys", "country's"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, 18446744073709551615, 18446744073709551615, 1251, 1286, 1238, 1273, 229, 234, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, 18446744073709551615, 18446744073709551615, 1269, 1286, 1256, 1273, 232, 234, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, 18446744073709551615, 18446744073709551615, 1288, 1311, 1275, 1298, 235, 239, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, 18446744073709551615, 18446744073709551615, 1312, 1319, 1299, 1306, 239, 240, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, 18446744073709551615, 18446744073709551615, 1320, 1383, 1307, 1370, 240, 253, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, 18446744073709551615, 18446744073709551615, 1320, 1329, 1307, 1316, 240, 241, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, 18446744073709551615, 18446744073709551615, 1331, 1335, 1318, 1322, 242, 243, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, 18446744073709551615, 18446744073709551615, 1337, 1345, 1324, 1332, 244, 245, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, 18446744073709551615, 18446744073709551615, 1347, 1352, 1334, 1339, 246, 247, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, 18446744073709551615, 18446744073709551615, 1354, 1362, 1341, 1349, 248, 249, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, 18446744073709551615, 18446744073709551615, 1364, 1374, 1351, 1361, 250, 251, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, 18446744073709551615, 18446744073709551615, 1379, 1383, 1366, 1370, 252, 253, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.9300000071525574], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.8100000023841858]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} +{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, 18446744073709551615, 18446744073709551615, 0, 188, 0, 188, 0, 28, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, 18446744073709551615, 18446744073709551615, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, 18446744073709551615, 18446744073709551615, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004679635976, 18446744073709551615, 18446744073709551615, 24, 41, 24, 41, 5, 7, true, "interband pairing", "interband pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, 18446744073709551615, 18446744073709551615, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["term", "enum-term-mark-1", 4522339299074192207, "TEXT", "#", 1.0, 18178792033664231045, 5215905145529509301, 18446744073709551615, 18446744073709551615, 45, 87, 45, 87, 8, 13, true, "two-band s-wave and d-wave superconductors", "two-band s-wave and d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, 18446744073709551615, 18446744073709551615, 45, 53, 45, 53, 8, 9, true, "two-band", "two-band"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, 18446744073709551615, 18446744073709551615, 54, 60, 54, 60, 9, 10, true, "s-wave", "s-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15865120430118694837, 607662791561950043, 18446744073709551615, 18446744073709551615, 65, 87, 65, 87, 11, 13, true, "d-wave superconductors", "d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, 18446744073709551615, 18446744073709551615, 65, 71, 65, 71, 11, 12, true, "d-wave", "d-wave"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, 18446744073709551615, 18446744073709551615, 88, 92, 88, 92, 13, 14, true, "with", "with"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7066208506210013514, 1315102098090612032, 18446744073709551615, 18446744073709551615, 93, 105, 93, 105, 14, 16, true, "D4h symmetry", "D4h symmetry"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, 18446744073709551615, 18446744073709551615, 93, 96, 93, 96, 14, 15, true, "D4h", "D4h"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, 18446744073709551615, 18446744073709551615, 106, 113, 106, 113, 16, 18, true, "in both", "in both"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5172475826427571765, 16752879714615995236, 18446744073709551615, 18446744073709551615, 114, 137, 114, 137, 18, 20, true, "time-reversal invariant", "time-reversal invariant"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, 18446744073709551615, 18446744073709551615, 114, 127, 114, 127, 18, 19, true, "time-reversal", "time-reversal"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, 18446744073709551615, 18446744073709551615, 146, 148, 146, 148, 22, 23, true, "as", "as"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 10193294999568911218, 6331719907444433820, 18446744073709551615, 18446744073709551615, 149, 171, 149, 171, 23, 25, true, "time-reversal symmetry", "time-reversal symmetry"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, 18446744073709551615, 18446744073709551615, 149, 162, 149, 162, 23, 24, true, "time-reversal", "time-reversal"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 25, 26, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, 18446744073709551615, 18446744073709551615, 181, 187, 181, 187, 26, 27, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, 18446744073709551615, 18446744073709551615, 189, 384, 189, 384, 28, 58, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, 18446744073709551615, 18446744073709551615, 193, 201, 193, 201, 29, 30, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 30, 31, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, 18446744073709551615, 18446744073709551615, 205, 214, 205, 214, 31, 32, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, 18446744073709551615, 18446744073709551615, 215, 244, 215, 244, 32, 35, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, 18446744073709551615, 18446744073709551615, 249, 264, 249, 264, 36, 38, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, 18446744073709551615, 18446744073709551615, 265, 271, 265, 271, 38, 40, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, 18446744073709551615, 18446744073709551615, 272, 286, 272, 286, 40, 41, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, 18446744073709551615, 18446744073709551615, 288, 293, 288, 293, 42, 43, true, "nodes", "nodes"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 3766089650286616147, 5895288868427388531, 18446744073709551615, 18446744073709551615, 294, 309, 294, 309, 43, 45, true, "can (dis)appear", "can (dis)appear"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, 18446744073709551615, 18446744073709551615, 298, 309, 298, 309, 44, 45, true, "(dis)appear", "(dis)appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, 18446744073709551615, 18446744073709551615, 311, 316, 311, 316, 46, 47, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, 18446744073709551615, 18446744073709551615, 322, 327, 322, 327, 49, 50, true, "leave", "leave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106670696871780136, 17807492235586576248, 18446744073709551615, 18446744073709551615, 328, 351, 328, 351, 50, 52, true, "high-symmetry locations", "high-symmetry locations"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, 18446744073709551615, 18446744073709551615, 328, 341, 328, 341, 50, 51, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, 18446744073709551615, 18446744073709551615, 357, 374, 357, 374, 53, 55, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, 18446744073709551615, 18446744073709551615, 375, 383, 375, 383, 55, 57, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, 18446744073709551615, 18446744073709551615, 385, 594, 385, 594, 58, 93, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, 18446744073709551615, 18446744073709551615, 398, 404, 398, 404, 60, 62, true, "in the", "in the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15559186615879240368, 12910915472651789195, 18446744073709551615, 18446744073709551615, 405, 416, 405, 416, 62, 64, true, "d-wave case", "d-wave case"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, 18446744073709551615, 18446744073709551615, 405, 411, 405, 411, 62, 63, true, "d-wave", "d-wave"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, 18446744073709551615, 18446744073709551615, 421, 425, 421, 425, 66, 67, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, 18446744073709551615, 18446744073709551615, 426, 430, 426, 430, 67, 68, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, 18446744073709551615, 18446744073709551615, 440, 454, 440, 454, 70, 72, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, 18446744073709551615, 18446744073709551615, 455, 475, 455, 475, 72, 74, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, 18446744073709551615, 18446744073709551615, 481, 490, 481, 490, 75, 76, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, 18446744073709551615, 18446744073709551615, 491, 498, 491, 498, 76, 77, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, 18446744073709551615, 18446744073709551615, 499, 508, 499, 508, 77, 78, true, "increases", "increases"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 18352755674675419019, 8051640294707098683, 18446744073709551615, 18446744073709551615, 510, 547, 510, 547, 79, 84, true, "flat zero-energy Andreev bound states", "flat zero-energy Andreev bound states"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, 18446744073709551615, 18446744073709551615, 515, 526, 515, 526, 80, 81, true, "zero-energy", "zero-energy"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, 18446744073709551615, 18446744073709551615, 548, 555, 548, 555, 84, 86, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, 18446744073709551615, 18446744073709551615, 560, 570, 560, 570, 87, 88, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, 18446744073709551615, 18446744073709551615, 571, 573, 571, 573, 88, 89, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, 18446744073709551615, 18446744073709551615, 574, 593, 574, 593, 89, 92, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.8799999952316284], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.9399999976158142]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} diff --git a/tests/data/texts/test_02A_text_01.jsonl b/tests/data/texts/test_02A_text_01.jsonl index 91bccab4..ac2358c7 100644 --- a/tests/data/texts/test_02A_text_01.jsonl +++ b/tests/data/texts/test_02A_text_01.jsonl @@ -1 +1 @@ -{"applied-models": ["cite", "expression", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "", "hash": 253473544312511038, "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, 18446744073709551615, 18446744073709551615, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, 18446744073709551615, 18446744073709551615, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", "en", 0.5799999833106995]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"applied-models": ["cite", "expression", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "", "hash": 253473544312511038, "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, 18446744073709551615, 18446744073709551615, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, 18446744073709551615, 18446744073709551615, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} diff --git a/tests/data/texts/test_02B_text_01.jsonl b/tests/data/texts/test_02B_text_01.jsonl index 0bc897d5..b59cedd9 100644 --- a/tests/data/texts/test_02B_text_01.jsonl +++ b/tests/data/texts/test_02B_text_01.jsonl @@ -1 +1 @@ -{"dloc": "", "hash": 253473544312511038, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", "en", 0.5799999833106995]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"dloc": "", "hash": 253473544312511038, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.5799999833106995]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} diff --git a/tests/test_nlp.py b/tests/test_nlp.py index ed5715e5..38eeefd2 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -11,6 +11,12 @@ GENERATE=False +def round_floats(o): + if isinstance(o, float): return round(o, 2) + if isinstance(o, dict): return {k: round_floats(v) for k, v in o.items()} + if isinstance(o, (list, tuple)): return [round_floats(x) for x in o] + return o + def test_01_load_nlp_models(): models = load_pretrained_nlp_models() #print(f"models: {models}") @@ -37,6 +43,8 @@ def test_02A_run_nlp_models_on_text(): model = init_nlp_model("sentence;language;term") sres = model.apply_on_text("FeSe is a material.") + sres = round_floats(sres) + if GENERATE: # generate the test-data fw = open(source, "w") @@ -49,7 +57,8 @@ def test_02A_run_nlp_models_on_text(): with open(target) as fr: tres = json.load(fr) - + tres = round_floats(tres) + for label in ["properties", "instances"]: check_dimensions(sres[label]) assert label in sres @@ -192,14 +201,10 @@ def test_04A_terms(): for i,row_i in enumerate(res["properties"]["data"]): row_j = data["properties"]["data"][i] - #print(i, "\t", row_i) - #print(i, "\t", row_j) assert row_i==row_j for i,row_i in enumerate(res["instances"]["data"]): row_j = data["instances"]["data"][i] - #print(i, "\t", row_i) - #print(i, "\t", row_j) assert row_i==row_j assert res==data @@ -273,7 +278,6 @@ def test_04C_references(): for line in lines: data = json.loads(line) res = model.apply_on_text(data["text"]) - assert res==data """ From 6796127be41e07b805646019f52086c90ce5a0ea Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Wed, 15 Nov 2023 07:06:40 +0100 Subject: [PATCH 05/22] working on the tests Signed-off-by: Peter Staar --- tests/test_nlp.py | 50 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 38eeefd2..1dc8512f 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -41,8 +41,8 @@ def test_02A_run_nlp_models_on_text(): target = source model = init_nlp_model("sentence;language;term") + sres = model.apply_on_text("FeSe is a material.") - sres = round_floats(sres) if GENERATE: # generate the test-data @@ -76,8 +76,10 @@ def test_02B_run_nlp_models_on_text(): filters = ["properties"] model = init_nlp_model("sentence;language;term", filters) - sres = model.apply_on_text("FeSe is a material.") + sres = model.apply_on_text("FeSe is a material.") + sres = round_floats(sres) + if GENERATE: # generate the test-data fw = open(source, "w") @@ -90,7 +92,8 @@ def test_02B_run_nlp_models_on_text(): with open(target) as fr: tres = json.load(fr) - + tres = round_floats(tres) + for label in ["text", "properties"]: assert label in sres @@ -105,9 +108,10 @@ def test_03A_run_nlp_models_on_document(): doc = json.load(fr) model = init_nlp_model("sentence;language;term;reference;abbreviation") - res = model.apply_on_doc(doc) - #print(res.keys()) + res = model.apply_on_doc(doc) + res = round_floats(res) + for label in ["description", "body", "meta", "page-elements", "texts", "tables", "figures", "properties", "instances", "relations"]: @@ -125,8 +129,9 @@ def test_03B_run_nlp_models_on_document(): filters = ["applied-models", "properties"] model = init_nlp_model("sentence;language;term;reference", filters) + res = model.apply_on_doc(doc) - #print(res.keys()) + res = round_floats(res) for label in ["dloc", "applied-models", "description", "body", "meta", @@ -139,6 +144,7 @@ def test_03B_run_nlp_models_on_document(): check_dimensions(res["properties"]) +""" def test_03C_run_nlp_models_on_document(): model = init_nlp_model("language;semantic;sentence;term;verb;conn;geoloc;reference") @@ -151,6 +157,8 @@ def test_03C_run_nlp_models_on_document(): doc = json.load(fr) res = model.apply_on_doc(doc) + res = round_floats(res) + extract_references_from_doc(res) fw = open(target, "w") @@ -164,11 +172,14 @@ def test_03C_run_nlp_models_on_document(): sdoc = json.load(fr) res = model.apply_on_doc(sdoc) + res = round_floats(res) with open(target) as fr: tdoc = json.load(fr) - + tdoc = round_floats(tdoc) + assert res==tdoc +""" def test_04A_terms(): @@ -185,8 +196,11 @@ def test_04A_terms(): for line in lines: data = json.loads(line) + data = round_floats(data) + res = model.apply_on_text(data["text"]) - + res = round_floats(res) + fw.write(json.dumps(res)+"\n") fw.close() @@ -197,8 +211,11 @@ def test_04A_terms(): for line in lines: data = json.loads(line) - res = model.apply_on_text(data["text"]) + data = round_floats(data) + res = model.apply_on_text(data["text"]) + res = round_floats(res) + for i,row_i in enumerate(res["properties"]["data"]): row_j = data["properties"]["data"][i] assert row_i==row_j @@ -227,7 +244,10 @@ def test_04B_semantic(): for line in lines: data = json.loads(line) + data = round_floats(data) + res = model.apply_on_text(data["text"]) + res = round_floats(res) fw.write(json.dumps(res)+"\n") @@ -240,8 +260,11 @@ def test_04B_semantic(): for line in lines: data = json.loads(line) + data = round_floats(data) + res = model.apply_on_text(data["text"]) - + res = round_floats(res) + for i,row_i in enumerate(res["properties"]["data"]): row_j = data["properties"]["data"][i] assert row_i==row_j @@ -264,7 +287,10 @@ def test_04C_references(): for line in lines: data = json.loads(line) + data = round_floats(data) + res = model.apply_on_text(data["text"]) + res = round_floats(res) fw.write(json.dumps(res)+"\n") @@ -277,7 +303,11 @@ def test_04C_references(): for line in lines: data = json.loads(line) + data = round_floats(data) + res = model.apply_on_text(data["text"]) + res = round_floats(res) + assert res==data """ From b64385c91efeae6d363b0520ce8ecee3d7c99fb6 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Wed, 15 Nov 2023 07:42:19 +0100 Subject: [PATCH 06/22] working on the tests (2) Signed-off-by: Peter Staar --- tests/data/texts/references.nlp.jsonl | 2 +- tests/data/texts/semantics.nlp.jsonl | 10 +++++----- tests/data/texts/terms.nlp.jsonl | 4 ++-- tests/data/texts/test_02B_text_01.jsonl | 2 +- tests/test_nlp.py | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/data/texts/references.nlp.jsonl b/tests/data/texts/references.nlp.jsonl index 5b3f1000..58dfb0bd 100644 --- a/tests/data/texts/references.nlp.jsonl +++ b/tests/data/texts/references.nlp.jsonl @@ -1,2 +1,2 @@ {"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, 18446744073709551615, 18446744073709551615, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, 18446744073709551615, 18446744073709551615, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, 18446744073709551615, 18446744073709551615, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, 18446744073709551615, 18446744073709551615, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, 18446744073709551615, 18446744073709551615, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, 18446744073709551615, 18446744073709551615, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, 18446744073709551615, 18446744073709551615, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, 18446744073709551615, 18446744073709551615, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, 18446744073709551615, 18446744073709551615, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, 18446744073709551615, 18446744073709551615, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, 18446744073709551615, 18446744073709551615, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, 18446744073709551615, 18446744073709551615, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, 18446744073709551615, 18446744073709551615, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, 18446744073709551615, 18446744073709551615, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, 18446744073709551615, 18446744073709551615, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, 18446744073709551615, 18446744073709551615, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, 18446744073709551615, 18446744073709551615, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, 18446744073709551615, 18446744073709551615, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, 18446744073709551615, 18446744073709551615, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, 18446744073709551615, 18446744073709551615, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, 18446744073709551615, 18446744073709551615, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, 18446744073709551615, 18446744073709551615, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, 18446744073709551615, 18446744073709551615, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, 18446744073709551615, 18446744073709551615, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, 18446744073709551615, 18446744073709551615, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, 18446744073709551615, 18446744073709551615, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, 18446744073709551615, 18446744073709551615, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, 18446744073709551615, 18446744073709551615, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/semantics.nlp.jsonl b/tests/data/texts/semantics.nlp.jsonl index fdda794a..3e6cbe02 100644 --- a/tests/data/texts/semantics.nlp.jsonl +++ b/tests/data/texts/semantics.nlp.jsonl @@ -1,7 +1,7 @@ -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 7759316032128614217, "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", 7759316032128614217, "TEXT", "#", "header", 0.5400000214576721]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14339562343989983509, "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", 14339562343989983509, "TEXT", "#", "meta-data", 0.9100000262260437]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 18143996061359107703, "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", 18143996061359107703, "TEXT", "#", "meta-data", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 7759316032128614217, "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", 7759316032128614217, "TEXT", "#", "subtitle", 0.51]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14339562343989983509, "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", 14339562343989983509, "TEXT", "#", "paragraph", 0.85]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 18143996061359107703, "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", 18143996061359107703, "TEXT", "#", "reference", 0.71]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} {"applied-models": ["link", "numval"], "dloc": "", "hash": 11035282656876697300, "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", 11035282656876697300, "TEXT", "#", "meta-data", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14993488697470108654, "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, 18446744073709551615, 18446744073709551615, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, 18446744073709551615, 18446744073709551615, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", 14993488697470108654, "TEXT", "#", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14993488697470108654, "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, 18446744073709551615, 18446744073709551615, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, 18446744073709551615, 18446744073709551615, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", 14993488697470108654, "TEXT", "#", "paragraph", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} {"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, 18446744073709551615, 18446744073709551615, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, 18446744073709551615, 18446744073709551615, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.9900000095367432]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, 18446744073709551615, 18446744073709551615, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/terms.nlp.jsonl b/tests/data/texts/terms.nlp.jsonl index 2a927824..32250ccb 100644 --- a/tests/data/texts/terms.nlp.jsonl +++ b/tests/data/texts/terms.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, 18446744073709551615, 18446744073709551615, 0, 177, 0, 164, 0, 35, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, 18446744073709551615, 18446744073709551615, 7, 31, 7, 26, 1, 7, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, 18446744073709551615, 18446744073709551615, 27, 30, 24, 25, 5, 6, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, 18446744073709551615, 18446744073709551615, 48, 63, 43, 58, 10, 12, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, 18446744073709551615, 18446744073709551615, 64, 122, 59, 109, 12, 22, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, 18446744073709551615, 18446744073709551615, 73, 95, 68, 88, 15, 17, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, 18446744073709551615, 18446744073709551615, 96, 106, 89, 98, 17, 19, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, 18446744073709551615, 18446744073709551615, 107, 121, 99, 108, 19, 21, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, 18446744073709551615, 18446744073709551615, 123, 127, 110, 114, 23, 26, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, 18446744073709551615, 18446744073709551615, 124, 126, 111, 113, 24, 25, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, 18446744073709551615, 18446744073709551615, 128, 130, 115, 117, 26, 27, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, 18446744073709551615, 18446744073709551615, 133, 140, 120, 127, 28, 29, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, 18446744073709551615, 18446744073709551615, 141, 158, 128, 145, 29, 31, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, 18446744073709551615, 18446744073709551615, 159, 161, 146, 148, 31, 32, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, 18446744073709551615, 18446744073709551615, 162, 176, 149, 163, 32, 34, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, 18446744073709551615, 18446744073709551615, 170, 176, 157, 163, 33, 34, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, 18446744073709551615, 18446744073709551615, 178, 375, 165, 362, 35, 67, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, 18446744073709551615, 18446744073709551615, 186, 194, 173, 181, 37, 38, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, 18446744073709551615, 18446744073709551615, 195, 211, 182, 198, 38, 40, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, 18446744073709551615, 18446744073709551615, 204, 227, 191, 214, 39, 42, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, 18446744073709551615, 18446744073709551615, 216, 227, 203, 214, 41, 42, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, 18446744073709551615, 18446744073709551615, 228, 234, 215, 221, 42, 44, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, 18446744073709551615, 18446744073709551615, 235, 243, 222, 230, 44, 45, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16827399947339178045, 496128657873109341, 18446744073709551615, 18446744073709551615, 252, 293, 239, 280, 47, 53, true, "Atlantic, Pacific and Indian oceans,[XII]", "Atlantic, Pacific and Indian oceans,[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, 18446744073709551615, 18446744073709551615, 252, 260, 239, 247, 47, 48, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, 18446744073709551615, 18446744073709551615, 262, 269, 249, 256, 49, 50, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 3575373331357445963, 1702692810903063225, 18446744073709551615, 18446744073709551615, 274, 293, 261, 280, 51, 53, true, "Indian oceans,[XII]", "Indian oceans,[XII]"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, 18446744073709551615, 18446744073709551615, 281, 293, 268, 280, 52, 53, true, "oceans,[XII]", "oceans,[XII]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 16381206561323757770, 14007677850696664277, 18446744073709551615, 18446744073709551615, 294, 300, 281, 287, 53, 54, true, "giving", "giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, 18446744073709551615, 18446744073709551615, 308, 314, 295, 301, 56, 58, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, 18446744073709551615, 18446744073709551615, 315, 361, 302, 348, 58, 63, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, 18446744073709551615, 18446744073709551615, 362, 368, 349, 355, 63, 65, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, 18446744073709551615, 18446744073709551615, 369, 374, 356, 361, 65, 66, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, 18446744073709551615, 18446744073709551615, 376, 637, 363, 624, 67, 118, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, 18446744073709551615, 18446744073709551615, 376, 410, 363, 397, 67, 71, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, 18446744073709551615, 18446744073709551615, 389, 395, 376, 382, 68, 69, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, 18446744073709551615, 18446744073709551615, 411, 415, 398, 402, 71, 72, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, 18446744073709551615, 18446744073709551615, 416, 438, 403, 425, 72, 75, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, 18446744073709551615, 18446744073709551615, 439, 445, 426, 432, 75, 77, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, 18446744073709551615, 18446744073709551615, 446, 451, 433, 438, 77, 78, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, 18446744073709551615, 18446744073709551615, 461, 467, 448, 454, 80, 82, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, 18446744073709551615, 18446744073709551615, 492, 498, 479, 485, 86, 88, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, 18446744073709551615, 18446744073709551615, 505, 521, 492, 508, 90, 93, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, 18446744073709551615, 18446744073709551615, 515, 521, 502, 508, 92, 93, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, 18446744073709551615, 18446744073709551615, 522, 528, 509, 515, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, 18446744073709551615, 18446744073709551615, 541, 558, 528, 545, 98, 101, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, 18446744073709551615, 18446744073709551615, 559, 565, 546, 552, 101, 103, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, 18446744073709551615, 18446744073709551615, 566, 571, 553, 558, 103, 104, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, 18446744073709551615, 18446744073709551615, 579, 594, 566, 581, 107, 109, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, 18446744073709551615, 18446744073709551615, 595, 603, 582, 590, 109, 111, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, 18446744073709551615, 18446744073709551615, 619, 625, 606, 612, 113, 115, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, 18446744073709551615, 18446744073709551615, 626, 636, 613, 623, 115, 117, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, 18446744073709551615, 18446744073709551615, 638, 961, 625, 948, 118, 176, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, 18446744073709551615, 18446744073709551615, 642, 659, 629, 646, 119, 121, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, 18446744073709551615, 18446744073709551615, 660, 667, 647, 654, 121, 122, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, 18446744073709551615, 18446744073709551615, 668, 676, 655, 663, 122, 124, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, 18446744073709551615, 18446744073709551615, 677, 682, 664, 669, 124, 125, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, 18446744073709551615, 18446744073709551615, 683, 689, 670, 676, 125, 127, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, 18446744073709551615, 18446744073709551615, 709, 717, 696, 704, 130, 132, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, 18446744073709551615, 18446744073709551615, 736, 742, 723, 729, 134, 136, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, 18446744073709551615, 18446744073709551615, 778, 798, 765, 785, 143, 145, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, 18446744073709551615, 18446744073709551615, 799, 806, 786, 793, 145, 146, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, 18446744073709551615, 18446744073709551615, 807, 820, 794, 807, 146, 148, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, 18446744073709551615, 18446744073709551615, 821, 823, 808, 810, 148, 149, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, 18446744073709551615, 18446744073709551615, 824, 864, 811, 851, 149, 156, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, 18446744073709551615, 18446744073709551615, 839, 851, 826, 838, 152, 154, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, 18446744073709551615, 18446744073709551615, 856, 864, 843, 851, 155, 156, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, 18446744073709551615, 18446744073709551615, 865, 871, 852, 858, 156, 158, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, 18446744073709551615, 18446744073709551615, 872, 886, 859, 873, 158, 160, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, 18446744073709551615, 18446744073709551615, 892, 910, 879, 897, 162, 165, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, 18446744073709551615, 18446744073709551615, 916, 928, 903, 915, 167, 169, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, 18446744073709551615, 18446744073709551615, 929, 931, 916, 918, 169, 170, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, 18446744073709551615, 18446744073709551615, 962, 1384, 949, 1371, 176, 254, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, 18446744073709551615, 18446744073709551615, 966, 991, 953, 978, 177, 180, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, 18446744073709551615, 18446744073709551615, 992, 1020, 979, 1007, 180, 187, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, 18446744073709551615, 18446744073709551615, 998, 1000, 985, 987, 182, 183, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, 18446744073709551615, 18446744073709551615, 1007, 1019, 994, 1006, 184, 186, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, 18446744073709551615, 18446744073709551615, 1021, 1025, 1008, 1012, 187, 188, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, 18446744073709551615, 18446744073709551615, 1028, 1036, 1015, 1023, 189, 190, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, 18446744073709551615, 18446744073709551615, 1037, 1041, 1024, 1028, 190, 191, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, 18446744073709551615, 18446744073709551615, 1042, 1044, 1029, 1031, 191, 192, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, 18446744073709551615, 18446744073709551615, 1045, 1052, 1032, 1039, 192, 193, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, 18446744073709551615, 18446744073709551615, 1057, 1072, 1044, 1059, 194, 199, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, 18446744073709551615, 18446744073709551615, 1058, 1065, 1045, 1052, 195, 196, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, 18446744073709551615, 18446744073709551615, 1066, 1071, 1053, 1058, 196, 198, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, 18446744073709551615, 18446744073709551615, 1077, 1081, 1064, 1068, 200, 201, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, 18446744073709551615, 18446744073709551615, 1084, 1100, 1071, 1087, 202, 204, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, 18446744073709551615, 18446744073709551615, 1101, 1103, 1088, 1090, 204, 205, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, 18446744073709551615, 18446744073709551615, 1104, 1108, 1091, 1095, 205, 206, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, 18446744073709551615, 18446744073709551615, 1109, 1111, 1096, 1098, 206, 207, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, 18446744073709551615, 18446744073709551615, 1112, 1119, 1099, 1106, 207, 208, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, 18446744073709551615, 18446744073709551615, 1120, 1122, 1107, 1109, 208, 209, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, 18446744073709551615, 18446744073709551615, 1123, 1125, 1110, 1112, 209, 210, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, 18446744073709551615, 18446744073709551615, 1126, 1133, 1113, 1120, 210, 211, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, 18446744073709551615, 18446744073709551615, 1134, 1145, 1121, 1132, 211, 212, true, "2023.[5][8]", "2023.[5][8]"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, 18446744073709551615, 18446744073709551615, 1153, 1155, 1140, 1142, 213, 214, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 101756270285429158, 6309445736017161690, 18446744073709551615, 18446744073709551615, 1158, 1192, 1145, 1179, 215, 218, true, "unitary semi-presidential republic", "unitary semi-presidential republic"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, 18446744073709551615, 18446744073709551615, 1166, 1183, 1153, 1170, 216, 217, true, "semi-presidential", "semi-presidential"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, 18446744073709551615, 18446744073709551615, 1193, 1197, 1180, 1184, 218, 219, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, 18446744073709551615, 18446744073709551615, 1202, 1209, 1189, 1196, 220, 221, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, 18446744073709551615, 18446744073709551615, 1210, 1212, 1197, 1199, 221, 222, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, 18446744073709551615, 18446744073709551615, 1213, 1218, 1200, 1205, 222, 223, true, "Paris", "Paris"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7060736712546470087, 14254659311922306724, 18446744073709551615, 18446744073709551615, 1224, 1246, 1211, 1233, 225, 228, true, "countrys largest city", "country's largest city"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, 18446744073709551615, 18446744073709551615, 1224, 1233, 1211, 1220, 225, 226, true, "countrys", "country's"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, 18446744073709551615, 18446744073709551615, 1251, 1286, 1238, 1273, 229, 234, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, 18446744073709551615, 18446744073709551615, 1269, 1286, 1256, 1273, 232, 234, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, 18446744073709551615, 18446744073709551615, 1288, 1311, 1275, 1298, 235, 239, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, 18446744073709551615, 18446744073709551615, 1312, 1319, 1299, 1306, 239, 240, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, 18446744073709551615, 18446744073709551615, 1320, 1383, 1307, 1370, 240, 253, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, 18446744073709551615, 18446744073709551615, 1320, 1329, 1307, 1316, 240, 241, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, 18446744073709551615, 18446744073709551615, 1331, 1335, 1318, 1322, 242, 243, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, 18446744073709551615, 18446744073709551615, 1337, 1345, 1324, 1332, 244, 245, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, 18446744073709551615, 18446744073709551615, 1347, 1352, 1334, 1339, 246, 247, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, 18446744073709551615, 18446744073709551615, 1354, 1362, 1341, 1349, 248, 249, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, 18446744073709551615, 18446744073709551615, 1364, 1374, 1351, 1361, 250, 251, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, 18446744073709551615, 18446744073709551615, 1379, 1383, 1366, 1370, 252, 253, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.9300000071525574], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.8100000023841858]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, 18446744073709551615, 18446744073709551615, 0, 188, 0, 188, 0, 28, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, 18446744073709551615, 18446744073709551615, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, 18446744073709551615, 18446744073709551615, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004679635976, 18446744073709551615, 18446744073709551615, 24, 41, 24, 41, 5, 7, true, "interband pairing", "interband pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, 18446744073709551615, 18446744073709551615, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["term", "enum-term-mark-1", 4522339299074192207, "TEXT", "#", 1.0, 18178792033664231045, 5215905145529509301, 18446744073709551615, 18446744073709551615, 45, 87, 45, 87, 8, 13, true, "two-band s-wave and d-wave superconductors", "two-band s-wave and d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, 18446744073709551615, 18446744073709551615, 45, 53, 45, 53, 8, 9, true, "two-band", "two-band"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, 18446744073709551615, 18446744073709551615, 54, 60, 54, 60, 9, 10, true, "s-wave", "s-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15865120430118694837, 607662791561950043, 18446744073709551615, 18446744073709551615, 65, 87, 65, 87, 11, 13, true, "d-wave superconductors", "d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, 18446744073709551615, 18446744073709551615, 65, 71, 65, 71, 11, 12, true, "d-wave", "d-wave"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, 18446744073709551615, 18446744073709551615, 88, 92, 88, 92, 13, 14, true, "with", "with"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7066208506210013514, 1315102098090612032, 18446744073709551615, 18446744073709551615, 93, 105, 93, 105, 14, 16, true, "D4h symmetry", "D4h symmetry"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, 18446744073709551615, 18446744073709551615, 93, 96, 93, 96, 14, 15, true, "D4h", "D4h"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, 18446744073709551615, 18446744073709551615, 106, 113, 106, 113, 16, 18, true, "in both", "in both"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5172475826427571765, 16752879714615995236, 18446744073709551615, 18446744073709551615, 114, 137, 114, 137, 18, 20, true, "time-reversal invariant", "time-reversal invariant"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, 18446744073709551615, 18446744073709551615, 114, 127, 114, 127, 18, 19, true, "time-reversal", "time-reversal"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, 18446744073709551615, 18446744073709551615, 146, 148, 146, 148, 22, 23, true, "as", "as"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 10193294999568911218, 6331719907444433820, 18446744073709551615, 18446744073709551615, 149, 171, 149, 171, 23, 25, true, "time-reversal symmetry", "time-reversal symmetry"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, 18446744073709551615, 18446744073709551615, 149, 162, 149, 162, 23, 24, true, "time-reversal", "time-reversal"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 25, 26, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, 18446744073709551615, 18446744073709551615, 181, 187, 181, 187, 26, 27, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, 18446744073709551615, 18446744073709551615, 189, 384, 189, 384, 28, 58, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, 18446744073709551615, 18446744073709551615, 193, 201, 193, 201, 29, 30, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 30, 31, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, 18446744073709551615, 18446744073709551615, 205, 214, 205, 214, 31, 32, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, 18446744073709551615, 18446744073709551615, 215, 244, 215, 244, 32, 35, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, 18446744073709551615, 18446744073709551615, 249, 264, 249, 264, 36, 38, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, 18446744073709551615, 18446744073709551615, 265, 271, 265, 271, 38, 40, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, 18446744073709551615, 18446744073709551615, 272, 286, 272, 286, 40, 41, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, 18446744073709551615, 18446744073709551615, 288, 293, 288, 293, 42, 43, true, "nodes", "nodes"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 3766089650286616147, 5895288868427388531, 18446744073709551615, 18446744073709551615, 294, 309, 294, 309, 43, 45, true, "can (dis)appear", "can (dis)appear"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, 18446744073709551615, 18446744073709551615, 298, 309, 298, 309, 44, 45, true, "(dis)appear", "(dis)appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, 18446744073709551615, 18446744073709551615, 311, 316, 311, 316, 46, 47, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, 18446744073709551615, 18446744073709551615, 322, 327, 322, 327, 49, 50, true, "leave", "leave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106670696871780136, 17807492235586576248, 18446744073709551615, 18446744073709551615, 328, 351, 328, 351, 50, 52, true, "high-symmetry locations", "high-symmetry locations"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, 18446744073709551615, 18446744073709551615, 328, 341, 328, 341, 50, 51, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, 18446744073709551615, 18446744073709551615, 357, 374, 357, 374, 53, 55, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, 18446744073709551615, 18446744073709551615, 375, 383, 375, 383, 55, 57, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, 18446744073709551615, 18446744073709551615, 385, 594, 385, 594, 58, 93, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, 18446744073709551615, 18446744073709551615, 398, 404, 398, 404, 60, 62, true, "in the", "in the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15559186615879240368, 12910915472651789195, 18446744073709551615, 18446744073709551615, 405, 416, 405, 416, 62, 64, true, "d-wave case", "d-wave case"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, 18446744073709551615, 18446744073709551615, 405, 411, 405, 411, 62, 63, true, "d-wave", "d-wave"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, 18446744073709551615, 18446744073709551615, 421, 425, 421, 425, 66, 67, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, 18446744073709551615, 18446744073709551615, 426, 430, 426, 430, 67, 68, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, 18446744073709551615, 18446744073709551615, 440, 454, 440, 454, 70, 72, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, 18446744073709551615, 18446744073709551615, 455, 475, 455, 475, 72, 74, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, 18446744073709551615, 18446744073709551615, 481, 490, 481, 490, 75, 76, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, 18446744073709551615, 18446744073709551615, 491, 498, 491, 498, 76, 77, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, 18446744073709551615, 18446744073709551615, 499, 508, 499, 508, 77, 78, true, "increases", "increases"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 18352755674675419019, 8051640294707098683, 18446744073709551615, 18446744073709551615, 510, 547, 510, 547, 79, 84, true, "flat zero-energy Andreev bound states", "flat zero-energy Andreev bound states"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, 18446744073709551615, 18446744073709551615, 515, 526, 515, 526, 80, 81, true, "zero-energy", "zero-energy"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, 18446744073709551615, 18446744073709551615, 548, 555, 548, 555, 84, 86, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, 18446744073709551615, 18446744073709551615, 560, 570, 560, 570, 87, 88, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, 18446744073709551615, 18446744073709551615, 571, 573, 571, 573, 88, 89, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, 18446744073709551615, 18446744073709551615, 574, 593, 574, 593, 89, 92, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.8799999952316284], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.9399999976158142]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} +{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, 18446744073709551615, 18446744073709551615, 0, 177, 0, 164, 0, 35, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, 18446744073709551615, 18446744073709551615, 7, 31, 7, 26, 1, 7, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, 18446744073709551615, 18446744073709551615, 27, 30, 24, 25, 5, 6, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, 18446744073709551615, 18446744073709551615, 48, 63, 43, 58, 10, 12, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, 18446744073709551615, 18446744073709551615, 64, 122, 59, 109, 12, 22, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, 18446744073709551615, 18446744073709551615, 73, 95, 68, 88, 15, 17, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, 18446744073709551615, 18446744073709551615, 96, 106, 89, 98, 17, 19, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, 18446744073709551615, 18446744073709551615, 107, 121, 99, 108, 19, 21, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, 18446744073709551615, 18446744073709551615, 123, 127, 110, 114, 23, 26, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, 18446744073709551615, 18446744073709551615, 124, 126, 111, 113, 24, 25, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, 18446744073709551615, 18446744073709551615, 128, 130, 115, 117, 26, 27, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, 18446744073709551615, 18446744073709551615, 133, 140, 120, 127, 28, 29, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, 18446744073709551615, 18446744073709551615, 141, 158, 128, 145, 29, 31, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, 18446744073709551615, 18446744073709551615, 159, 161, 146, 148, 31, 32, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, 18446744073709551615, 18446744073709551615, 162, 176, 149, 163, 32, 34, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, 18446744073709551615, 18446744073709551615, 170, 176, 157, 163, 33, 34, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, 18446744073709551615, 18446744073709551615, 178, 375, 165, 362, 35, 67, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, 18446744073709551615, 18446744073709551615, 186, 194, 173, 181, 37, 38, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, 18446744073709551615, 18446744073709551615, 195, 211, 182, 198, 38, 40, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, 18446744073709551615, 18446744073709551615, 204, 227, 191, 214, 39, 42, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, 18446744073709551615, 18446744073709551615, 216, 227, 203, 214, 41, 42, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, 18446744073709551615, 18446744073709551615, 228, 234, 215, 221, 42, 44, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, 18446744073709551615, 18446744073709551615, 235, 243, 222, 230, 44, 45, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16827399947339178045, 496128657873109341, 18446744073709551615, 18446744073709551615, 252, 293, 239, 280, 47, 53, true, "Atlantic, Pacific and Indian oceans,[XII]", "Atlantic, Pacific and Indian oceans,[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, 18446744073709551615, 18446744073709551615, 252, 260, 239, 247, 47, 48, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, 18446744073709551615, 18446744073709551615, 262, 269, 249, 256, 49, 50, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 3575373331357445963, 1702692810903063225, 18446744073709551615, 18446744073709551615, 274, 293, 261, 280, 51, 53, true, "Indian oceans,[XII]", "Indian oceans,[XII]"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, 18446744073709551615, 18446744073709551615, 281, 293, 268, 280, 52, 53, true, "oceans,[XII]", "oceans,[XII]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 16381206561323757770, 14007677850696664277, 18446744073709551615, 18446744073709551615, 294, 300, 281, 287, 53, 54, true, "giving", "giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, 18446744073709551615, 18446744073709551615, 308, 314, 295, 301, 56, 58, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, 18446744073709551615, 18446744073709551615, 315, 361, 302, 348, 58, 63, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, 18446744073709551615, 18446744073709551615, 362, 368, 349, 355, 63, 65, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, 18446744073709551615, 18446744073709551615, 369, 374, 356, 361, 65, 66, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, 18446744073709551615, 18446744073709551615, 376, 637, 363, 624, 67, 118, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, 18446744073709551615, 18446744073709551615, 376, 410, 363, 397, 67, 71, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, 18446744073709551615, 18446744073709551615, 389, 395, 376, 382, 68, 69, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, 18446744073709551615, 18446744073709551615, 411, 415, 398, 402, 71, 72, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, 18446744073709551615, 18446744073709551615, 416, 438, 403, 425, 72, 75, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, 18446744073709551615, 18446744073709551615, 439, 445, 426, 432, 75, 77, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, 18446744073709551615, 18446744073709551615, 446, 451, 433, 438, 77, 78, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, 18446744073709551615, 18446744073709551615, 461, 467, 448, 454, 80, 82, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, 18446744073709551615, 18446744073709551615, 492, 498, 479, 485, 86, 88, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, 18446744073709551615, 18446744073709551615, 505, 521, 492, 508, 90, 93, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, 18446744073709551615, 18446744073709551615, 515, 521, 502, 508, 92, 93, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, 18446744073709551615, 18446744073709551615, 522, 528, 509, 515, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, 18446744073709551615, 18446744073709551615, 541, 558, 528, 545, 98, 101, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, 18446744073709551615, 18446744073709551615, 559, 565, 546, 552, 101, 103, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, 18446744073709551615, 18446744073709551615, 566, 571, 553, 558, 103, 104, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, 18446744073709551615, 18446744073709551615, 579, 594, 566, 581, 107, 109, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, 18446744073709551615, 18446744073709551615, 595, 603, 582, 590, 109, 111, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, 18446744073709551615, 18446744073709551615, 619, 625, 606, 612, 113, 115, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, 18446744073709551615, 18446744073709551615, 626, 636, 613, 623, 115, 117, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, 18446744073709551615, 18446744073709551615, 638, 961, 625, 948, 118, 176, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, 18446744073709551615, 18446744073709551615, 642, 659, 629, 646, 119, 121, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, 18446744073709551615, 18446744073709551615, 660, 667, 647, 654, 121, 122, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, 18446744073709551615, 18446744073709551615, 668, 676, 655, 663, 122, 124, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, 18446744073709551615, 18446744073709551615, 677, 682, 664, 669, 124, 125, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, 18446744073709551615, 18446744073709551615, 683, 689, 670, 676, 125, 127, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, 18446744073709551615, 18446744073709551615, 709, 717, 696, 704, 130, 132, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, 18446744073709551615, 18446744073709551615, 736, 742, 723, 729, 134, 136, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, 18446744073709551615, 18446744073709551615, 778, 798, 765, 785, 143, 145, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, 18446744073709551615, 18446744073709551615, 799, 806, 786, 793, 145, 146, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, 18446744073709551615, 18446744073709551615, 807, 820, 794, 807, 146, 148, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, 18446744073709551615, 18446744073709551615, 821, 823, 808, 810, 148, 149, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, 18446744073709551615, 18446744073709551615, 824, 864, 811, 851, 149, 156, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, 18446744073709551615, 18446744073709551615, 839, 851, 826, 838, 152, 154, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, 18446744073709551615, 18446744073709551615, 856, 864, 843, 851, 155, 156, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, 18446744073709551615, 18446744073709551615, 865, 871, 852, 858, 156, 158, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, 18446744073709551615, 18446744073709551615, 872, 886, 859, 873, 158, 160, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, 18446744073709551615, 18446744073709551615, 892, 910, 879, 897, 162, 165, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, 18446744073709551615, 18446744073709551615, 916, 928, 903, 915, 167, 169, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, 18446744073709551615, 18446744073709551615, 929, 931, 916, 918, 169, 170, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, 18446744073709551615, 18446744073709551615, 962, 1384, 949, 1371, 176, 254, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, 18446744073709551615, 18446744073709551615, 966, 991, 953, 978, 177, 180, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, 18446744073709551615, 18446744073709551615, 992, 1020, 979, 1007, 180, 187, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, 18446744073709551615, 18446744073709551615, 998, 1000, 985, 987, 182, 183, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, 18446744073709551615, 18446744073709551615, 1007, 1019, 994, 1006, 184, 186, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, 18446744073709551615, 18446744073709551615, 1021, 1025, 1008, 1012, 187, 188, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, 18446744073709551615, 18446744073709551615, 1028, 1036, 1015, 1023, 189, 190, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, 18446744073709551615, 18446744073709551615, 1037, 1041, 1024, 1028, 190, 191, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, 18446744073709551615, 18446744073709551615, 1042, 1044, 1029, 1031, 191, 192, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, 18446744073709551615, 18446744073709551615, 1045, 1052, 1032, 1039, 192, 193, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, 18446744073709551615, 18446744073709551615, 1057, 1072, 1044, 1059, 194, 199, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, 18446744073709551615, 18446744073709551615, 1058, 1065, 1045, 1052, 195, 196, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, 18446744073709551615, 18446744073709551615, 1066, 1071, 1053, 1058, 196, 198, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, 18446744073709551615, 18446744073709551615, 1077, 1081, 1064, 1068, 200, 201, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, 18446744073709551615, 18446744073709551615, 1084, 1100, 1071, 1087, 202, 204, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, 18446744073709551615, 18446744073709551615, 1101, 1103, 1088, 1090, 204, 205, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, 18446744073709551615, 18446744073709551615, 1104, 1108, 1091, 1095, 205, 206, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, 18446744073709551615, 18446744073709551615, 1109, 1111, 1096, 1098, 206, 207, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, 18446744073709551615, 18446744073709551615, 1112, 1119, 1099, 1106, 207, 208, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, 18446744073709551615, 18446744073709551615, 1120, 1122, 1107, 1109, 208, 209, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, 18446744073709551615, 18446744073709551615, 1123, 1125, 1110, 1112, 209, 210, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, 18446744073709551615, 18446744073709551615, 1126, 1133, 1113, 1120, 210, 211, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, 18446744073709551615, 18446744073709551615, 1134, 1145, 1121, 1132, 211, 212, true, "2023.[5][8]", "2023.[5][8]"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, 18446744073709551615, 18446744073709551615, 1153, 1155, 1140, 1142, 213, 214, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 101756270285429158, 6309445736017161690, 18446744073709551615, 18446744073709551615, 1158, 1192, 1145, 1179, 215, 218, true, "unitary semi-presidential republic", "unitary semi-presidential republic"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, 18446744073709551615, 18446744073709551615, 1166, 1183, 1153, 1170, 216, 217, true, "semi-presidential", "semi-presidential"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, 18446744073709551615, 18446744073709551615, 1193, 1197, 1180, 1184, 218, 219, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, 18446744073709551615, 18446744073709551615, 1202, 1209, 1189, 1196, 220, 221, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, 18446744073709551615, 18446744073709551615, 1210, 1212, 1197, 1199, 221, 222, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, 18446744073709551615, 18446744073709551615, 1213, 1218, 1200, 1205, 222, 223, true, "Paris", "Paris"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7060736712546470087, 14254659311922306724, 18446744073709551615, 18446744073709551615, 1224, 1246, 1211, 1233, 225, 228, true, "countrys largest city", "country's largest city"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, 18446744073709551615, 18446744073709551615, 1224, 1233, 1211, 1220, 225, 226, true, "countrys", "country's"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, 18446744073709551615, 18446744073709551615, 1251, 1286, 1238, 1273, 229, 234, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, 18446744073709551615, 18446744073709551615, 1269, 1286, 1256, 1273, 232, 234, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, 18446744073709551615, 18446744073709551615, 1288, 1311, 1275, 1298, 235, 239, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, 18446744073709551615, 18446744073709551615, 1312, 1319, 1299, 1306, 239, 240, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, 18446744073709551615, 18446744073709551615, 1320, 1383, 1307, 1370, 240, 253, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, 18446744073709551615, 18446744073709551615, 1320, 1329, 1307, 1316, 240, 241, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, 18446744073709551615, 18446744073709551615, 1331, 1335, 1318, 1322, 242, 243, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, 18446744073709551615, 18446744073709551615, 1337, 1345, 1324, 1332, 244, 245, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, 18446744073709551615, 18446744073709551615, 1347, 1352, 1334, 1339, 246, 247, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, 18446744073709551615, 18446744073709551615, 1354, 1362, 1341, 1349, 248, 249, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, 18446744073709551615, 18446744073709551615, 1364, 1374, 1351, 1361, 250, 251, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, 18446744073709551615, 18446744073709551615, 1379, 1383, 1366, 1370, 252, 253, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "paragraph", 0.97]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} +{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, 18446744073709551615, 18446744073709551615, 0, 188, 0, 188, 0, 28, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, 18446744073709551615, 18446744073709551615, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, 18446744073709551615, 18446744073709551615, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004679635976, 18446744073709551615, 18446744073709551615, 24, 41, 24, 41, 5, 7, true, "interband pairing", "interband pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, 18446744073709551615, 18446744073709551615, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["term", "enum-term-mark-1", 4522339299074192207, "TEXT", "#", 1.0, 18178792033664231045, 5215905145529509301, 18446744073709551615, 18446744073709551615, 45, 87, 45, 87, 8, 13, true, "two-band s-wave and d-wave superconductors", "two-band s-wave and d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, 18446744073709551615, 18446744073709551615, 45, 53, 45, 53, 8, 9, true, "two-band", "two-band"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, 18446744073709551615, 18446744073709551615, 54, 60, 54, 60, 9, 10, true, "s-wave", "s-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15865120430118694837, 607662791561950043, 18446744073709551615, 18446744073709551615, 65, 87, 65, 87, 11, 13, true, "d-wave superconductors", "d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, 18446744073709551615, 18446744073709551615, 65, 71, 65, 71, 11, 12, true, "d-wave", "d-wave"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, 18446744073709551615, 18446744073709551615, 88, 92, 88, 92, 13, 14, true, "with", "with"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7066208506210013514, 1315102098090612032, 18446744073709551615, 18446744073709551615, 93, 105, 93, 105, 14, 16, true, "D4h symmetry", "D4h symmetry"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, 18446744073709551615, 18446744073709551615, 93, 96, 93, 96, 14, 15, true, "D4h", "D4h"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, 18446744073709551615, 18446744073709551615, 106, 113, 106, 113, 16, 18, true, "in both", "in both"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5172475826427571765, 16752879714615995236, 18446744073709551615, 18446744073709551615, 114, 137, 114, 137, 18, 20, true, "time-reversal invariant", "time-reversal invariant"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, 18446744073709551615, 18446744073709551615, 114, 127, 114, 127, 18, 19, true, "time-reversal", "time-reversal"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, 18446744073709551615, 18446744073709551615, 146, 148, 146, 148, 22, 23, true, "as", "as"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 10193294999568911218, 6331719907444433820, 18446744073709551615, 18446744073709551615, 149, 171, 149, 171, 23, 25, true, "time-reversal symmetry", "time-reversal symmetry"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, 18446744073709551615, 18446744073709551615, 149, 162, 149, 162, 23, 24, true, "time-reversal", "time-reversal"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 25, 26, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, 18446744073709551615, 18446744073709551615, 181, 187, 181, 187, 26, 27, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, 18446744073709551615, 18446744073709551615, 189, 384, 189, 384, 28, 58, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, 18446744073709551615, 18446744073709551615, 193, 201, 193, 201, 29, 30, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 30, 31, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, 18446744073709551615, 18446744073709551615, 205, 214, 205, 214, 31, 32, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, 18446744073709551615, 18446744073709551615, 215, 244, 215, 244, 32, 35, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, 18446744073709551615, 18446744073709551615, 249, 264, 249, 264, 36, 38, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, 18446744073709551615, 18446744073709551615, 265, 271, 265, 271, 38, 40, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, 18446744073709551615, 18446744073709551615, 272, 286, 272, 286, 40, 41, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, 18446744073709551615, 18446744073709551615, 288, 293, 288, 293, 42, 43, true, "nodes", "nodes"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 3766089650286616147, 5895288868427388531, 18446744073709551615, 18446744073709551615, 294, 309, 294, 309, 43, 45, true, "can (dis)appear", "can (dis)appear"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, 18446744073709551615, 18446744073709551615, 298, 309, 298, 309, 44, 45, true, "(dis)appear", "(dis)appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, 18446744073709551615, 18446744073709551615, 311, 316, 311, 316, 46, 47, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, 18446744073709551615, 18446744073709551615, 322, 327, 322, 327, 49, 50, true, "leave", "leave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106670696871780136, 17807492235586576248, 18446744073709551615, 18446744073709551615, 328, 351, 328, 351, 50, 52, true, "high-symmetry locations", "high-symmetry locations"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, 18446744073709551615, 18446744073709551615, 328, 341, 328, 341, 50, 51, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, 18446744073709551615, 18446744073709551615, 357, 374, 357, 374, 53, 55, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, 18446744073709551615, 18446744073709551615, 375, 383, 375, 383, 55, 57, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, 18446744073709551615, 18446744073709551615, 385, 594, 385, 594, 58, 93, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, 18446744073709551615, 18446744073709551615, 398, 404, 398, 404, 60, 62, true, "in the", "in the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15559186615879240368, 12910915472651789195, 18446744073709551615, 18446744073709551615, 405, 416, 405, 416, 62, 64, true, "d-wave case", "d-wave case"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, 18446744073709551615, 18446744073709551615, 405, 411, 405, 411, 62, 63, true, "d-wave", "d-wave"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, 18446744073709551615, 18446744073709551615, 421, 425, 421, 425, 66, 67, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, 18446744073709551615, 18446744073709551615, 426, 430, 426, 430, 67, 68, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, 18446744073709551615, 18446744073709551615, 440, 454, 440, 454, 70, 72, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, 18446744073709551615, 18446744073709551615, 455, 475, 455, 475, 72, 74, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, 18446744073709551615, 18446744073709551615, 481, 490, 481, 490, 75, 76, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, 18446744073709551615, 18446744073709551615, 491, 498, 491, 498, 76, 77, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, 18446744073709551615, 18446744073709551615, 499, 508, 499, 508, 77, 78, true, "increases", "increases"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 18352755674675419019, 8051640294707098683, 18446744073709551615, 18446744073709551615, 510, 547, 510, 547, 79, 84, true, "flat zero-energy Andreev bound states", "flat zero-energy Andreev bound states"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, 18446744073709551615, 18446744073709551615, 515, 526, 515, 526, 80, 81, true, "zero-energy", "zero-energy"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, 18446744073709551615, 18446744073709551615, 548, 555, 548, 555, 84, 86, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, 18446744073709551615, 18446744073709551615, 560, 570, 560, 570, 87, 88, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, 18446744073709551615, 18446744073709551615, 571, 573, 571, 573, 88, 89, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, 18446744073709551615, 18446744073709551615, 574, 593, 574, 593, 89, 92, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.88], ["semantic", 4522339299074192207, "TEXT", "#", "paragraph", 0.9]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} diff --git a/tests/data/texts/test_02B_text_01.jsonl b/tests/data/texts/test_02B_text_01.jsonl index b59cedd9..0970ce13 100644 --- a/tests/data/texts/test_02B_text_01.jsonl +++ b/tests/data/texts/test_02B_text_01.jsonl @@ -1 +1 @@ -{"dloc": "", "hash": 253473544312511038, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.5799999833106995]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"dloc": "", "hash": 253473544312511038, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 1dc8512f..5a3992ba 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -9,7 +9,7 @@ from deepsearch_glm.nlp_train_semantic import train_semantic -GENERATE=False +GENERATE=True def round_floats(o): if isinstance(o, float): return round(o, 2) From 8bbe70aea91cd5dad5f07a57ca8b84cb8f185fbd Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Thu, 16 Nov 2023 05:07:54 +0100 Subject: [PATCH 07/22] fixing tests one by one Signed-off-by: Peter Staar --- tests/data/texts/test_02A_text_01.jsonl | 2 +- tests/data/texts/test_02B_text_01.jsonl | 2 +- tests/test_nlp.py | 11 ++++++++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/data/texts/test_02A_text_01.jsonl b/tests/data/texts/test_02A_text_01.jsonl index ac2358c7..deb0b90c 100644 --- a/tests/data/texts/test_02A_text_01.jsonl +++ b/tests/data/texts/test_02A_text_01.jsonl @@ -1 +1 @@ -{"applied-models": ["cite", "expression", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "", "hash": 253473544312511038, "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, 18446744073709551615, 18446744073709551615, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, 18446744073709551615, 18446744073709551615, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"applied-models": ["cite", "expression", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "", "hash": 253473544312511038, "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, 18446744073709551615, 18446744073709551615, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, 18446744073709551615, 18446744073709551615, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", "en", 0.58]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} diff --git a/tests/data/texts/test_02B_text_01.jsonl b/tests/data/texts/test_02B_text_01.jsonl index 0970ce13..adbb6f57 100644 --- a/tests/data/texts/test_02B_text_01.jsonl +++ b/tests/data/texts/test_02B_text_01.jsonl @@ -1 +1 @@ -{"dloc": "", "hash": 253473544312511038, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"dloc": "", "hash": 253473544312511038, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", "en", 0.58]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 982728f6..5c522544 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -10,7 +10,7 @@ from deepsearch_glm.nlp_train_semantic import train_semantic -GENERATE=True +GENERATE=False def round_floats(o): if isinstance(o, float): return round(o, 2) @@ -337,15 +337,20 @@ def test_05_to_legacy(): else: with open(target_nlp, "r") as fr: doc_nlp = json.load(fr) - + doc_nlp = round_floats(doc_nlp) + with open(target_leg, "r") as fr: doc_leg = json.load(fr) + doc_leg = round_floats(doc_leg) + doc_j = model.apply_on_doc(doc_i) - + doc_j = round_floats(doc_j) + assert doc_j==doc_nlp doc_i = to_legacy_document_format(doc_j, doc_i) + doc_i = round_floats(doc_i) assert doc_i==doc_leg From e5b051acff1ab591d00b29dd4e80cc8a01339626 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Thu, 16 Nov 2023 05:21:27 +0100 Subject: [PATCH 08/22] fixed the test Signed-off-by: Peter Staar --- tests/data/texts/references.nlp.jsonl | 4 ++-- tests/data/texts/semantics.nlp.jsonl | 14 +++++++------- tests/data/texts/terms.nlp.jsonl | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/data/texts/references.nlp.jsonl b/tests/data/texts/references.nlp.jsonl index 58dfb0bd..b22a3472 100644 --- a/tests/data/texts/references.nlp.jsonl +++ b/tests/data/texts/references.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, 18446744073709551615, 18446744073709551615, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, 18446744073709551615, 18446744073709551615, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, 18446744073709551615, 18446744073709551615, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, 18446744073709551615, 18446744073709551615, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, 18446744073709551615, 18446744073709551615, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, 18446744073709551615, 18446744073709551615, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, 18446744073709551615, 18446744073709551615, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, 18446744073709551615, 18446744073709551615, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, 18446744073709551615, 18446744073709551615, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, 18446744073709551615, 18446744073709551615, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, 18446744073709551615, 18446744073709551615, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, 18446744073709551615, 18446744073709551615, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, 18446744073709551615, 18446744073709551615, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, 18446744073709551615, 18446744073709551615, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, 18446744073709551615, 18446744073709551615, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, 18446744073709551615, 18446744073709551615, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, 18446744073709551615, 18446744073709551615, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, 18446744073709551615, 18446744073709551615, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, 18446744073709551615, 18446744073709551615, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, 18446744073709551615, 18446744073709551615, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, 18446744073709551615, 18446744073709551615, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, 18446744073709551615, 18446744073709551615, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, 18446744073709551615, 18446744073709551615, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "volume", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["reference", "pages", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", "reference", 0.89]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, 18446744073709551615, 18446744073709551615, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, 18446744073709551615, 18446744073709551615, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, 18446744073709551615, 18446744073709551615, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, 18446744073709551615, 18446744073709551615, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, 18446744073709551615, 18446744073709551615, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, 18446744073709551615, 18446744073709551615, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, 18446744073709551615, 18446744073709551615, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, 18446744073709551615, 18446744073709551615, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, 18446744073709551615, 18446744073709551615, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, 18446744073709551615, 18446744073709551615, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, 18446744073709551615, 18446744073709551615, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "volume", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["reference", "pages", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", "reference", 0.94]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/semantics.nlp.jsonl b/tests/data/texts/semantics.nlp.jsonl index 3e6cbe02..3e879094 100644 --- a/tests/data/texts/semantics.nlp.jsonl +++ b/tests/data/texts/semantics.nlp.jsonl @@ -1,7 +1,7 @@ -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 7759316032128614217, "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", 7759316032128614217, "TEXT", "#", "subtitle", 0.51]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14339562343989983509, "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", 14339562343989983509, "TEXT", "#", "paragraph", 0.85]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 18143996061359107703, "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", 18143996061359107703, "TEXT", "#", "reference", 0.71]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} -{"applied-models": ["link", "numval"], "dloc": "", "hash": 11035282656876697300, "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", 11035282656876697300, "TEXT", "#", "meta-data", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14993488697470108654, "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, 18446744073709551615, 18446744073709551615, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, 18446744073709551615, 18446744073709551615, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", 14993488697470108654, "TEXT", "#", "paragraph", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, 18446744073709551615, 18446744073709551615, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, 18446744073709551615, 18446744073709551615, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 7759316032128614217, "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", "header", 0.71]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14339562343989983509, "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", "meta-data", 0.8]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 18143996061359107703, "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", "meta-data", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} +{"applied-models": ["link", "numval"], "dloc": "", "hash": 11035282656876697300, "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", "meta-data", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14993488697470108654, "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, 18446744073709551615, 18446744073709551615, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, 18446744073709551615, 18446744073709551615, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", "text", 0.96]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, 18446744073709551615, 18446744073709551615, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", "reference", 0.89]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, 18446744073709551615, 18446744073709551615, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", "reference", 0.94]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/terms.nlp.jsonl b/tests/data/texts/terms.nlp.jsonl index 32250ccb..9c8b9ec3 100644 --- a/tests/data/texts/terms.nlp.jsonl +++ b/tests/data/texts/terms.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, 18446744073709551615, 18446744073709551615, 0, 177, 0, 164, 0, 35, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, 18446744073709551615, 18446744073709551615, 7, 31, 7, 26, 1, 7, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, 18446744073709551615, 18446744073709551615, 27, 30, 24, 25, 5, 6, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, 18446744073709551615, 18446744073709551615, 48, 63, 43, 58, 10, 12, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, 18446744073709551615, 18446744073709551615, 64, 122, 59, 109, 12, 22, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, 18446744073709551615, 18446744073709551615, 73, 95, 68, 88, 15, 17, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, 18446744073709551615, 18446744073709551615, 96, 106, 89, 98, 17, 19, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, 18446744073709551615, 18446744073709551615, 107, 121, 99, 108, 19, 21, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, 18446744073709551615, 18446744073709551615, 123, 127, 110, 114, 23, 26, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, 18446744073709551615, 18446744073709551615, 124, 126, 111, 113, 24, 25, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, 18446744073709551615, 18446744073709551615, 128, 130, 115, 117, 26, 27, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, 18446744073709551615, 18446744073709551615, 133, 140, 120, 127, 28, 29, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, 18446744073709551615, 18446744073709551615, 141, 158, 128, 145, 29, 31, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, 18446744073709551615, 18446744073709551615, 159, 161, 146, 148, 31, 32, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, 18446744073709551615, 18446744073709551615, 162, 176, 149, 163, 32, 34, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, 18446744073709551615, 18446744073709551615, 170, 176, 157, 163, 33, 34, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, 18446744073709551615, 18446744073709551615, 178, 375, 165, 362, 35, 67, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, 18446744073709551615, 18446744073709551615, 186, 194, 173, 181, 37, 38, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, 18446744073709551615, 18446744073709551615, 195, 211, 182, 198, 38, 40, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, 18446744073709551615, 18446744073709551615, 204, 227, 191, 214, 39, 42, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, 18446744073709551615, 18446744073709551615, 216, 227, 203, 214, 41, 42, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, 18446744073709551615, 18446744073709551615, 228, 234, 215, 221, 42, 44, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, 18446744073709551615, 18446744073709551615, 235, 243, 222, 230, 44, 45, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16827399947339178045, 496128657873109341, 18446744073709551615, 18446744073709551615, 252, 293, 239, 280, 47, 53, true, "Atlantic, Pacific and Indian oceans,[XII]", "Atlantic, Pacific and Indian oceans,[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, 18446744073709551615, 18446744073709551615, 252, 260, 239, 247, 47, 48, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, 18446744073709551615, 18446744073709551615, 262, 269, 249, 256, 49, 50, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 3575373331357445963, 1702692810903063225, 18446744073709551615, 18446744073709551615, 274, 293, 261, 280, 51, 53, true, "Indian oceans,[XII]", "Indian oceans,[XII]"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, 18446744073709551615, 18446744073709551615, 281, 293, 268, 280, 52, 53, true, "oceans,[XII]", "oceans,[XII]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 16381206561323757770, 14007677850696664277, 18446744073709551615, 18446744073709551615, 294, 300, 281, 287, 53, 54, true, "giving", "giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, 18446744073709551615, 18446744073709551615, 308, 314, 295, 301, 56, 58, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, 18446744073709551615, 18446744073709551615, 315, 361, 302, 348, 58, 63, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, 18446744073709551615, 18446744073709551615, 362, 368, 349, 355, 63, 65, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, 18446744073709551615, 18446744073709551615, 369, 374, 356, 361, 65, 66, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, 18446744073709551615, 18446744073709551615, 376, 637, 363, 624, 67, 118, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, 18446744073709551615, 18446744073709551615, 376, 410, 363, 397, 67, 71, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, 18446744073709551615, 18446744073709551615, 389, 395, 376, 382, 68, 69, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, 18446744073709551615, 18446744073709551615, 411, 415, 398, 402, 71, 72, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, 18446744073709551615, 18446744073709551615, 416, 438, 403, 425, 72, 75, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, 18446744073709551615, 18446744073709551615, 439, 445, 426, 432, 75, 77, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, 18446744073709551615, 18446744073709551615, 446, 451, 433, 438, 77, 78, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, 18446744073709551615, 18446744073709551615, 461, 467, 448, 454, 80, 82, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, 18446744073709551615, 18446744073709551615, 492, 498, 479, 485, 86, 88, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, 18446744073709551615, 18446744073709551615, 505, 521, 492, 508, 90, 93, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, 18446744073709551615, 18446744073709551615, 515, 521, 502, 508, 92, 93, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, 18446744073709551615, 18446744073709551615, 522, 528, 509, 515, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, 18446744073709551615, 18446744073709551615, 541, 558, 528, 545, 98, 101, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, 18446744073709551615, 18446744073709551615, 559, 565, 546, 552, 101, 103, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, 18446744073709551615, 18446744073709551615, 566, 571, 553, 558, 103, 104, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, 18446744073709551615, 18446744073709551615, 579, 594, 566, 581, 107, 109, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, 18446744073709551615, 18446744073709551615, 595, 603, 582, 590, 109, 111, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, 18446744073709551615, 18446744073709551615, 619, 625, 606, 612, 113, 115, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, 18446744073709551615, 18446744073709551615, 626, 636, 613, 623, 115, 117, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, 18446744073709551615, 18446744073709551615, 638, 961, 625, 948, 118, 176, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, 18446744073709551615, 18446744073709551615, 642, 659, 629, 646, 119, 121, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, 18446744073709551615, 18446744073709551615, 660, 667, 647, 654, 121, 122, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, 18446744073709551615, 18446744073709551615, 668, 676, 655, 663, 122, 124, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, 18446744073709551615, 18446744073709551615, 677, 682, 664, 669, 124, 125, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, 18446744073709551615, 18446744073709551615, 683, 689, 670, 676, 125, 127, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, 18446744073709551615, 18446744073709551615, 709, 717, 696, 704, 130, 132, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, 18446744073709551615, 18446744073709551615, 736, 742, 723, 729, 134, 136, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, 18446744073709551615, 18446744073709551615, 778, 798, 765, 785, 143, 145, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, 18446744073709551615, 18446744073709551615, 799, 806, 786, 793, 145, 146, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, 18446744073709551615, 18446744073709551615, 807, 820, 794, 807, 146, 148, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, 18446744073709551615, 18446744073709551615, 821, 823, 808, 810, 148, 149, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, 18446744073709551615, 18446744073709551615, 824, 864, 811, 851, 149, 156, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, 18446744073709551615, 18446744073709551615, 839, 851, 826, 838, 152, 154, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, 18446744073709551615, 18446744073709551615, 856, 864, 843, 851, 155, 156, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, 18446744073709551615, 18446744073709551615, 865, 871, 852, 858, 156, 158, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, 18446744073709551615, 18446744073709551615, 872, 886, 859, 873, 158, 160, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, 18446744073709551615, 18446744073709551615, 892, 910, 879, 897, 162, 165, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, 18446744073709551615, 18446744073709551615, 916, 928, 903, 915, 167, 169, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, 18446744073709551615, 18446744073709551615, 929, 931, 916, 918, 169, 170, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, 18446744073709551615, 18446744073709551615, 962, 1384, 949, 1371, 176, 254, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, 18446744073709551615, 18446744073709551615, 966, 991, 953, 978, 177, 180, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, 18446744073709551615, 18446744073709551615, 992, 1020, 979, 1007, 180, 187, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, 18446744073709551615, 18446744073709551615, 998, 1000, 985, 987, 182, 183, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, 18446744073709551615, 18446744073709551615, 1007, 1019, 994, 1006, 184, 186, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, 18446744073709551615, 18446744073709551615, 1021, 1025, 1008, 1012, 187, 188, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, 18446744073709551615, 18446744073709551615, 1028, 1036, 1015, 1023, 189, 190, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, 18446744073709551615, 18446744073709551615, 1037, 1041, 1024, 1028, 190, 191, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, 18446744073709551615, 18446744073709551615, 1042, 1044, 1029, 1031, 191, 192, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, 18446744073709551615, 18446744073709551615, 1045, 1052, 1032, 1039, 192, 193, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, 18446744073709551615, 18446744073709551615, 1057, 1072, 1044, 1059, 194, 199, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, 18446744073709551615, 18446744073709551615, 1058, 1065, 1045, 1052, 195, 196, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, 18446744073709551615, 18446744073709551615, 1066, 1071, 1053, 1058, 196, 198, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, 18446744073709551615, 18446744073709551615, 1077, 1081, 1064, 1068, 200, 201, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, 18446744073709551615, 18446744073709551615, 1084, 1100, 1071, 1087, 202, 204, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, 18446744073709551615, 18446744073709551615, 1101, 1103, 1088, 1090, 204, 205, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, 18446744073709551615, 18446744073709551615, 1104, 1108, 1091, 1095, 205, 206, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, 18446744073709551615, 18446744073709551615, 1109, 1111, 1096, 1098, 206, 207, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, 18446744073709551615, 18446744073709551615, 1112, 1119, 1099, 1106, 207, 208, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, 18446744073709551615, 18446744073709551615, 1120, 1122, 1107, 1109, 208, 209, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, 18446744073709551615, 18446744073709551615, 1123, 1125, 1110, 1112, 209, 210, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, 18446744073709551615, 18446744073709551615, 1126, 1133, 1113, 1120, 210, 211, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, 18446744073709551615, 18446744073709551615, 1134, 1145, 1121, 1132, 211, 212, true, "2023.[5][8]", "2023.[5][8]"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, 18446744073709551615, 18446744073709551615, 1153, 1155, 1140, 1142, 213, 214, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 101756270285429158, 6309445736017161690, 18446744073709551615, 18446744073709551615, 1158, 1192, 1145, 1179, 215, 218, true, "unitary semi-presidential republic", "unitary semi-presidential republic"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, 18446744073709551615, 18446744073709551615, 1166, 1183, 1153, 1170, 216, 217, true, "semi-presidential", "semi-presidential"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, 18446744073709551615, 18446744073709551615, 1193, 1197, 1180, 1184, 218, 219, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, 18446744073709551615, 18446744073709551615, 1202, 1209, 1189, 1196, 220, 221, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, 18446744073709551615, 18446744073709551615, 1210, 1212, 1197, 1199, 221, 222, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, 18446744073709551615, 18446744073709551615, 1213, 1218, 1200, 1205, 222, 223, true, "Paris", "Paris"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7060736712546470087, 14254659311922306724, 18446744073709551615, 18446744073709551615, 1224, 1246, 1211, 1233, 225, 228, true, "countrys largest city", "country's largest city"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, 18446744073709551615, 18446744073709551615, 1224, 1233, 1211, 1220, 225, 226, true, "countrys", "country's"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, 18446744073709551615, 18446744073709551615, 1251, 1286, 1238, 1273, 229, 234, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, 18446744073709551615, 18446744073709551615, 1269, 1286, 1256, 1273, 232, 234, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, 18446744073709551615, 18446744073709551615, 1288, 1311, 1275, 1298, 235, 239, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, 18446744073709551615, 18446744073709551615, 1312, 1319, 1299, 1306, 239, 240, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, 18446744073709551615, 18446744073709551615, 1320, 1383, 1307, 1370, 240, 253, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, 18446744073709551615, 18446744073709551615, 1320, 1329, 1307, 1316, 240, 241, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, 18446744073709551615, 18446744073709551615, 1331, 1335, 1318, 1322, 242, 243, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, 18446744073709551615, 18446744073709551615, 1337, 1345, 1324, 1332, 244, 245, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, 18446744073709551615, 18446744073709551615, 1347, 1352, 1334, 1339, 246, 247, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, 18446744073709551615, 18446744073709551615, 1354, 1362, 1341, 1349, 248, 249, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, 18446744073709551615, 18446744073709551615, 1364, 1374, 1351, 1361, 250, 251, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, 18446744073709551615, 18446744073709551615, 1379, 1383, 1366, 1370, 252, 253, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "paragraph", 0.97]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, 18446744073709551615, 18446744073709551615, 0, 188, 0, 188, 0, 28, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, 18446744073709551615, 18446744073709551615, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, 18446744073709551615, 18446744073709551615, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004679635976, 18446744073709551615, 18446744073709551615, 24, 41, 24, 41, 5, 7, true, "interband pairing", "interband pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, 18446744073709551615, 18446744073709551615, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["term", "enum-term-mark-1", 4522339299074192207, "TEXT", "#", 1.0, 18178792033664231045, 5215905145529509301, 18446744073709551615, 18446744073709551615, 45, 87, 45, 87, 8, 13, true, "two-band s-wave and d-wave superconductors", "two-band s-wave and d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, 18446744073709551615, 18446744073709551615, 45, 53, 45, 53, 8, 9, true, "two-band", "two-band"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, 18446744073709551615, 18446744073709551615, 54, 60, 54, 60, 9, 10, true, "s-wave", "s-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15865120430118694837, 607662791561950043, 18446744073709551615, 18446744073709551615, 65, 87, 65, 87, 11, 13, true, "d-wave superconductors", "d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, 18446744073709551615, 18446744073709551615, 65, 71, 65, 71, 11, 12, true, "d-wave", "d-wave"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, 18446744073709551615, 18446744073709551615, 88, 92, 88, 92, 13, 14, true, "with", "with"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7066208506210013514, 1315102098090612032, 18446744073709551615, 18446744073709551615, 93, 105, 93, 105, 14, 16, true, "D4h symmetry", "D4h symmetry"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, 18446744073709551615, 18446744073709551615, 93, 96, 93, 96, 14, 15, true, "D4h", "D4h"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, 18446744073709551615, 18446744073709551615, 106, 113, 106, 113, 16, 18, true, "in both", "in both"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5172475826427571765, 16752879714615995236, 18446744073709551615, 18446744073709551615, 114, 137, 114, 137, 18, 20, true, "time-reversal invariant", "time-reversal invariant"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, 18446744073709551615, 18446744073709551615, 114, 127, 114, 127, 18, 19, true, "time-reversal", "time-reversal"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, 18446744073709551615, 18446744073709551615, 146, 148, 146, 148, 22, 23, true, "as", "as"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 10193294999568911218, 6331719907444433820, 18446744073709551615, 18446744073709551615, 149, 171, 149, 171, 23, 25, true, "time-reversal symmetry", "time-reversal symmetry"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, 18446744073709551615, 18446744073709551615, 149, 162, 149, 162, 23, 24, true, "time-reversal", "time-reversal"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 25, 26, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, 18446744073709551615, 18446744073709551615, 181, 187, 181, 187, 26, 27, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, 18446744073709551615, 18446744073709551615, 189, 384, 189, 384, 28, 58, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, 18446744073709551615, 18446744073709551615, 193, 201, 193, 201, 29, 30, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 30, 31, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, 18446744073709551615, 18446744073709551615, 205, 214, 205, 214, 31, 32, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, 18446744073709551615, 18446744073709551615, 215, 244, 215, 244, 32, 35, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, 18446744073709551615, 18446744073709551615, 249, 264, 249, 264, 36, 38, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, 18446744073709551615, 18446744073709551615, 265, 271, 265, 271, 38, 40, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, 18446744073709551615, 18446744073709551615, 272, 286, 272, 286, 40, 41, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, 18446744073709551615, 18446744073709551615, 288, 293, 288, 293, 42, 43, true, "nodes", "nodes"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 3766089650286616147, 5895288868427388531, 18446744073709551615, 18446744073709551615, 294, 309, 294, 309, 43, 45, true, "can (dis)appear", "can (dis)appear"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, 18446744073709551615, 18446744073709551615, 298, 309, 298, 309, 44, 45, true, "(dis)appear", "(dis)appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, 18446744073709551615, 18446744073709551615, 311, 316, 311, 316, 46, 47, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, 18446744073709551615, 18446744073709551615, 322, 327, 322, 327, 49, 50, true, "leave", "leave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106670696871780136, 17807492235586576248, 18446744073709551615, 18446744073709551615, 328, 351, 328, 351, 50, 52, true, "high-symmetry locations", "high-symmetry locations"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, 18446744073709551615, 18446744073709551615, 328, 341, 328, 341, 50, 51, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, 18446744073709551615, 18446744073709551615, 357, 374, 357, 374, 53, 55, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, 18446744073709551615, 18446744073709551615, 375, 383, 375, 383, 55, 57, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, 18446744073709551615, 18446744073709551615, 385, 594, 385, 594, 58, 93, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, 18446744073709551615, 18446744073709551615, 398, 404, 398, 404, 60, 62, true, "in the", "in the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15559186615879240368, 12910915472651789195, 18446744073709551615, 18446744073709551615, 405, 416, 405, 416, 62, 64, true, "d-wave case", "d-wave case"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, 18446744073709551615, 18446744073709551615, 405, 411, 405, 411, 62, 63, true, "d-wave", "d-wave"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, 18446744073709551615, 18446744073709551615, 421, 425, 421, 425, 66, 67, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, 18446744073709551615, 18446744073709551615, 426, 430, 426, 430, 67, 68, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, 18446744073709551615, 18446744073709551615, 440, 454, 440, 454, 70, 72, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, 18446744073709551615, 18446744073709551615, 455, 475, 455, 475, 72, 74, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, 18446744073709551615, 18446744073709551615, 481, 490, 481, 490, 75, 76, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, 18446744073709551615, 18446744073709551615, 491, 498, 491, 498, 76, 77, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, 18446744073709551615, 18446744073709551615, 499, 508, 499, 508, 77, 78, true, "increases", "increases"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 18352755674675419019, 8051640294707098683, 18446744073709551615, 18446744073709551615, 510, 547, 510, 547, 79, 84, true, "flat zero-energy Andreev bound states", "flat zero-energy Andreev bound states"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, 18446744073709551615, 18446744073709551615, 515, 526, 515, 526, 80, 81, true, "zero-energy", "zero-energy"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, 18446744073709551615, 18446744073709551615, 548, 555, 548, 555, 84, 86, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, 18446744073709551615, 18446744073709551615, 560, 570, 560, 570, 87, 88, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, 18446744073709551615, 18446744073709551615, 571, 573, 571, 573, 88, 89, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, 18446744073709551615, 18446744073709551615, 574, 593, 574, 593, 89, 92, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.88], ["semantic", 4522339299074192207, "TEXT", "#", "paragraph", 0.9]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} +{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, 18446744073709551615, 18446744073709551615, 0, 177, 0, 164, 0, 35, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, 18446744073709551615, 18446744073709551615, 7, 31, 7, 26, 1, 7, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, 18446744073709551615, 18446744073709551615, 27, 30, 24, 25, 5, 6, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, 18446744073709551615, 18446744073709551615, 48, 63, 43, 58, 10, 12, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, 18446744073709551615, 18446744073709551615, 64, 122, 59, 109, 12, 22, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, 18446744073709551615, 18446744073709551615, 73, 95, 68, 88, 15, 17, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, 18446744073709551615, 18446744073709551615, 96, 106, 89, 98, 17, 19, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, 18446744073709551615, 18446744073709551615, 107, 121, 99, 108, 19, 21, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, 18446744073709551615, 18446744073709551615, 123, 127, 110, 114, 23, 26, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, 18446744073709551615, 18446744073709551615, 124, 126, 111, 113, 24, 25, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, 18446744073709551615, 18446744073709551615, 128, 130, 115, 117, 26, 27, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, 18446744073709551615, 18446744073709551615, 133, 140, 120, 127, 28, 29, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, 18446744073709551615, 18446744073709551615, 141, 158, 128, 145, 29, 31, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, 18446744073709551615, 18446744073709551615, 159, 161, 146, 148, 31, 32, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, 18446744073709551615, 18446744073709551615, 162, 176, 149, 163, 32, 34, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, 18446744073709551615, 18446744073709551615, 170, 176, 157, 163, 33, 34, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, 18446744073709551615, 18446744073709551615, 178, 375, 165, 362, 35, 67, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, 18446744073709551615, 18446744073709551615, 186, 194, 173, 181, 37, 38, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, 18446744073709551615, 18446744073709551615, 195, 211, 182, 198, 38, 40, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, 18446744073709551615, 18446744073709551615, 204, 227, 191, 214, 39, 42, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, 18446744073709551615, 18446744073709551615, 216, 227, 203, 214, 41, 42, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, 18446744073709551615, 18446744073709551615, 228, 234, 215, 221, 42, 44, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, 18446744073709551615, 18446744073709551615, 235, 243, 222, 230, 44, 45, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16827399947339178045, 496128657873109341, 18446744073709551615, 18446744073709551615, 252, 293, 239, 280, 47, 53, true, "Atlantic, Pacific and Indian oceans,[XII]", "Atlantic, Pacific and Indian oceans,[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, 18446744073709551615, 18446744073709551615, 252, 260, 239, 247, 47, 48, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, 18446744073709551615, 18446744073709551615, 262, 269, 249, 256, 49, 50, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 3575373331357445963, 1702692810903063225, 18446744073709551615, 18446744073709551615, 274, 293, 261, 280, 51, 53, true, "Indian oceans,[XII]", "Indian oceans,[XII]"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, 18446744073709551615, 18446744073709551615, 281, 293, 268, 280, 52, 53, true, "oceans,[XII]", "oceans,[XII]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 16381206561323757770, 14007677850696664277, 18446744073709551615, 18446744073709551615, 294, 300, 281, 287, 53, 54, true, "giving", "giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, 18446744073709551615, 18446744073709551615, 308, 314, 295, 301, 56, 58, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, 18446744073709551615, 18446744073709551615, 315, 361, 302, 348, 58, 63, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, 18446744073709551615, 18446744073709551615, 362, 368, 349, 355, 63, 65, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, 18446744073709551615, 18446744073709551615, 369, 374, 356, 361, 65, 66, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, 18446744073709551615, 18446744073709551615, 376, 637, 363, 624, 67, 118, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, 18446744073709551615, 18446744073709551615, 376, 410, 363, 397, 67, 71, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, 18446744073709551615, 18446744073709551615, 389, 395, 376, 382, 68, 69, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, 18446744073709551615, 18446744073709551615, 411, 415, 398, 402, 71, 72, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, 18446744073709551615, 18446744073709551615, 416, 438, 403, 425, 72, 75, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, 18446744073709551615, 18446744073709551615, 439, 445, 426, 432, 75, 77, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, 18446744073709551615, 18446744073709551615, 446, 451, 433, 438, 77, 78, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, 18446744073709551615, 18446744073709551615, 461, 467, 448, 454, 80, 82, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, 18446744073709551615, 18446744073709551615, 492, 498, 479, 485, 86, 88, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, 18446744073709551615, 18446744073709551615, 505, 521, 492, 508, 90, 93, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, 18446744073709551615, 18446744073709551615, 515, 521, 502, 508, 92, 93, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, 18446744073709551615, 18446744073709551615, 522, 528, 509, 515, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, 18446744073709551615, 18446744073709551615, 541, 558, 528, 545, 98, 101, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, 18446744073709551615, 18446744073709551615, 559, 565, 546, 552, 101, 103, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, 18446744073709551615, 18446744073709551615, 566, 571, 553, 558, 103, 104, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, 18446744073709551615, 18446744073709551615, 579, 594, 566, 581, 107, 109, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, 18446744073709551615, 18446744073709551615, 595, 603, 582, 590, 109, 111, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, 18446744073709551615, 18446744073709551615, 619, 625, 606, 612, 113, 115, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, 18446744073709551615, 18446744073709551615, 626, 636, 613, 623, 115, 117, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, 18446744073709551615, 18446744073709551615, 638, 961, 625, 948, 118, 176, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, 18446744073709551615, 18446744073709551615, 642, 659, 629, 646, 119, 121, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, 18446744073709551615, 18446744073709551615, 660, 667, 647, 654, 121, 122, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, 18446744073709551615, 18446744073709551615, 668, 676, 655, 663, 122, 124, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, 18446744073709551615, 18446744073709551615, 677, 682, 664, 669, 124, 125, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, 18446744073709551615, 18446744073709551615, 683, 689, 670, 676, 125, 127, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, 18446744073709551615, 18446744073709551615, 709, 717, 696, 704, 130, 132, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, 18446744073709551615, 18446744073709551615, 736, 742, 723, 729, 134, 136, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, 18446744073709551615, 18446744073709551615, 778, 798, 765, 785, 143, 145, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, 18446744073709551615, 18446744073709551615, 799, 806, 786, 793, 145, 146, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, 18446744073709551615, 18446744073709551615, 807, 820, 794, 807, 146, 148, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, 18446744073709551615, 18446744073709551615, 821, 823, 808, 810, 148, 149, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, 18446744073709551615, 18446744073709551615, 824, 864, 811, 851, 149, 156, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, 18446744073709551615, 18446744073709551615, 839, 851, 826, 838, 152, 154, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, 18446744073709551615, 18446744073709551615, 856, 864, 843, 851, 155, 156, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, 18446744073709551615, 18446744073709551615, 865, 871, 852, 858, 156, 158, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, 18446744073709551615, 18446744073709551615, 872, 886, 859, 873, 158, 160, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, 18446744073709551615, 18446744073709551615, 892, 910, 879, 897, 162, 165, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, 18446744073709551615, 18446744073709551615, 916, 928, 903, 915, 167, 169, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, 18446744073709551615, 18446744073709551615, 929, 931, 916, 918, 169, 170, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, 18446744073709551615, 18446744073709551615, 962, 1384, 949, 1371, 176, 254, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, 18446744073709551615, 18446744073709551615, 966, 991, 953, 978, 177, 180, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, 18446744073709551615, 18446744073709551615, 992, 1020, 979, 1007, 180, 187, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, 18446744073709551615, 18446744073709551615, 998, 1000, 985, 987, 182, 183, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, 18446744073709551615, 18446744073709551615, 1007, 1019, 994, 1006, 184, 186, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, 18446744073709551615, 18446744073709551615, 1021, 1025, 1008, 1012, 187, 188, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, 18446744073709551615, 18446744073709551615, 1028, 1036, 1015, 1023, 189, 190, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, 18446744073709551615, 18446744073709551615, 1037, 1041, 1024, 1028, 190, 191, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, 18446744073709551615, 18446744073709551615, 1042, 1044, 1029, 1031, 191, 192, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, 18446744073709551615, 18446744073709551615, 1045, 1052, 1032, 1039, 192, 193, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, 18446744073709551615, 18446744073709551615, 1057, 1072, 1044, 1059, 194, 199, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, 18446744073709551615, 18446744073709551615, 1058, 1065, 1045, 1052, 195, 196, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, 18446744073709551615, 18446744073709551615, 1066, 1071, 1053, 1058, 196, 198, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, 18446744073709551615, 18446744073709551615, 1077, 1081, 1064, 1068, 200, 201, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, 18446744073709551615, 18446744073709551615, 1084, 1100, 1071, 1087, 202, 204, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, 18446744073709551615, 18446744073709551615, 1101, 1103, 1088, 1090, 204, 205, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, 18446744073709551615, 18446744073709551615, 1104, 1108, 1091, 1095, 205, 206, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, 18446744073709551615, 18446744073709551615, 1109, 1111, 1096, 1098, 206, 207, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, 18446744073709551615, 18446744073709551615, 1112, 1119, 1099, 1106, 207, 208, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, 18446744073709551615, 18446744073709551615, 1120, 1122, 1107, 1109, 208, 209, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, 18446744073709551615, 18446744073709551615, 1123, 1125, 1110, 1112, 209, 210, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, 18446744073709551615, 18446744073709551615, 1126, 1133, 1113, 1120, 210, 211, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, 18446744073709551615, 18446744073709551615, 1134, 1145, 1121, 1132, 211, 212, true, "2023.[5][8]", "2023.[5][8]"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, 18446744073709551615, 18446744073709551615, 1153, 1155, 1140, 1142, 213, 214, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 101756270285429158, 6309445736017161690, 18446744073709551615, 18446744073709551615, 1158, 1192, 1145, 1179, 215, 218, true, "unitary semi-presidential republic", "unitary semi-presidential republic"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, 18446744073709551615, 18446744073709551615, 1166, 1183, 1153, 1170, 216, 217, true, "semi-presidential", "semi-presidential"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, 18446744073709551615, 18446744073709551615, 1193, 1197, 1180, 1184, 218, 219, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, 18446744073709551615, 18446744073709551615, 1202, 1209, 1189, 1196, 220, 221, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, 18446744073709551615, 18446744073709551615, 1210, 1212, 1197, 1199, 221, 222, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, 18446744073709551615, 18446744073709551615, 1213, 1218, 1200, 1205, 222, 223, true, "Paris", "Paris"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7060736712546470087, 14254659311922306724, 18446744073709551615, 18446744073709551615, 1224, 1246, 1211, 1233, 225, 228, true, "countrys largest city", "country's largest city"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, 18446744073709551615, 18446744073709551615, 1224, 1233, 1211, 1220, 225, 226, true, "countrys", "country's"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, 18446744073709551615, 18446744073709551615, 1251, 1286, 1238, 1273, 229, 234, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, 18446744073709551615, 18446744073709551615, 1269, 1286, 1256, 1273, 232, 234, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, 18446744073709551615, 18446744073709551615, 1288, 1311, 1275, 1298, 235, 239, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, 18446744073709551615, 18446744073709551615, 1312, 1319, 1299, 1306, 239, 240, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, 18446744073709551615, 18446744073709551615, 1320, 1383, 1307, 1370, 240, 253, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, 18446744073709551615, 18446744073709551615, 1320, 1329, 1307, 1316, 240, 241, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, 18446744073709551615, 18446744073709551615, 1331, 1335, 1318, 1322, 242, 243, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, 18446744073709551615, 18446744073709551615, 1337, 1345, 1324, 1332, 244, 245, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, 18446744073709551615, 18446744073709551615, 1347, 1352, 1334, 1339, 246, 247, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, 18446744073709551615, 18446744073709551615, 1354, 1362, 1341, 1349, 248, 249, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, 18446744073709551615, 18446744073709551615, 1364, 1374, 1351, 1361, 250, 251, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, 18446744073709551615, 18446744073709551615, 1379, 1383, 1366, 1370, 252, 253, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", "en", 0.93], ["semantic", "text", 0.96]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} +{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, 18446744073709551615, 18446744073709551615, 0, 188, 0, 188, 0, 28, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, 18446744073709551615, 18446744073709551615, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, 18446744073709551615, 18446744073709551615, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004679635976, 18446744073709551615, 18446744073709551615, 24, 41, 24, 41, 5, 7, true, "interband pairing", "interband pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, 18446744073709551615, 18446744073709551615, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["term", "enum-term-mark-1", 4522339299074192207, "TEXT", "#", 1.0, 18178792033664231045, 5215905145529509301, 18446744073709551615, 18446744073709551615, 45, 87, 45, 87, 8, 13, true, "two-band s-wave and d-wave superconductors", "two-band s-wave and d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, 18446744073709551615, 18446744073709551615, 45, 53, 45, 53, 8, 9, true, "two-band", "two-band"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, 18446744073709551615, 18446744073709551615, 54, 60, 54, 60, 9, 10, true, "s-wave", "s-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15865120430118694837, 607662791561950043, 18446744073709551615, 18446744073709551615, 65, 87, 65, 87, 11, 13, true, "d-wave superconductors", "d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, 18446744073709551615, 18446744073709551615, 65, 71, 65, 71, 11, 12, true, "d-wave", "d-wave"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, 18446744073709551615, 18446744073709551615, 88, 92, 88, 92, 13, 14, true, "with", "with"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7066208506210013514, 1315102098090612032, 18446744073709551615, 18446744073709551615, 93, 105, 93, 105, 14, 16, true, "D4h symmetry", "D4h symmetry"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, 18446744073709551615, 18446744073709551615, 93, 96, 93, 96, 14, 15, true, "D4h", "D4h"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, 18446744073709551615, 18446744073709551615, 106, 113, 106, 113, 16, 18, true, "in both", "in both"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5172475826427571765, 16752879714615995236, 18446744073709551615, 18446744073709551615, 114, 137, 114, 137, 18, 20, true, "time-reversal invariant", "time-reversal invariant"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, 18446744073709551615, 18446744073709551615, 114, 127, 114, 127, 18, 19, true, "time-reversal", "time-reversal"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, 18446744073709551615, 18446744073709551615, 146, 148, 146, 148, 22, 23, true, "as", "as"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 10193294999568911218, 6331719907444433820, 18446744073709551615, 18446744073709551615, 149, 171, 149, 171, 23, 25, true, "time-reversal symmetry", "time-reversal symmetry"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, 18446744073709551615, 18446744073709551615, 149, 162, 149, 162, 23, 24, true, "time-reversal", "time-reversal"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 25, 26, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, 18446744073709551615, 18446744073709551615, 181, 187, 181, 187, 26, 27, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, 18446744073709551615, 18446744073709551615, 189, 384, 189, 384, 28, 58, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, 18446744073709551615, 18446744073709551615, 193, 201, 193, 201, 29, 30, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 30, 31, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, 18446744073709551615, 18446744073709551615, 205, 214, 205, 214, 31, 32, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, 18446744073709551615, 18446744073709551615, 215, 244, 215, 244, 32, 35, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, 18446744073709551615, 18446744073709551615, 249, 264, 249, 264, 36, 38, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, 18446744073709551615, 18446744073709551615, 265, 271, 265, 271, 38, 40, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, 18446744073709551615, 18446744073709551615, 272, 286, 272, 286, 40, 41, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, 18446744073709551615, 18446744073709551615, 288, 293, 288, 293, 42, 43, true, "nodes", "nodes"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 3766089650286616147, 5895288868427388531, 18446744073709551615, 18446744073709551615, 294, 309, 294, 309, 43, 45, true, "can (dis)appear", "can (dis)appear"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, 18446744073709551615, 18446744073709551615, 298, 309, 298, 309, 44, 45, true, "(dis)appear", "(dis)appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, 18446744073709551615, 18446744073709551615, 311, 316, 311, 316, 46, 47, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, 18446744073709551615, 18446744073709551615, 322, 327, 322, 327, 49, 50, true, "leave", "leave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106670696871780136, 17807492235586576248, 18446744073709551615, 18446744073709551615, 328, 351, 328, 351, 50, 52, true, "high-symmetry locations", "high-symmetry locations"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, 18446744073709551615, 18446744073709551615, 328, 341, 328, 341, 50, 51, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, 18446744073709551615, 18446744073709551615, 357, 374, 357, 374, 53, 55, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, 18446744073709551615, 18446744073709551615, 375, 383, 375, 383, 55, 57, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, 18446744073709551615, 18446744073709551615, 385, 594, 385, 594, 58, 93, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, 18446744073709551615, 18446744073709551615, 398, 404, 398, 404, 60, 62, true, "in the", "in the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15559186615879240368, 12910915472651789195, 18446744073709551615, 18446744073709551615, 405, 416, 405, 416, 62, 64, true, "d-wave case", "d-wave case"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, 18446744073709551615, 18446744073709551615, 405, 411, 405, 411, 62, 63, true, "d-wave", "d-wave"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, 18446744073709551615, 18446744073709551615, 421, 425, 421, 425, 66, 67, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, 18446744073709551615, 18446744073709551615, 426, 430, 426, 430, 67, 68, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, 18446744073709551615, 18446744073709551615, 440, 454, 440, 454, 70, 72, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, 18446744073709551615, 18446744073709551615, 455, 475, 455, 475, 72, 74, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, 18446744073709551615, 18446744073709551615, 481, 490, 481, 490, 75, 76, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, 18446744073709551615, 18446744073709551615, 491, 498, 491, 498, 76, 77, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, 18446744073709551615, 18446744073709551615, 499, 508, 499, 508, 77, 78, true, "increases", "increases"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 18352755674675419019, 8051640294707098683, 18446744073709551615, 18446744073709551615, 510, 547, 510, 547, 79, 84, true, "flat zero-energy Andreev bound states", "flat zero-energy Andreev bound states"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, 18446744073709551615, 18446744073709551615, 515, 526, 515, 526, 80, 81, true, "zero-energy", "zero-energy"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, 18446744073709551615, 18446744073709551615, 548, 555, 548, 555, 84, 86, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, 18446744073709551615, 18446744073709551615, 560, 570, 560, 570, 87, 88, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, 18446744073709551615, 18446744073709551615, 571, 573, 571, 573, 88, 89, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, 18446744073709551615, 18446744073709551615, 574, 593, 574, 593, 89, 92, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", "en", 0.88], ["semantic", "text", 0.99]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} From 6dd246e5165a6c390d6533e003f6aae9d361e360 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Fri, 17 Nov 2023 14:28:15 +0100 Subject: [PATCH 09/22] updating document structure Signed-off-by: Peter Staar --- src/andromeda/nlp/cls/language.h | 8 +- src/andromeda/nlp/cls/semantic.h | 11 +- src/andromeda/tooling/models/base.h | 57 +- .../tooling/structs/elements/prov_element.h | 3 - .../tooling/structs/items/cls/base.h | 1 - .../tooling/structs/items/ent/instance.h | 5 +- src/andromeda/tooling/structs/subjects/base.h | 77 +- .../tooling/structs/subjects/document.h | 185 +- .../structs/subjects/document/doc_maintext.h | 4 +- .../subjects/document/doc_normalisation.h | 84 +- .../tooling/structs/subjects/figure.h | 12 + .../tooling/structs/subjects/table.h | 10 + tests/data/docs/doc_01.nlp.json | 87338 +++++++++++++--- tests/data/texts/references.nlp.jsonl | 4 +- tests/data/texts/semantics.nlp.jsonl | 14 +- tests/data/texts/terms.nlp.jsonl | 4 +- tests/data/texts/test_02A_text_01.jsonl | 2 +- tests/data/texts/test_02B_text_01.jsonl | 2 +- tests/test_nlp.py | 10 +- 19 files changed, 72938 insertions(+), 14893 deletions(-) diff --git a/src/andromeda/nlp/cls/language.h b/src/andromeda/nlp/cls/language.h index ebe07620..78b9b16c 100644 --- a/src/andromeda/nlp/cls/language.h +++ b/src/andromeda/nlp/cls/language.h @@ -229,13 +229,13 @@ namespace andromeda } } - para->properties.emplace_back(para->get_hash(), TEXT, "#/texts/"+std::to_string(ind), + para->properties.emplace_back(para->get_hash(), TEXT, para->get_self_ref(), //"#/texts/"+std::to_string(ind), get_name(), label, conf); para->applied_models.insert(get_key()); - subj.properties.emplace_back(para->get_hash(), TEXT, "#/texts/"+std::to_string(ind), - get_name(), label, conf); - subj.applied_models.insert(get_key()); + //subj.properties.emplace_back(para->get_hash(), TEXT, "#/texts/"+std::to_string(ind), + //get_name(), label, conf); + //subj.applied_models.insert(get_key()); } base_property prop(subj.get_hash(), DOCUMENT, "#", diff --git a/src/andromeda/nlp/cls/semantic.h b/src/andromeda/nlp/cls/semantic.h index 84653474..b60d43ac 100644 --- a/src/andromeda/nlp/cls/semantic.h +++ b/src/andromeda/nlp/cls/semantic.h @@ -364,13 +364,16 @@ namespace andromeda //std::string key = get_key(); - para->properties.emplace_back(para->get_hash(), TEXT, "#/texts/"+std::to_string(ind), + para->properties.emplace_back(para->get_hash(), TEXT, para->get_self_ref(), //"#/texts/"+std::to_string(ind), get_name(), label, conf); para->applied_models.insert(get_key()); - subj.properties.emplace_back(para->get_hash(), TEXT, "#/texts/"+std::to_string(ind), - get_name(), label, conf); - subj.applied_models.insert(get_key()); + //subj.properties.emplace_back(para->get_hash(), TEXT, "#/texts/"+std::to_string(ind), + //get_name(), label, conf); + //subj.applied_models.insert(get_key()); + + //subj.properties.emplace_back(para->get_hash(), TEXT, para->get_self_ref(), + //get_name(), label, conf); } return update_applied_models(subj); diff --git a/src/andromeda/tooling/models/base.h b/src/andromeda/tooling/models/base.h index dcf80d26..36385f74 100644 --- a/src/andromeda/tooling/models/base.h +++ b/src/andromeda/tooling/models/base.h @@ -39,8 +39,12 @@ namespace andromeda virtual bool apply(std::string& text, nlohmann::json& annots) { return false; } virtual bool apply(subject& subj) = 0;// { return false; } - virtual bool apply(subject
& subj) = 0;//{ return false; } + //virtual bool apply(subject
& subj) = 0;//{ return false; } + virtual bool apply(subject
& subj); //{ return false; } + virtual bool apply_on_table_data(subject
& subj) { return false; } + + virtual bool apply(subject
& subj); virtual bool apply(subject& subj); static bool finalise(subject& subj) { return false; } @@ -88,6 +92,44 @@ namespace andromeda return true; } + bool base_nlp_model::apply(subject
& subj) + { + //LOG_S(INFO) << __FUNCTION__ << " (apply on table)"; + + if(not satisfies_dependencies(subj)) + { + return false; + } + + for(auto& caption:subj.captions) + { + //LOG_S(INFO) << __FUNCTION__ << " (apply on table-caption)"; + this->apply(*caption); + } + + this->apply_on_table_data(subj); + + return true; + } + + bool base_nlp_model::apply(subject
& subj) + { + //LOG_S(INFO) << __FUNCTION__ << " (apply on figure)"; + + if(not satisfies_dependencies(subj)) + { + return false; + } + + for(auto& caption:subj.captions) + { + //LOG_S(INFO) << __FUNCTION__ << " (apply on figure-caption)"; + this->apply(*caption); + } + + return true; + } + bool base_nlp_model::apply(subject& subj) { if(not satisfies_dependencies(subj)) @@ -95,16 +137,27 @@ namespace andromeda return false; } + //subj.join_properties_with_texts(); for(auto& text_ptr:subj.texts) { this->apply(*text_ptr); } - + //subj.clear_properties_from_texts(); + + //subj.join_properties_with_tables(); for(auto& table_ptr:subj.tables) { this->apply(*table_ptr); } + //subj.clear_properties_from_tables(); + //subj.join_properties_with_tables(); + for(auto& figure_ptr:subj.figures) + { + this->apply(*figure_ptr); + } + //subj.clear_properties_from_tables(); + return update_applied_models(subj); } diff --git a/src/andromeda/tooling/structs/elements/prov_element.h b/src/andromeda/tooling/structs/elements/prov_element.h index ea75274e..d753fa5c 100644 --- a/src/andromeda/tooling/structs/elements/prov_element.h +++ b/src/andromeda/tooling/structs/elements/prov_element.h @@ -46,9 +46,6 @@ namespace andromeda ind_type get_maintext_ind() { return maintext_ind; } ind_type get_pdforder_ind() { return pdforder_ind; } - //std::string get_path() { return path; } - //void set_path(std::string val) { path = val; } - std::string get_item_ref() { return item_ref; } void set_item_ref(std::string val) { item_ref = val; } diff --git a/src/andromeda/tooling/structs/items/cls/base.h b/src/andromeda/tooling/structs/items/cls/base.h index 96b13a06..366db299 100644 --- a/src/andromeda/tooling/structs/items/cls/base.h +++ b/src/andromeda/tooling/structs/items/cls/base.h @@ -23,7 +23,6 @@ namespace andromeda base_property(hash_type subj_hash, // hash of the subject from which the entity comes subject_name subj_name, std::string subj_path, - //std::string type, model_name model, std::string label, val_type conf); diff --git a/src/andromeda/tooling/structs/items/ent/instance.h b/src/andromeda/tooling/structs/items/ent/instance.h index 86a36e31..4d3ccdbf 100644 --- a/src/andromeda/tooling/structs/items/ent/instance.h +++ b/src/andromeda/tooling/structs/items/ent/instance.h @@ -119,6 +119,10 @@ namespace andromeda std::size_t ctoken_len() { return (ctok_range[1]-ctok_range[0]);} std::size_t wtoken_len() { return (wtok_range[1]-wtok_range[0]);} + hash_type get_subj_hash() const { return subj_hash; } + subject_name get_subj_name() const { return subj_name; } + std::string get_subj_path() const { return subj_path; } + hash_type get_ehash() const { return ehash; } // entity-hash hash_type get_ihash() const { return ihash; } // instance-hash: combination of subj-hash, ent-hash and position @@ -175,7 +179,6 @@ namespace andromeda protected: hash_type subj_hash; // hash of the subject from which the entity comes - subject_name subj_name; std::string subj_path; diff --git a/src/andromeda/tooling/structs/subjects/base.h b/src/andromeda/tooling/structs/subjects/base.h index edf74181..551e2946 100644 --- a/src/andromeda/tooling/structs/subjects/base.h +++ b/src/andromeda/tooling/structs/subjects/base.h @@ -27,9 +27,9 @@ namespace andromeda const static inline std::string hash_lbl = "hash"; //const static inline std::string text_lbl = "text"; - const static inline std::string dloc_lbl = "dloc"; - //const static inline std::string dref_lbl = "dref"; - const static inline std::string jref_lbl = "$ref"; + const static inline std::string dloc_lbl = "dloc"; // location in the document + const static inline std::string sref_lbl = "sref"; // self-reference via path + const static inline std::string jref_lbl = "$ref"; // json-ref convention const static inline std::string name_lbl = "name"; const static inline std::string type_lbl = "type"; @@ -55,6 +55,12 @@ namespace andromeda virtual ~base_subject() {} + std::string get_self_ref(); + void set_self_ref(std::string sref); + + bool is_valid() const { return valid; } + void set_valid(bool val) { this->valid=val; } + static bool set_prov_refs(const nlohmann::json& data, const std::vector >& doc_provs, std::vector >& base_provs); @@ -103,22 +109,28 @@ namespace andromeda std::string key, std::vector >& vals); - public: - + //public: + protected: + bool valid; subject_name name; hash_type hash; // hash of the item hash_type dhash; // hash of the document of the item + protected: + std::string dloc; // location of item in the document # - + std::string sref; + + public: + std::set applied_models; std::vector properties; std::vector instances; std::vector relations; - + //std::vector entities; }; @@ -129,8 +141,9 @@ namespace andromeda hash(-1), dhash(-1), - dloc(""), - + dloc("#"), + sref("#"), + applied_models({}), properties({}), @@ -145,8 +158,9 @@ namespace andromeda hash(-1), dhash(-1), - dloc(""), - + dloc("#"), + sref("#"), + applied_models({}), properties({}), @@ -156,7 +170,7 @@ namespace andromeda base_subject::base_subject(uint64_t dhash, std::string dloc, - subject_name name)://, prov_element& prov): + subject_name name): valid(true), name(name), @@ -164,14 +178,46 @@ namespace andromeda dhash(dhash), dloc(dloc), - + sref("#"), + applied_models({}), properties({}), instances({}), relations({}) - {} + { + auto parts = utils::split(dloc, "#"); + if(parts.size()==2) + { + sref += parts.at(1); + } + else + { + LOG_S(WARNING) << "could not derive sref from dloc: " << dloc; + } + } + + void base_subject::set_self_ref(std::string sref) + { + this->sref = sref; + } + + std::string base_subject::get_self_ref() + { + return sref; + /* + if(dloc=="#") + { + return dloc; + } + + auto parts = utils::split(dloc, "#"); + assert(parts.size()==2); + return ("#"+parts.at(1)); + */ + } + bool base_subject::set_prov_refs(const nlohmann::json& data, const std::vector >& doc_provs, std::vector >& base_provs) @@ -203,7 +249,6 @@ namespace andromeda if(prov!=NULL) { nlohmann::json pref; - //pref[base_subject::jref_lbl] = prov->get_pref(); pref[base_subject::jref_lbl] = prov->get_self_ref(); result.push_back(pref); @@ -243,6 +288,7 @@ namespace andromeda { result[hash_lbl] = hash; result[dloc_lbl] = dloc; + result[sref_lbl] = sref; } if((properties.size()>0) and (filters.size()==0 or filters.count(prps_lbl))) @@ -294,6 +340,7 @@ namespace andromeda { hash = item.value(hash_lbl, hash); dloc = item.value(dloc_lbl, dloc); + sref = item.value(sref_lbl, sref); applied_models.clear(); if(item.count(applied_models_lbl)) diff --git a/src/andromeda/tooling/structs/subjects/document.h b/src/andromeda/tooling/structs/subjects/document.h index 2749d9b1..f46a9c1a 100644 --- a/src/andromeda/tooling/structs/subjects/document.h +++ b/src/andromeda/tooling/structs/subjects/document.h @@ -87,6 +87,12 @@ namespace andromeda void init_provs(); void show_provs(); + void clear_properties_from_texts(); + void clear_properties_from_tables(); + + void join_properties_with_texts(); + void join_properties_with_tables(); + private: void set_dscr(nlohmann::json& data); @@ -260,7 +266,7 @@ namespace andromeda return true; } - + bool subject::from_json(const nlohmann::json& item, const std::vector >& doc_provs) { @@ -450,83 +456,56 @@ namespace andromeda return (valid_props and valid_insts and valid_rels); } - + bool subject::finalise_properties() { - /* - std::map property_total; - std::map, val_type> property_label_mapping; + // only keep document global properties + std::set > doc_properties={}; + + for(auto& prop:properties) + { + doc_properties.insert({prop.get_subj_hash(), prop.get_model()}); + } + for(auto& text:texts) { for(auto& prop:text->properties) { - properties.push_back(prop); - - //std::string mdl = prop.get_type(); - model_name mdl = prop.get_model(); - std::string lbl = prop.get_label(); - - val_type conf = prop.get_conf(); - val_type dst = text->dst; - - if(property_total.count(mdl)==1) - { - property_total[mdl] += dst; - } - else - { - property_total[mdl] = dst; - } - - std::pair key={mdl,lbl}; - if(property_label_mapping.count(key)==1) - { - property_label_mapping[key] += dst*conf; - } - else - { - property_label_mapping[key] = dst*conf; - } - } + std::pair key({prop.get_subj_hash(), prop.get_model()}); + if(doc_properties.count(key)==0) + { + properties.push_back(prop); + } + } + text->properties.clear(); } - properties.clear(); - for(auto itr=property_label_mapping.begin(); itr!=property_label_mapping.end(); itr++) + for(auto& table:tables) { - model_name mdl = (itr->first).first; - itr->second /= (property_total.at(mdl)); - - base_property prop(this->get_hash(), TEXT, "#/texts", - (itr->first).first, (itr->first).second, itr->second); - properties.push_back(prop); - } - - //LOG_S(INFO) << "properties: \n\n" << tabulate(properties); - - std::sort(properties.begin(), properties.end()); - - //LOG_S(INFO) << "properties: \n\n" << tabulate(properties); + for(auto& prop:table->properties) + { + std::pair key({prop.get_subj_hash(), prop.get_model()}); + if(doc_properties.count(key)==0) + { + properties.push_back(prop); + } + } + table->properties.clear(); + } - for(auto itr=properties.begin(); itr!=properties.end(); ) + for(auto& figure:figures) { - auto next = itr; - next++; - - if(itr==properties.end() or next==properties.end()) + for(auto& prop:figure->properties) { - break; - } - else if(itr->get_type()==next->get_type()) - { - properties.erase(next); - } - else - { - itr++; - } - } - */ + std::pair key({prop.get_subj_hash(), prop.get_model()}); + if(doc_properties.count(key)==0) + { + properties.push_back(prop); + } + } + figure->properties.clear(); + } return true; } @@ -619,6 +598,82 @@ namespace andromeda return true; } + void subject::clear_properties_from_texts() + { + for(auto& text:texts) + { + text->properties.clear(); + } + } + + void subject::join_properties_with_texts() + { + clear_properties_from_texts(); + + for(auto& prop:this->properties) + { + std::string path = prop.get_subj_path(); + LOG_S(INFO) << path; + + auto parts = utils::split(path, "/"); + + if(parts.size()<3) + { + continue; + } + + int ind = std::stoi(parts.at(2)); + LOG_S(INFO) << " -> " << ind; + + if(parts.at(1)==texts_lbl and indget_hash()==prop.get_subj_hash()); + texts.at(ind)->properties.push_back(prop); + } + else + {} + } + } + + void subject::clear_properties_from_tables() + { + for(auto& table:tables) + { + table->properties.clear(); + } + } + + void subject::join_properties_with_tables() + { + for(auto& table:tables) + { + table->properties.clear(); + } + + for(auto& prop:this->properties) + { + std::string path = prop.get_subj_path(); + LOG_S(INFO) << path; + + auto parts = utils::split(path, "/"); + if(parts.size()<3) + { + continue; + } + + int ind = std::stoi(parts.at(2)); + LOG_S(INFO) << " -> " << ind; + + if(parts.at(1)==tables_lbl and indget_hash()==prop.get_subj_hash()); + tables.at(ind)->properties.push_back(prop); + } + else + {} + } + } + } #endif diff --git a/src/andromeda/tooling/structs/subjects/document/doc_maintext.h b/src/andromeda/tooling/structs/subjects/document/doc_maintext.h index 146cc0db..6623789f 100644 --- a/src/andromeda/tooling/structs/subjects/document/doc_maintext.h +++ b/src/andromeda/tooling/structs/subjects/document/doc_maintext.h @@ -105,7 +105,7 @@ namespace andromeda (jump_col or jump_page)) { curr->concatenate(next); - next->valid=false; + next->set_valid(false); } } @@ -113,7 +113,7 @@ namespace andromeda auto itr=texts.begin(); while(itr!=texts.end()) { - if((*itr)->valid) + if((*itr)->is_valid()) { itr++; } diff --git a/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h b/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h index 779f26c4..182ea3b7 100644 --- a/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h +++ b/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h @@ -480,27 +480,89 @@ namespace andromeda void doc_normalisation::resolve_paths() { auto& texts = doc.texts; + + auto& footnotes = doc.footnotes; + auto& page_headers = doc.page_headers; + auto& page_footers = doc.page_footers; + auto& other = doc.other; + auto& tables = doc.tables; auto& figures = doc.figures; for(index_type l=0; lset_self_ref(ss.str()); + for(auto& prov:texts.at(l)->provs) { - std::stringstream ss; - ss << "#/" << doc_type::texts_lbl << "/" << l; + prov->set_item_ref(ss.str()); + } + } + for(index_type l=0; lset_self_ref(ss.str()); + + for(auto& prov:footnotes.at(l)->provs) + { + prov->set_item_ref(ss.str()); + } + } + + for(index_type l=0; lset_self_ref(ss.str()); + + for(auto& prov:page_headers.at(l)->provs) + { prov->set_item_ref(ss.str()); } } + for(index_type l=0; lset_self_ref(ss.str()); + + for(auto& prov:page_footers.at(l)->provs) + { + prov->set_item_ref(ss.str()); + } + } + + for(index_type l=0; lset_self_ref(ss.str()); + + for(auto& prov:other.at(l)->provs) + { + prov->set_item_ref(ss.str()); + } + } + for(index_type l=0; lset_self_ref(ss.str()); + for(auto& prov:tables.at(l)->provs) { - std::stringstream ss; - ss << "#/" << doc_type::tables_lbl << "/" << l; - prov->set_item_ref(ss.str()); } @@ -514,17 +576,21 @@ namespace andromeda << doc_type::captions_lbl << "/" << k; prov->set_item_ref(ss.str()); + + tables.at(l)->captions.at(k)->set_self_ref(ss.str()); } } } for(index_type l=0; lset_self_ref(ss.str()); + for(auto& prov:figures.at(l)->provs) { - std::stringstream ss; - ss << "#/" << doc_type::figures_lbl << "/" << l; - prov->set_item_ref(ss.str()); } @@ -538,6 +604,8 @@ namespace andromeda << doc_type::captions_lbl << "/" << k; prov->set_item_ref(ss.str()); + + figures.at(l)->captions.at(k)->set_self_ref(ss.str()); } } } diff --git a/src/andromeda/tooling/structs/subjects/figure.h b/src/andromeda/tooling/structs/subjects/figure.h index 0c02341d..07ae2321 100644 --- a/src/andromeda/tooling/structs/subjects/figure.h +++ b/src/andromeda/tooling/structs/subjects/figure.h @@ -169,6 +169,18 @@ namespace andromeda bool subject
::set_tokens(std::shared_ptr char_normaliser, std::shared_ptr text_normaliser) { + valid = true; + + for(auto& caption:captions) + { + caption->set_tokens(char_normaliser, text_normaliser); + } + + for(auto& footnote:footnotes) + { + footnote->set_tokens(char_normaliser, text_normaliser); + } + return true; } diff --git a/src/andromeda/tooling/structs/subjects/table.h b/src/andromeda/tooling/structs/subjects/table.h index 399d879f..841fbd79 100644 --- a/src/andromeda/tooling/structs/subjects/table.h +++ b/src/andromeda/tooling/structs/subjects/table.h @@ -387,6 +387,16 @@ namespace andromeda { valid = true; + for(auto& caption:captions) + { + caption->set_tokens(char_normaliser, text_normaliser); + } + + for(auto& footnote:footnotes) + { + footnote->set_tokens(char_normaliser, text_normaliser); + } + for(auto& row:data) { for(auto& cell:row) diff --git a/tests/data/docs/doc_01.nlp.json b/tests/data/docs/doc_01.nlp.json index 07a9c770..52989428 100644 --- a/tests/data/docs/doc_01.nlp.json +++ b/tests/data/docs/doc_01.nlp.json @@ -1,8 +1,17 @@ { "applied-models": [ + "cite", + "expression", + "language", + "lapos", "link", + "name", "numval", - "semantic" + "parenthesis", + "quote", + "semantic", + "sentence", + "term" ], "body": [ { @@ -613,6 +622,9 @@ } ], "description": { + "languages": [ + "en" + ], "logs": [ { "agent": "CCS", @@ -635,6 +647,7 @@ "$ref": "#/page-elements/52" } ], + "sref": "#/figures/0/captions/0", "text": "FIGURE1 Schematic of a data flow for the creation of a Knowledge Graph. The data flow consists of three main task types: extraction of document elements (abstracts, paragraphs, tables, figures, etc.), annotation of these elements to detect entities and their relationships and finally aggregation of these entities and their relationships. For every task, we keep complete provenance, such that we can always trace back to a specific document or element that embeds a certain entity or relationship", "text-hash": 12816755167354360565, "type": "caption" @@ -651,6 +664,7 @@ "$ref": "#/page-elements/51" } ], + "sref": "#/figures/0", "type": "figure" }, { @@ -666,6 +680,7 @@ "$ref": "#/page-elements/67" } ], + "sref": "#/figures/1", "type": "figure" }, { @@ -679,6 +694,7 @@ "$ref": "#/page-elements/109" } ], + "sref": "#/figures/2/captions/0", "text": "FIGURE 3 The time-to-solution for k-hop graph traversal for Neo4J and our new graph engine. The results were obtained for the graph500 and twitter benchmark graphs. The 10th and 90th percentiles are represented by the shaded regions; the median is shown by the markers", "text-hash": 9558113653035301733, "type": "caption" @@ -695,6 +711,7 @@ "$ref": "#/page-elements/108" } ], + "sref": "#/figures/2", "type": "figure" }, { @@ -708,6 +725,7 @@ "$ref": "#/page-elements/120" } ], + "sref": "#/figures/3/captions/0", "text": "FIGURE 4 Visual workflow editor for deep queries in the CPS platform. The interface exhibits a left toolbar to pick specific graph operations, a main drawing area for the workflow DAG and a right panel to inspect and define parameters of each graph operation. Colors indicate different operation types such as input node-retrieval (blue), traversal (red), logical operators (green) and transform functions (yellow). Valid workflows can be executed using the ' play ' button", "text-hash": 12590315652817418422, "type": "caption" @@ -724,6 +742,7 @@ "$ref": "#/page-elements/119" } ], + "sref": "#/figures/3", "type": "figure" }, { @@ -739,6 +758,7 @@ "$ref": "#/page-elements/130" } ], + "sref": "#/figures/4", "type": "figure" }, { @@ -752,6 +772,7 @@ "$ref": "#/page-elements/148" } ], + "sref": "#/figures/5/captions/0", "text": "FIGURE5 The architectural design of the CPS platform. On the left, we show the data flow processing architecture orchestrated through an asynchronous REST API. On the right, we sketch the multitenant KG serving facility which provides a dedicated environment for each project", "text-hash": 1256907401557265619, "type": "caption" @@ -768,6 +789,7 @@ "$ref": "#/page-elements/147" } ], + "sref": "#/figures/5", "type": "figure" }, { @@ -781,6 +803,7 @@ "$ref": "#/page-elements/158" } ], + "sref": "#/figures/6/captions/0", "text": "FIGURE 6 Sketch of the entire pipeline to perform deep data exploration on large corpora", "text-hash": 10669134213704159562, "type": "caption" @@ -797,6 +820,7 @@ "$ref": "#/page-elements/157" } ], + "sref": "#/figures/6", "type": "figure" }, { @@ -810,6 +834,7 @@ "$ref": "#/page-elements/174" } ], + "sref": "#/figures/7/captions/0", "text": "FIGURE 7 The evaluation workflow to identify the petroleum system elements (PSE) in an article and infer its properties. It starts by searching for all petroleum system elements of a certain type (eg, source, reservoir or seal) and a particular report (worktasks 1 and 2). By successive graph traversals (worktasks 3-5, 7-9, 11, 12) along specific edges and logical operations (worktasks 6, 10, 13, 14), we are able to obtain a list of candidate formations (worktask 15), ages (worktask 16) and rocks (worktask 17), ranked by their accumulated weight. Execution of this query takes less than 18 ms on average", "text-hash": 2397375916393726887, "type": "paragraph" @@ -826,6 +851,7 @@ "$ref": "#/page-elements/173" } ], + "sref": "#/figures/7", "type": "figure" }, { @@ -841,6 +867,7 @@ "$ref": "#/page-elements/217" } ], + "sref": "#/figures/8", "type": "figure" } ], @@ -854,6 +881,7 @@ "$ref": "#/page-elements/19" } ], + "sref": "#/footnotes/0", "text": "This is an open access article under the terms of the Creative Commons Attribution License, which permits use, distribution and reproduction in any medium, provided the original work is properly cited.", "text-hash": 11226800603937609484, "type": "footnote" @@ -867,6 +895,7 @@ "$ref": "#/page-elements/20" } ], + "sref": "#/footnotes/1", "text": "\u00a9 2020 The Authors. Applied AI Letters published by John Wiley & Sons Ltd.", "text-hash": 2671219352918255461, "type": "footnote" @@ -875,6 +904,27 @@ "hash": 18446744073709551615, "instances": { "data": [ + [ + "name", + "person-name", + 16781763356419781679, + "TEXT", + "#/texts/2", + 1.0, + 4686361850733567621, + 14538190648130419824, + 18446744073709551615, + 18446744073709551615, + 0, + 17, + 0, + 17, + 0, + 6, + true, + "Peter W J Staar", + "Peter W. J. Staar" + ], [ "numval", "ival", @@ -939,2437 +989,2416 @@ "taa@zurich.ibm.com" ], [ - "numval", - "ival", - 11913688961435238004, + "name", + "person-name", + 4017434568255781081, "TEXT", - "#/texts/13", + "#/texts/8", 1.0, - 17767354399704235161, - 9682837417262995739, + 9807900919297989315, + 8857913618678092312, 18446744073709551615, 18446744073709551615, 0, - 1, + 32, 0, - 1, + 32, 0, - 1, + 7, true, - "1", - "1" + "Correspondence Peter W J Staar", + "Correspondence Peter W. J. Staar" ], [ - "numval", - "year", - 9977041563469582014, + "sentence", + "", + 4017434568255781081, "TEXT", - "#/texts/14", + "#/texts/8", 1.0, - 389609625548777059, - 17632943630740203190, + 1463783400548512489, + 4562795260271874000, 18446744073709551615, 18446744073709551615, - 6, - 10, - 6, - 10, - 2, - 3, + 0, + 95, + 0, + 95, + 0, + 19, true, - "2015", - "2015" + "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland.", + "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland." ], [ - "numval", - "fval", - 9977041563469582014, + "term", + "single-term", + 4017434568255781081, "TEXT", - "#/texts/14", + "#/texts/8", 1.0, - 12178341415896439105, - 13434398423091096866, + 16114797969310195405, + 3117232298322129099, 18446744073709551615, 18446744073709551615, - 44, - 47, - 44, - 47, - 9, + 34, + 46, + 34, + 46, + 8, 10, true, - "2.7", - "2.7" + "IBM Research", + "IBM Research" ], [ - "numval", - "ival", - 4361549266817300114, + "term", + "single-term", + 4017434568255781081, "TEXT", - "#/texts/15", + "#/texts/8", 1.0, - 17767354399704235162, - 7526268954444592619, + 497725968887992147, + 15543972956793692858, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 48, + 61, + 48, + 61, + 11, + 12, true, - "2", - "2" + "Saumerstrasse", + "Saumerstrasse" ], [ - "numval", - "ival", - 4361549266817300114, + "term", + "single-term", + 4017434568255781081, "TEXT", - "#/texts/15", + "#/texts/8", 1.0, - 15441160910541481979, - 7890928592616001252, + 13928399879966460166, + 8300371900188525862, 18446744073709551615, 18446744073709551615, - 3, - 5, - 3, - 5, - 2, - 3, + 70, + 81, + 70, + 81, + 15, + 16, true, - "15", - "15" + "Rueschlikon", + "Rueschlikon" ], [ - "numval", - "ival", - 8425126282903547933, + "term", + "single-term", + 4017434568255781081, "TEXT", - "#/texts/16", + "#/texts/8", 1.0, - 17767354399704235161, - 14071188586038459490, + 2664439525053388608, + 478252263928496257, 18446744073709551615, 18446744073709551615, - 77, - 78, - 77, - 78, - 15, - 16, + 83, + 94, + 83, + 94, + 17, + 18, true, - "1", - "1" + "Switzerland", + "Switzerland" ], [ - "numval", - "year", - 14190244699299580163, + "term", + "single-term", + 4017434568255781081, "TEXT", - "#/texts/21", + "#/texts/8", 1.0, - 389609625548777062, - 16322066304153845812, + 329104147796246645, + 810864344826152709, 18446744073709551615, 18446744073709551615, - 40, - 44, - 40, - 44, - 9, - 10, + 96, + 101, + 96, + 101, + 19, + 20, true, - "2010", - "2010" + "Email", + "Email" ], [ - "numval", - "ival", - 1118972765223422660, + "parenthesis", + "round brackets", + 11695737263227886476, "TEXT", - "#/texts/27", + "#/texts/10", 1.0, - 17767354399704235161, - 16395526852875690261, + 329104053210154735, + 17075323869805573137, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 601, + 606, + 601, + 606, + 99, + 102, true, - "1", - "1" + "(CPS)", + "(CPS)" ], [ - "numval", - "ival", - 324023167304456371, + "expression", + "common", + 11695737263227886476, "TEXT", - "#/texts/28", + "#/texts/10", 1.0, - 17767354399704235162, - 964743056782930174, + 12178341415895450733, + 9671099957403583579, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 358, + 362, + 358, + 362, + 58, + 59, true, - "2", - "2" + "etc", + "etc." ], [ - "numval", - "ival", - 324023167304456371, + "expression", + "word-concatenation", + 11695737263227886476, "TEXT", - "#/texts/28", + "#/texts/10", 1.0, - 17767354399704235161, - 964743056733707724, + 5044385734724420019, + 12757516288413416407, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 5, - 6, + 821, + 837, + 821, + 837, + 140, + 141, true, - "1", - "1" + "state-of-the-art", + "state-of-the-art" ], [ - "numval", - "ival", - 4651508276868765576, + "expression", + "word-concatenation", + 11695737263227886476, "TEXT", - "#/texts/29", + "#/texts/10", 1.0, - 17767354399704235163, - 12716136939749916250, + 6165987369755118397, + 2933533005804678612, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 1539, + 1548, + 1539, + 1548, + 245, + 246, true, - "3", - "3" + "endto-end", + "endto-end" ], [ - "numval", - "ival", - 3052020526349962744, + "expression", + "word-concatenation", + 11695737263227886476, "TEXT", - "#/texts/30", + "#/texts/10", 1.0, - 17767354399704235162, - 4099649421554807498, + 15984801488078789848, + 11443881616252239060, 18446744073709551615, 18446744073709551615, - 498, - 499, - 498, - 499, - 85, - 86, + 1573, + 1583, + 1573, + 1583, + 250, + 251, true, - "2", - "2" + "real-world", + "real-world" ], [ - "numval", - "ival", - 6725501529910185390, + "sentence", + "", + 11695737263227886476, "TEXT", - "#/texts/31", + "#/texts/10", 1.0, - 17767354399704235163, - 14253331712813347451, + 2370655382906505271, + 8040324972313183116, 18446744073709551615, 18446744073709551615, - 171, - 172, - 171, - 172, - 28, - 29, + 0, + 123, + 0, + 123, + 0, + 21, true, - "3", - "3" + "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data.", + "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data." ], [ - "numval", - "ival", - 6725501529910185390, + "sentence", + "", + 11695737263227886476, "TEXT", - "#/texts/31", + "#/texts/10", 1.0, - 17767354399704235156, - 14253331712656803661, + 8027272490911089522, + 7702147940060331105, 18446744073709551615, 18446744073709551615, - 201, - 202, - 201, - 202, - 35, - 36, + 124, + 261, + 124, + 261, + 21, + 43, true, - "4", - "4" + "Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world.", + "Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world." ], [ - "numval", - "ival", - 14814111183601762276, + "sentence", + "", + 11695737263227886476, "TEXT", - "#/texts/32", + "#/texts/10", 1.0, - 17767354399704235162, - 3186926300182333312, + 17559485512387879488, + 4960776794400025005, 18446744073709551615, 18446744073709551615, - 152, - 153, - 152, - 153, - 28, - 29, + 262, + 469, + 262, + 469, + 43, + 76, true, - "2", - "2" + "Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries.", + "Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries." ], [ - "numval", - "ival", - 14814111183601762276, + "sentence", + "", + 11695737263227886476, "TEXT", - "#/texts/32", + "#/texts/10", 1.0, - 17767354399704235163, - 3186926300062863412, + 3570937525268539532, + 11346171061679122962, 18446744073709551615, 18446744073709551615, - 251, - 252, - 251, - 252, - 48, - 49, + 470, + 607, + 470, + 607, + 76, + 103, true, - "3", - "3" + "In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS).", + "In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS)." ], [ - "numval", - "year", - 18391264192891079539, + "sentence", + "", + 11695737263227886476, "TEXT", - "#/texts/33", + "#/texts/10", 1.0, - 389609625548777262, - 8826555294676663632, + 13906125717568729148, + 4155905020420410366, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 608, + 793, + 608, + 793, + 103, + 134, true, - "2020", - "2020" + "Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried.", + "Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried." ], [ - "numval", - "year", - 18391264192891079539, + "sentence", + "", + 11695737263227886476, "TEXT", - "#/texts/33", + "#/texts/10", 1.0, - 389609625548777251, - 8826555296349648778, + 7674845204641058037, + 6672554339198903999, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 794, + 1004, + 794, + 1004, + 134, + 162, true, - "2023", - "2023" + "To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform.", + "To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform." ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "", + 11695737263227886476, "TEXT", - "#/texts/33", + "#/texts/10", 1.0, - 8104408072666212335, - 13552219042525319352, + 3532957815608940811, + 14429112738710635391, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 1005, + 1171, + 1005, + 1171, + 162, + 185, true, - "10.1002", - "10.1002" + "This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities.", + "This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities." ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "", + 11695737263227886476, "TEXT", - "#/texts/33", + "#/texts/10", 1.0, - 389609625548868096, - 8826558551385119058, + 9674378140136415946, + 14302529272335550558, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 1172, + 1256, + 1172, + 1256, + 185, + 199, true, - "2.20", - "2.20" + "Both components are tightly integrated and can be easily consumed through REST APIs.", + "Both components are tightly integrated and can be easily consumed through REST APIs." ], [ - "numval", - "ival", - 18391264192891079539, + "sentence", + "", + 11695737263227886476, "TEXT", - "#/texts/33", + "#/texts/10", 1.0, - 14654386914267794441, - 12796143052106760105, + 4006066418266254732, + 8099847092788323681, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 1257, + 1391, + 1257, + 1391, + 199, + 220, true, - "26895595", - "26895595" + "Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach.", + "Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach." ], [ - "numval", - "ival", - 18391264192891079539, + "sentence", + "", + 11695737263227886476, "TEXT", - "#/texts/33", + "#/texts/10", 1.0, - 17767354399704235162, - 7753390158484899261, + 6036810454349605181, + 11082321410160481613, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 1392, + 1487, + 1392, + 1487, + 220, + 235, true, - "2", - "2" + "The CPS platform is designed as a modular microservice system operating on Kubernetes clusters.", + "The CPS platform is designed as a modular microservice system operating on Kubernetes clusters." ], [ - "numval", - "ival", - 18391264192891079539, + "sentence", + "", + 11695737263227886476, "TEXT", - "#/texts/33", + "#/texts/10", 1.0, - 15441160910541481791, - 3518619573290839093, + 9891169339298843383, + 12261378132459206353, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 1488, + 1624, + 1488, + 1624, + 235, + 259, true, - "23", - "23" + "Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry.", + "Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry." ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "enum-term-mark-2", + 11695737263227886476, "TEXT", - "#/texts/33", + "#/texts/10", 1.0, - 15441160910541481543, - 3518617976696906498, + 848781837929279741, + 6552561416683889377, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 1603, + 1623, + 1603, + 1623, + 254, + 258, true, - "08", - "08" + "oil and gas industry", + "oil and gas industry" ], [ - "link", - "url", - 18391264192891079539, + "term", + "enum-term-mark-3", + 11695737263227886476, "TEXT", - "#/texts/33", + "#/texts/10", 1.0, - 8536069645534292969, - 16063604623463467342, + 13335488353876392384, + 4272651448967908962, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 887, + 913, + 887, + 913, + 147, + 150, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "entities and relationships", + "entities and relationships" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/33", + "#/texts/10", 1.0, - 594099663775968682, - 14698211805947073928, + 1360625753915430118, + 621484166069441108, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 0, + 16, + 0, + 16, + 0, + 2, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "Knowledge Graphs", + "Knowledge Graphs" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/33", + "#/texts/10", 1.0, - 1697220653346092555, - 8458710314769009562, + 4284121007729982956, + 6369572950383215681, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 124, + 137, + 124, + 137, + 21, + 23, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "Large corpora", + "Large corpora" ], [ - "numval", - "ival", - 4361549266681704196, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/34", + "#/texts/10", 1.0, - 17767354399704235163, - 165380245946403556, + 6424219500556179945, + 17076507051079708317, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 200, + 219, + 200, + 219, + 33, + 35, true, - "3", - "3" + "particular interest", + "particular interest" ], [ - "numval", - "ival", - 4361549266681704196, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/34", + "#/texts/10", 1.0, - 15441160910541481979, - 10132017072037949157, + 16404072464569347441, + 1083230896895154556, 18446744073709551615, 18446744073709551615, - 3, - 5, - 3, - 5, - 2, - 3, + 246, + 260, + 246, + 260, + 40, + 42, true, - "15", - "15" + "business world", + "business world" ], [ - "numval", - "ival", - 8043608144162608258, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/35", + "#/texts/10", 1.0, - 17767354399704235156, - 18342724908476302885, + 9425695345662332688, + 13993422688910449662, 18446744073709551615, 18446744073709551615, - 62, - 63, - 62, - 63, - 13, - 14, + 262, + 274, + 262, + 274, + 43, + 45, true, - "4", - "4" + "Key examples", + "Key examples" ], [ - "numval", - "ival", - 8043608144162608258, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/35", + "#/texts/10", 1.0, - 17767354399704235157, - 18342724908489108597, + 11722762610575282493, + 14312279897687610118, 18446744073709551615, 18446744073709551615, - 174, - 175, - 174, - 175, - 35, - 36, + 283, + 306, + 283, + 306, + 46, + 48, true, - "5", - "5" + "scientific publications", + "scientific publications" ], [ - "numval", - "ival", - 7159467829896778939, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/36", + "#/texts/10", 1.0, - 17767354399704235162, - 7924620771043007977, + 11290362013569613366, + 11245069165073242782, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 308, + 325, + 308, + 325, + 49, + 51, true, - "2", - "2" + "technical reports", + "technical reports" ], [ - "numval", - "ival", - 3276490574487379366, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/38", + "#/texts/10", 1.0, - 17767354399704235161, - 7431448323123128102, + 2954114896893293510, + 11151995989589581021, 18446744073709551615, 18446744073709551615, - 390, - 391, - 390, - 391, - 66, - 67, + 363, + 375, + 363, + 375, + 59, + 61, true, - "1", - "1" + "Such corpora", + "Such corpora" ], [ - "numval", - "fval", - 3367451956962330174, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/39", + "#/texts/10", 1.0, - 12178341415896439119, - 1493266672212178244, + 8380325310516057337, + 2077781812504009930, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 382, + 392, + 382, + 392, + 62, + 64, true, - "2.1", - "2.1" + "many facts", + "many facts" ], [ - "numval", - "ival", - 5509744459704235873, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/40", + "#/texts/10", 1.0, - 17767354399704235161, - 13327421909992595494, + 12137975992667888681, + 15723683980352691945, 18446744073709551615, 18446744073709551615, - 10, - 11, - 10, - 11, - 2, - 3, + 416, + 440, + 416, + 440, + 68, + 71, true, - "1", - "1" + "critical decision making", + "critical decision making" ], [ - "numval", - "ival", - 5509744459704235873, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/40", + "#/texts/10", 1.0, - 17767354399704235161, - 13327421909992715764, + 13137373831138315414, + 4250119513472996319, 18446744073709551615, 18446744073709551615, - 176, - 177, - 176, - 177, - 36, - 37, + 453, + 468, + 453, + 468, + 73, + 75, true, - "1", - "1" + "new discoveries", + "new discoveries" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/41", + "#/texts/10", 1.0, - 389609625548777262, - 8826555294676663632, + 16783306750459274333, + 14988035573203639431, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 498, + 521, + 498, + 521, + 83, + 86, true, - "2020", - "2020" + "scalable cloud platform", + "scalable cloud platform" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/41", + "#/texts/10", 1.0, - 389609625548777251, - 8826555296349648778, + 1360625753915430118, + 621484166069374236, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 542, + 558, + 542, + 558, + 90, + 92, true, - "2023", - "2023" + "Knowledge Graphs", + "Knowledge Graphs" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/41", + "#/texts/10", 1.0, - 8104408072666212335, - 13552219042525319352, + 12127370583554771998, + 2779705537726115692, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 575, + 600, + 575, + 600, + 96, + 99, true, - "10.1002", - "10.1002" + "corpus processing service", + "corpus processing service" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/41", + "#/texts/10", 1.0, - 389609625548868096, - 8826558551385119058, + 10668868939620055202, + 14232732675773488092, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 634, + 656, + 634, + 656, + 108, + 111, true, - "2.20", - "2.20" + "large document corpora", + "large document corpora" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/41", + "#/texts/10", 1.0, - 14654386914267794441, - 12796143052106760105, + 6703089473517255637, + 7569105345513072239, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 682, + 696, + 682, + 696, + 116, + 118, true, - "26895595", - "26895595" + "embedded facts", + "embedded facts" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/41", + "#/texts/10", 1.0, - 17767354399704235162, - 7753390158484899261, + 10769918443693798117, + 7507791508188846982, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 734, + 760, + 734, + 760, + 125, + 128, true, - "2", - "2" + "consistent knowledge graph", + "consistent knowledge graph" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/41", + "#/texts/10", 1.0, - 15441160910541481791, - 3518619573290839093, + 15287552844593740093, + 1294173908174567746, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 821, + 854, + 821, + 854, + 140, + 143, true, - "23", - "23" + "state-of-the-art natural language", + "state-of-the-art natural language" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/41", + "#/texts/10", 1.0, - 15441160910541481543, - 3518617976696906498, + 749951501626157695, + 10874548052056037028, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 969, + 1003, + 969, + 1003, + 157, + 161, true, - "08", - "08" + "corpus conversion service platform", + "corpus conversion service platform" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/41", + "#/texts/10", 1.0, - 8536069645534292969, - 16063604623463467342, + 2924972194163802578, + 13990249557984784415, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 1058, + 1070, + 1058, + 1070, + 170, + 172, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "graph engine", + "graph engine" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/41", + "#/texts/10", 1.0, - 594099663775968682, - 14698211805947073928, + 16233011764462755709, + 17795450440755478946, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 1095, + 1119, + 1095, + 1119, + 175, + 178, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "performant graph queries", + "performant graph queries" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/41", + "#/texts/10", 1.0, - 1697220653346092555, - 8458710314769009562, + 5900427976382706474, + 2020481094135207993, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 1133, + 1170, + 1133, + 1170, + 180, + 184, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "powerful graph analytics capabilities", + "powerful graph analytics capabilities" ], [ - "numval", - "ival", - 4361549176688508574, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/42", + "#/texts/10", 1.0, - 17767354399704235156, - 7238925036885539838, + 6051789364687046473, + 7657992812587484855, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 1246, + 1255, + 1246, + 1255, + 196, + 198, true, - "4", - "4" + "REST APIs", + "REST APIs" ], [ - "numval", - "ival", - 4361549176688508574, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/42", + "#/texts/10", 1.0, - 15441160910541481979, - 7918922223876958481, + 11099293550303110249, + 14582293107993112691, 18446744073709551615, 18446744073709551615, - 3, - 5, - 3, - 5, - 2, - 3, + 1282, + 1297, + 1282, + 1297, + 203, + 205, true, - "15", - "15" + "user interfaces", + "user interfaces" ], [ - "numval", - "fval", - 12374482891052873875, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/43", + "#/texts/10", 1.0, - 12178341415896439119, - 1298001416237199126, + 12778474509082695823, + 15765869890879381457, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 1313, + 1332, + 1313, + 1332, + 208, + 211, true, - "2.1", - "2.1" + "data ingestion flow", + "data ingestion flow" ], [ - "numval", - "ival", - 12374482891052873875, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/43", + "#/texts/10", 1.0, - 17767354399704235161, - 4264503375288263632, + 17267900621492324657, + 1231638983278729390, 18446744073709551615, 18446744073709551615, - 4, - 5, - 4, - 5, - 2, - 3, + 1363, + 1390, + 1363, + 1390, + 216, + 219, true, - "1", - "1" + "visual programming approach", + "visual programming approach" ], [ - "numval", - "fval", - 6297710299044869343, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/47", + "#/texts/10", 1.0, - 12178341415896439119, - 9338691878670130519, + 12779036928191531604, + 8747945520374767523, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 1396, + 1408, + 1396, + 1408, + 221, + 223, true, - "2.1", - "2.1" + "CPS platform", + "CPS platform" ], [ - "numval", - "ival", - 6297710299044869343, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/47", + "#/texts/10", 1.0, - 17767354399704235162, - 17230475508982970052, + 9541732521997736647, + 11009450277199675260, 18446744073709551615, 18446744073709551615, - 4, - 5, - 4, - 5, - 2, - 3, + 1426, + 1453, + 1426, + 1453, + 227, + 230, true, - "2", - "2" + "modular microservice system", + "modular microservice system" ], [ - "numval", - "fval", - 1150871476689677866, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/49", + "#/texts/10", 1.0, - 12178341415896310341, - 6520357412536397527, + 4315218641775224883, + 11583210972753095337, 18446744073709551615, 18446744073709551615, - 170, - 173, - 168, - 171, - 27, - 28, + 1467, + 1486, + 1467, + 1486, + 232, + 234, true, - "5,6", - "5,6" + "Kubernetes clusters", + "Kubernetes clusters" ], [ - "numval", - "ival", - 1150871476689677866, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/49", + "#/texts/10", 1.0, - 17767354399704235159, - 17919867067928731763, + 15343209773106937885, + 15390203108645422434, 18446744073709551615, 18446744073709551615, - 228, - 229, - 226, - 227, - 42, - 43, + 1539, + 1567, + 1539, + 1567, + 245, + 248, true, - "7", - "7" + "endto-end knowledge pipeline", + "endto-end knowledge pipeline" ], [ - "numval", - "ival", - 1150871476689677866, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/49", + "#/texts/10", 1.0, - 17767354399704235162, - 17919867064012061628, + 8973266897479869153, + 9626503990142682309, 18446744073709551615, 18446744073709551615, - 423, - 424, - 419, - 420, - 77, - 78, + 1573, + 1595, + 1573, + 1595, + 250, + 252, true, - "2", - "2" + "real-world application", + "real-world application" ], [ - "numval", - "fval", - 5163702913945903725, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/50", + "#/texts/10", 1.0, - 12178341415896439119, - 15419364153911617129, + 17613546823892249124, + 1576417016664792020, 18446744073709551615, 18446744073709551615, - 573, - 576, - 572, - 575, - 100, - 101, + 1611, + 1623, + 1611, + 1623, + 256, + 258, true, - "2.1", - "2.1" + "gas industry", + "gas industry" ], [ - "numval", - "ival", - 5163702913945903725, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/50", + "#/texts/10", 1.0, - 17767354399704235161, - 8171001275372472332, + 329104161610777240, + 6943276019110900001, 18446744073709551615, 18446744073709551615, - 11, - 12, - 11, + 69, + 74, + 69, + 74, 12, - 2, - 3, + 13, true, - "1", - "1" + "model", + "model" ], [ - "numval", - "ival", - 5163702913945903725, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/50", + "#/texts/10", 1.0, - 17767354399704235156, - 8171001275288926016, + 6184122545182835014, + 10337587533357109733, 18446744073709551615, 18446744073709551615, - 577, - 578, - 576, - 577, - 102, - 103, + 87, + 96, + 87, + 96, + 15, + 16, true, - "4", - "4" + "knowledge", + "knowledge" ], [ - "numval", - "ival", - 5462319091745771382, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 17767354399704235162, - 11171804972701775781, + 389609625696431489, + 6015166006560019356, 18446744073709551615, 18446744073709551615, - 7, - 8, - 7, - 8, - 1, - 2, - true, - "2", - "2" - ], - [ - "numval", - "ival", - 5462319091745771382, - "TEXT", - "#/texts/51", - 1.0, - 17767354399704235152, - 11171804967920230653, - 18446744073709551615, - 18446744073709551615, - 112, - 113, - 112, - 113, - 18, + 118, + 122, + 118, + 122, 19, + 20, true, - "8", - "8" + "data", + "data" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/52", + "#/texts/10", 1.0, - 389609625548777262, - 8826555294676663632, + 6167933651658664291, + 7017623091478883550, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 141, + 150, + 141, + 150, + 24, + 25, true, - "2020", - "2020" + "documents", + "documents" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/52", + "#/texts/10", 1.0, - 389609625548777251, - 8826555296349648778, + 16381206579112188113, + 11795690975934241678, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 164, + 170, + 164, + 170, + 27, + 28, true, - "2023", - "2023" + "source", + "source" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/52", + "#/texts/10", 1.0, - 8104408072666212335, - 13552219042525319352, + 389609625696431489, + 6015166006560022281, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 192, + 196, + 192, + 196, + 31, + 32, true, - "10.1002", - "10.1002" + "data", + "data" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/52", + "#/texts/10", 1.0, - 389609625548868096, - 8826558551385119058, + 8106464587474035829, + 599250081059610946, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 327, + 334, + 327, + 334, + 52, + 53, true, - "2.20", - "2.20" + "manuals", + "manuals" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/52", + "#/texts/10", 1.0, - 14654386914267794441, - 12796143052106760105, + 8106479143938802112, + 3741013633356507891, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 336, + 343, + 336, + 343, + 54, + 55, true, - "26895595", - "26895595" + "patents", + "patents" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/52", + "#/texts/10", 1.0, - 17767354399704235162, - 7753390158484899261, + 4973525406703593304, + 5700149770998543624, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 345, + 356, + 345, + 356, + 56, + 57, true, - "2", - "2" + "regulations", + "regulations" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/52", + "#/texts/10", 1.0, - 15441160910541481791, - 3518619573290839093, + 329104161668023890, + 6940026313184513359, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 478, + 483, + 478, + 483, + 78, + 79, true, - "23", - "23" + "paper", + "paper" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/52", + "#/texts/10", 1.0, - 15441160910541481543, - 3518617976696906498, + 12178341415896222428, + 9671093415367483529, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 602, + 605, + 602, + 605, + 100, + 101, true, - "08", - "08" + "CPS", + "CPS" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/52", + "#/texts/10", 1.0, - 8536069645534292969, - 16063604623463467342, + 8106479265948440982, + 351105263671880898, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 612, + 619, + 612, + 619, + 104, + 105, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "purpose", + "purpose" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/52", + "#/texts/10", 1.0, - 594099663775968682, - 14698211805947073928, + 8106398484416916345, + 1095125247314724175, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 670, + 677, + 670, + 677, + 114, + 115, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "content", + "content" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/52", + "#/texts/10", 1.0, - 1697220653346092555, - 8458710314769009562, + 16381206567230470443, + 11705694158167462403, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 869, + 875, + 869, + 875, + 144, + 145, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "models", + "models" ], [ - "numval", - "ival", - 958124839653591304, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/53", + "#/texts/10", 1.0, - 17767354399704235161, - 11087830826518420632, + 14652256560445338257, + 7220701613896570103, 18446744073709551615, 18446744073709551615, - 8, - 9, - 8, - 9, - 1, - 2, + 887, + 895, + 887, + 895, + 147, + 148, true, - "1", - "1" + "entities", + "entities" ], [ - "numval", - "ival", - 958124839653591304, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/53", + "#/texts/10", 1.0, - 17767354399704235152, - 11087830826319423704, + 8279380567349713241, + 5428767239015427768, 18446744073709551615, 18446744073709551615, - 63, - 64, - 63, - 64, - 11, - 12, + 900, + 913, + 900, + 913, + 149, + 150, true, - "8", - "8" + "relationships", + "relationships" ], [ - "numval", - "ival", - 1448405324616602032, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/54", + "#/texts/10", 1.0, - 12178341415896426714, - 14365907824633173416, + 6167933651658664291, + 7017623091478798627, 18446744073709551615, 18446744073709551615, - 503, - 506, - 503, - 506, - 85, - 86, + 919, + 928, + 919, + 928, + 151, + 152, true, - "100", - "100" + "documents", + "documents" ], [ - "numval", - "ival", - 1448405324616602032, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/54", + "#/texts/10", 1.0, - 12178341415896430891, - 14365907899184508224, + 14814125852840540191, + 6714967147835883438, 18446744073709551615, 18446744073709551615, - 507, - 510, - 507, - 510, - 86, - 87, + 1010, + 1018, + 1010, + 1018, + 163, + 164, true, - "000", - "000" + "pipeline", + "pipeline" ], [ - "numval", - "ival", - 1448405324616602032, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/54", + "#/texts/10", 1.0, - 17767354399704235163, - 2072240023181579806, + 2703018952916355661, + 7441154421291581585, 18446744073709551615, 18446744073709551615, - 549, - 550, - 549, - 550, - 93, - 94, + 1177, + 1187, + 1177, + 1187, + 186, + 187, true, - "3", - "3" + "components", + "components" ], [ - "numval", - "ival", - 1448405324616602032, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/54", + "#/texts/10", 1.0, - 17767354399704235163, - 2072240023181582239, + 8106477782290185579, + 13397737841409408978, 18446744073709551615, 18446744073709551615, - 657, - 658, - 657, - 658, - 109, - 110, + 1347, + 1354, + 1347, + 1354, + 213, + 214, true, - "3", - "3" + "queries", + "queries" ], [ - "numval", - "fval", - 2617775076168299948, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/55", + "#/texts/10", 1.0, - 12178341415896439119, - 18028276311967117811, + 8106477781724488761, + 13716403130135035691, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 1513, + 1520, + 1513, + 1520, + 240, + 241, true, - "2.1", - "2.1" + "quality", + "quality" ], [ - "numval", - "ival", - 2617775076168299948, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/55", + "#/texts/10", 1.0, - 17767354399704235163, - 11990453707355571146, + 8106477782290185579, + 13397737841409395272, 18446744073709551615, 18446744073709551615, - 4, - 5, - 4, - 5, - 2, - 3, + 1524, + 1531, + 1524, + 1531, + 242, + 243, true, - "3", - "3" + "queries", + "queries" ], [ - "numval", - "ival", - 13974986056043304735, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/56", + "#/texts/10", 1.0, - 17767354399704235161, - 6534238515883477149, + 12178341415895623363, + 9671102709835951159, 18446744073709551615, 18446744073709551615, - 488, - 489, - 487, - 488, - 88, - 89, + 1603, + 1606, + 1603, + 1606, + 254, + 255, true, - "1", - "1" + "oil", + "oil" ], [ "numval", - "fval", - 5985285694705576020, + "ival", + 11913688961435238004, "TEXT", - "#/texts/57", + "#/texts/13", 1.0, - 12178341415896439119, - 9356899144609064731, + 17767354399704235161, + 9682837417262995739, 18446744073709551615, 18446744073709551615, 0, - 3, + 1, 0, - 3, + 1, 0, 1, true, - "2.1", - "2.1" + "1", + "1" ], [ "numval", - "ival", - 5985285694705576020, + "year", + 9977041563469582014, "TEXT", - "#/texts/57", + "#/texts/14", 1.0, - 17767354399704235156, - 5166044511235843509, + 389609625548777059, + 17632943630740203190, 18446744073709551615, 18446744073709551615, - 4, - 5, - 4, - 5, + 6, + 10, + 6, + 10, 2, 3, true, - "4", - "4" + "2015", + "2015" ], [ "numval", - "ival", - 11235296141350659290, + "fval", + 9977041563469582014, "TEXT", - "#/texts/58", + "#/texts/14", 1.0, - 17767354399704235161, - 6700456192654799825, + 12178341415896439105, + 13434398423091096866, 18446744073709551615, 18446744073709551615, - 145, - 146, - 145, - 146, - 21, - 22, + 44, + 47, + 44, + 47, + 9, + 10, true, - "1", - "1" + "2.7", + "2.7" ], [ - "numval", - "ival", - 11235296141350659290, + "expression", + "common", + 9977041563469582014, "TEXT", - "#/texts/58", + "#/texts/14", 1.0, - 17767354399704235161, - 6700456192654780632, + 12178341415895450733, + 13434388706261344427, 18446744073709551615, 18446744073709551615, - 382, - 383, - 382, - 383, - 65, - 66, + 579, + 583, + 579, + 583, + 97, + 98, true, - "1", - "1" + "etc", + "etc." ], [ - "numval", - "year", - 18391264192891079539, + "expression", + "word-concatenation", + 9977041563469582014, "TEXT", - "#/texts/59", + "#/texts/14", 1.0, - 389609625548777262, - 8826555294676663632, + 8803983102511961753, + 11026648589532064531, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 102, + 114, + 102, + 114, + 19, + 20, true, - "2020", - "2020" + "self-evident", + "self-evident" ], [ - "numval", - "year", - 18391264192891079539, + "expression", + "word-concatenation", + 9977041563469582014, "TEXT", - "#/texts/59", + "#/texts/14", 1.0, - 389609625548777251, - 8826555296349648778, + 8043212133150675222, + 7506328330981893578, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 475, + 487, + 475, + 487, + 80, + 81, true, - "2023", - "2023" + "ever-growing", + "ever-growing" ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "", + 9977041563469582014, "TEXT", - "#/texts/59", + "#/texts/14", 1.0, - 8104408072666212335, - 13552219042525319352, + 9580276197039337323, + 13841174173201944352, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 0, + 95, + 0, + 95, + 0, + 17, true, - "10.1002", - "10.1002" + "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally.", + "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally." ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "", + 9977041563469582014, "TEXT", - "#/texts/59", + "#/texts/14", 1.0, - 389609625548868096, - 8826558551385119058, + 9079004519467152167, + 6860715527459606106, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 96, + 157, + 96, + 157, + 17, + 28, true, - "2.20", - "2.20" + "It is self-evident that this number has increased ever since.", + "It is self-evident that this number has increased ever since." ], [ - "numval", - "ival", - 18391264192891079539, + "sentence", + "", + 9977041563469582014, "TEXT", - "#/texts/59", + "#/texts/14", 1.0, - 14654386914267794441, - 12796143052106760105, + 12959192130376635610, + 18180244594714576233, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 158, + 322, + 158, + 322, + 28, + 54, true, - "26895595", - "26895595" + "The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world.", + "The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world." ], [ - "numval", - "ival", - 18391264192891079539, + "sentence", + "", + 9977041563469582014, "TEXT", - "#/texts/59", + "#/texts/14", 1.0, - 17767354399704235162, - 7753390158484899261, + 9260326806510524947, + 14982882722757884571, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 323, + 459, + 323, + 459, + 54, + 77, true, - "2", - "2" + "The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings.", + "The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings." ], [ - "numval", - "ival", - 18391264192891079539, + "sentence", + "", + 9977041563469582014, "TEXT", - "#/texts/59", + "#/texts/14", 1.0, - 15441160910541481791, - 3518619573290839093, + 5075859589505957998, + 18360416951435709110, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 460, + 639, + 460, + 639, + 77, + 107, true, - "23", - "23" + "Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable.", + "Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable." ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "enum-term-mark-1", + 9977041563469582014, "TEXT", - "#/texts/59", + "#/texts/14", 1.0, - 15441160910541481543, - 3518617976696906498, + 2327733945986976512, + 16359156217665106996, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 293, + 321, + 293, + 321, + 49, + 53, true, - "08", - "08" + "academic and corporate world", + "academic and corporate world" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/59", + "#/texts/14", 1.0, - 8536069645534292969, - 16063604623463467342, + 3693395590591757392, + 10750684571354632769, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 48, + 70, + 48, + 70, + 10, + 13, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "trillion PDF documents", + "trillion PDF documents" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/59", + "#/texts/14", 1.0, - 594099663775968682, - 14698211805947073928, + 11551851235882828048, + 8670374056430505501, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 162, + 178, + 162, + 178, + 29, + 31, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "explosive growth", + "explosive growth" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/59", + "#/texts/14", 1.0, - 1697220653346092555, - 8458710314769009562, + 5652441786009596562, + 559076346625196990, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 214, + 232, + 214, + 232, + 37, + 39, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "digital publishing", + "digital publishing" ], [ - "numval", - "ival", - 4361549266576336732, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/60", + "#/texts/14", 1.0, - 17767354399704235158, - 5655206626033153623, + 11978931670712051192, + 9926895489093501949, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 263, + 280, + 263, + 280, + 44, + 46, true, - "6", - "6" + "serious challenge", + "serious challenge" ], [ - "numval", - "ival", - 4361549266576336732, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/60", + "#/texts/14", 1.0, - 15441160910541481979, - 15406507443958837158, + 7780875503607700578, + 7527213517068304878, 18446744073709551615, 18446744073709551615, - 3, - 5, - 3, - 5, - 2, - 3, + 306, + 321, + 306, + 321, + 51, + 53, true, - "15", - "15" + "corporate world", + "corporate world" ], [ - "numval", - "fval", - 5371685212527510397, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/62", + "#/texts/14", 1.0, - 12178341415896439118, - 9239884836110286517, + 3488136445312217472, + 563560862623828716, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 337, + 353, + 337, + 353, + 56, + 58, true, - "2.2", - "2.2" + "publication rate", + "publication rate" ], [ - "numval", - "ival", - 2929626768872004841, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/64", + "#/texts/14", 1.0, - 17767354399704235161, - 9308892477550455324, + 7863808487922385366, + 10797157915381492366, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 357, + 376, + 357, + 376, + 59, + 61, true, - "1", - "1" + "scientific articles", + "scientific articles" ], [ - "numval", - "ival", - 15879756297712818143, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/65", + "#/texts/14", 1.0, - 17767354399704235162, - 8832343908208005813, + 16667234436856023081, + 17857792665552379798, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 443, + 458, + 443, + 458, + 74, + 76, true, - "2", - "2" + "latest findings", + "latest findings" ], [ - "numval", - "ival", - 16116531546352845311, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/66", + "#/texts/14", 1.0, - 17767354399704235163, - 4307298561096377444, + 5751151653465478259, + 10695870790845961642, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 475, + 494, + 475, + 494, + 80, + 82, true, - "3", - "3" + "ever-growing number", + "ever-growing number" ], [ - "numval", - "ival", - 11590138063543342276, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/69", + "#/texts/14", 1.0, - 17767354399704235163, - 13032776934094914368, + 18216685920424760230, + 8188107583662209298, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 498, + 514, + 498, + 514, + 83, + 85, true, - "3", - "3" + "internal reports", + "internal reports" ], [ - "numval", - "ival", - 5393976293631695754, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/71", + "#/texts/14", 1.0, - 17767354399704235161, - 14832870493709788748, + 10815771517668250054, + 9700260059013966190, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 564, + 577, + 564, + 577, + 94, + 96, true, - "1", - "1" + "court filings", + "court filings" ], [ - "numval", - "ival", - 1988335831916069382, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/72", + "#/texts/14", 1.0, - 17767354399704235162, - 6940844591694806953, + 1324407453055449271, + 13944046065395206963, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 592, + 609, + 592, + 609, + 101, + 103, true, - "2", - "2" + "most corporations", + "most corporations" ], [ - "numval", - "fval", - 5147764798816678886, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/73", + "#/texts/14", 1.0, - 389609625533812191, - 1960551977415557980, + 329104162020590744, + 12542051113387534152, 18446744073709551615, 18446744073709551615, - 409, - 413, - 409, - 413, - 67, - 68, + 12, + 17, + 12, + 17, + 4, + 5, true, - "9,10", - "9,10" + "Adobe", + "Adobe" ], [ - "numval", - "ival", - 5147764798816678886, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/73", + "#/texts/14", 1.0, - 17767354399704235157, - 8804028754113186404, + 15526146950464474214, + 16227659806299083154, 18446744073709551615, 18446744073709551615, - 251, - 252, - 251, - 252, - 40, - 41, + 74, + 85, + 74, + 85, + 14, + 15, true, - "5", - "5" + "circulation", + "circulation" ], [ - "numval", - "fval", - 285583876932865368, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/74", + "#/texts/14", 1.0, - 329104147749426795, - 8846331132305971160, + 16381206574973295053, + 11707971985141737188, 18446744073709551615, 18446744073709551615, - 463, - 468, - 463, - 468, - 84, - 85, + 125, + 131, + 125, + 131, + 22, + 23, true, - "11,12", - "11,12" + "number", + "number" ], [ - "numval", - "ival", - 285583876932865368, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/74", + "#/texts/14", 1.0, - 17767354399704235162, - 13308611356903088115, + 6167933651658664291, + 4272164818252510490, 18446744073709551615, 18446744073709551615, - 657, - 658, - 657, - 658, - 119, - 120, + 182, + 191, + 182, + 191, + 32, + 33, true, - "2", - "2" + "documents", + "documents" ], [ - "numval", - "ival", - 285583876932865368, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/74", + "#/texts/14", 1.0, - 17767354399704235161, - 13308611356852178135, + 8380286976750653797, + 4381452488979706873, 18446744073709551615, 18446744073709551615, - 736, - 737, - 736, - 737, - 136, - 137, + 240, + 250, + 240, + 250, + 40, + 41, true, - "1", - "1" + "mainstream", + "mainstream" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/75", + "#/texts/14", 1.0, - 389609625548777262, - 8826555294676663632, + 5946940610854338392, + 9806611181783168768, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 408, + 417, + 408, + 417, + 67, + 68, true, - "2020", - "2020" + "academics", + "academics" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/75", + "#/texts/14", 1.0, - 389609625548777251, - 8826555296349648778, + 15559940482764101395, + 15782149731383100499, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 516, + 529, + 516, + 529, + 86, + 87, true, - "2023", - "2023" + "documentation", + "documentation" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/75", + "#/texts/14", 1.0, - 8104408072666212335, - 13552219042525319352, + 8106479143938802112, + 6864834667348376398, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 531, + 538, + 531, + 538, + 88, + 89, true, - "10.1002", - "10.1002" + "patents", + "patents" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/75", + "#/texts/14", 1.0, - 389609625548868096, - 8826558551385119058, + 5947882010261766213, + 14814557788069949454, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 540, + 549, + 540, + 549, + 90, + 91, true, - "2.20", - "2.20" + "contracts", + "contracts" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/75", + "#/texts/14", 1.0, - 14654386914267794441, - 12796143052106760105, + 4973525406703593304, + 17896266079288595392, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 551, + 562, + 551, + 562, + 92, + 93, true, - "26895595", - "26895595" + "regulations", + "regulations" ], [ "numval", "ival", - 18391264192891079539, + 4361549266817300114, "TEXT", - "#/texts/75", + "#/texts/15", 1.0, 17767354399704235162, - 7753390158484899261, + 7526268954444592619, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 0, + 1, + 0, + 1, + 0, + 1, true, "2", "2" @@ -3377,6018 +3406,6081 @@ [ "numval", "ival", - 18391264192891079539, + 4361549266817300114, "TEXT", - "#/texts/75", + "#/texts/15", 1.0, - 15441160910541481791, - 3518619573290839093, + 15441160910541481979, + 7890928592616001252, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 3, + 5, + 3, + 5, + 2, + 3, true, - "23", - "23" + "15", + "15" ], [ "numval", "ival", - 18391264192891079539, + 8425126282903547933, "TEXT", - "#/texts/75", + "#/texts/16", 1.0, - 15441160910541481543, - 3518617976696906498, + 17767354399704235161, + 14071188586038459490, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 77, + 78, + 77, + 78, + 15, + 16, true, - "08", - "08" + "1", + "1" ], [ - "link", - "url", - 18391264192891079539, + "parenthesis", + "round brackets", + 8425126282903547933, "TEXT", - "#/texts/75", + "#/texts/16", 1.0, - 8536069645534292969, - 16063604623463467342, + 329104053210116957, + 13219493015888408584, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 70, + 75, + 70, + 75, + 11, + 14, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "(CCS)", + "(CCS)" ], [ - "link", - "url", - 18391264192891079539, + "parenthesis", + "round brackets", + 8425126282903547933, "TEXT", - "#/texts/75", + "#/texts/16", 1.0, - 594099663775968682, - 14698211805947073928, + 9476133846942776004, + 15562593450704422350, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, + 193, + 220, + 193, + 220, + 33, 43, - 58, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "(eg, PDF, Word, and Bitmap)", + "(eg, PDF, Word, and Bitmap)" ], [ - "link", - "doi", - 18391264192891079539, + "parenthesis", + "round brackets", + 8425126282903547933, "TEXT", - "#/texts/75", + "#/texts/16", 1.0, - 1697220653346092555, - 8458710314769009562, + 329104053210154735, + 13219524899201785417, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 750, + 755, + 750, + 755, + 139, + 142, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "(CPS)", + "(CPS)" ], [ - "numval", - "ival", - 4361549257370278754, + "parenthesis", + "round brackets", + 8425126282903547933, "TEXT", - "#/texts/76", + "#/texts/16", 1.0, - 17767354399704235159, - 18348318207235940730, + 329104053571454679, + 13220133656194459813, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 835, + 840, + 835, + 840, + 158, + 161, true, - "7", - "7" + "(KBs)", + "(KBs)" ], [ - "numval", - "ival", - 4361549257370278754, + "expression", + "word-concatenation", + 8425126282903547933, "TEXT", - "#/texts/76", + "#/texts/16", 1.0, - 15441160910541481979, - 2772124700731079428, + 5044385734724420019, + 12331404767350744597, 18446744073709551615, 18446744073709551615, - 3, - 5, - 3, - 5, - 2, - 3, + 132, + 148, + 132, + 148, + 26, + 27, true, - "15", - "15" + "state-of-the-art", + "state-of-the-art" ], [ - "numval", - "fval", - 13183039880198077038, + "expression", + "word-concatenation", + 8425126282903547933, "TEXT", - "#/texts/77", + "#/texts/16", 1.0, - 12178341415896435198, - 14026574630810798704, + 6187817560337829240, + 2071545417773836371, 18446744073709551615, 18446744073709551615, - 100, - 103, - 100, - 103, - 19, - 20, + 900, + 909, + 900, + 909, + 171, + 172, true, - "3.1", - "3.1" + "in-memory", + "in-memory" ], [ - "numval", - "fval", - 13183039880198077038, + "sentence", + "", + 8425126282903547933, "TEXT", - "#/texts/77", + "#/texts/16", 1.0, - 12178341415896435199, - 14026574630786503486, + 1135552400875089788, + 10398380939440765449, 18446744073709551615, 18446744073709551615, - 154, - 157, - 154, - 157, - 29, - 30, + 0, + 76, + 0, + 76, + 0, + 15, true, - "3.2", - "3.2" + "In a previous publication, we presented the corpus conversion service (CCS).", + "In a previous publication, we presented the corpus conversion service (CCS)." ], [ - "numval", - "fval", - 13183039880198077038, + "sentence", + "", + 8425126282903547933, "TEXT", - "#/texts/77", + "#/texts/16", 1.0, - 12178341415896435196, - 14026574630635842602, + 4277096165456505390, + 9879371965799806231, 18446744073709551615, 18446744073709551615, - 233, - 236, - 233, - 236, - 47, - 48, + 79, + 283, + 79, + 283, + 16, + 53, true, - "3.3", - "3.3" + "The CCS is a scalable cloud service, which leverages state-of-the-art machine learning to convert complex formats (eg, PDF, Word, and Bitmap) into a richly structured JSON representation of their content.", + "The CCS is a scalable cloud service, which leverages state-of-the-art machine learning to convert complex formats (eg, PDF, Word, and Bitmap) into a richly structured JSON representation of their content." ], [ - "numval", - "ival", - 13183039880198077038, + "sentence", + "", + 8425126282903547933, "TEXT", - "#/texts/77", + "#/texts/16", 1.0, - 17767354399704235156, - 5196757730407799108, + 10130417114443229865, + 11965664839000809257, 18446744073709551615, 18446744073709551615, - 211, - 212, - 211, - 212, - 40, - 41, + 284, + 447, + 284, + 447, + 53, + 83, true, - "4", - "4" + "As such, the CCS solves the first problem when confronted with a large corpus of documents, that is, make the content of the documents programmatically accessible.", + "As such, the CCS solves the first problem when confronted with a large corpus of documents, that is, make the content of the documents programmatically accessible." ], [ - "numval", - "fval", - 13428900458866068249, + "sentence", + "", + 8425126282903547933, "TEXT", - "#/texts/78", + "#/texts/16", 1.0, - 12178341415896435198, - 3629736405801839701, + 9678853880541043034, + 16952153608571362238, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 448, + 580, + 448, + 580, + 83, + 108, true, - "3.1", - "3.1" + "Examples of the latter would be ' List all images with their caption from the corpus or list all titles with their publication date.", + "Examples of the latter would be ' List all images with their caption from the corpus or list all titles with their publication date." ], [ - "numval", - "fval", - 1430911655724119030, + "sentence", + "", + 8425126282903547933, "TEXT", - "#/texts/79", + "#/texts/16", 1.0, - 329104147748831777, - 2423845697831217766, + 3508421805124215579, + 4502877190283918832, 18446744073709551615, 18446744073709551615, - 148, - 153, - 148, - 153, - 24, - 25, + 583, + 683, + 583, + 683, + 109, + 128, true, - "13,14", - "13,14" + "The second problem is to obviously search or explore the content of the documents in a large corpus.", + "The second problem is to obviously search or explore the content of the documents in a large corpus." ], [ - "numval", - "ival", - 13770706479324480755, + "sentence", + "", + 8425126282903547933, "TEXT", - "#/texts/80", + "#/texts/16", 1.0, - 15441160910541481977, - 17073318187218057934, + 10024472320233330255, + 7106376215568736379, 18446744073709551615, 18446744073709551615, - 111, - 113, - 111, - 113, - 19, - 20, + 684, + 788, + 684, + 788, + 128, + 150, true, - "13", - "13" + "For this problem, we have developed the corpus processing service (CPS), which we present in this paper.", + "For this problem, we have developed the corpus processing service (CPS), which we present in this paper." ], [ - "numval", - "ival", - 11165481757050847950, + "sentence", + "", + 8425126282903547933, "TEXT", - "#/texts/81", + "#/texts/16", 1.0, - 17767354399704235161, - 16151623650567223960, + 12060287303134868241, + 9438158154487551148, 18446744073709551615, 18446744073709551615, - 36, - 37, - 36, - 37, - 25, - 26, + 789, + 933, + 789, + 933, + 150, + 176, true, - "1", - "1" + "The CPS is intended to create knowledge bases (KBs) from the converted JSON corpus and serve these KBs through in-memory knowledge graph stores.", + "The CPS is intended to create knowledge bases (KBs) from the converted JSON corpus and serve these KBs through in-memory knowledge graph stores." ], [ - "numval", - "ival", - 11165481757050847950, + "sentence", + "", + 8425126282903547933, "TEXT", - "#/texts/81", + "#/texts/16", 1.0, - 17767354399704235160, - 16151623650470238720, + 9526325656046383708, + 8160084158762044158, 18446744073709551615, 18446744073709551615, - 53, - 54, - 53, - 54, - 31, - 32, + 934, + 1082, + 934, + 1082, + 176, + 205, true, - "0", - "0" + "As such, the CPS is the natural extension of the CCS and has as an express purpose to make corpora of documents available for deep data exploration.", + "As such, the CPS is the natural extension of the CCS and has as an express purpose to make corpora of documents available for deep data exploration." ], [ - "numval", - "ival", - 11165481757050847950, + "term", + "enum-term-mark-2", + 8425126282903547933, "TEXT", - "#/texts/81", + "#/texts/16", 1.0, - 17767354399704235162, - 16151623650448785184, + 3983234601812206677, + 13801040943353688387, 18446744073709551615, 18446744073709551615, - 67, - 68, - 67, - 68, - 36, - 37, + 526, + 540, + 526, + 540, + 98, + 101, true, - "2", - "2" + "corpus or list", + "corpus or list" ], [ - "numval", - "ival", - 11165481757050847950, + "term", + "enum-term-mark-4", + 8425126282903547933, "TEXT", - "#/texts/81", + "#/texts/16", 1.0, - 15441160910541481788, - 6320979167967070076, + 7033450248954463440, + 7228398253957921718, 18446744073709551615, 18446744073709551615, - 80, - 82, - 80, - 82, + 198, + 219, + 198, + 219, + 36, 42, - 43, true, - "26", - "26" + "PDF, Word, and Bitmap", + "PDF, Word, and Bitmap" ], [ - "numval", - "ival", - 11165481757050847950, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/81", + "#/texts/16", 1.0, - 17767354399704235161, - 16151623650567217445, + 8265732631932437791, + 2220042094064301042, 18446744073709551615, 18446744073709551615, - 87, - 88, - 86, - 87, - 45, - 46, + 5, + 25, + 5, + 25, + 2, + 4, true, - "1", - "1" + "previous publication", + "previous publication" ], [ - "numval", - "ival", - 9572077971492738329, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/82", + "#/texts/16", 1.0, - 17767354399704235161, - 3519266004279806136, + 17177676392774013037, + 14553703638392357264, 18446744073709551615, 18446744073709551615, - 298, - 299, - 298, - 299, - 53, - 54, + 44, + 69, + 44, + 69, + 8, + 11, true, - "1", - "1" + "corpus conversion service", + "corpus conversion service" ], [ - "numval", - "ival", - 14951391138799557075, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/83", + "#/texts/16", 1.0, - 15441160910541481860, - 1648917876881521913, + 15981076704825208308, + 2475151637876039228, 18446744073709551615, 18446744073709551615, - 46, - 48, - 44, - 46, - 30, - 31, + 92, + 114, + 92, + 114, + 20, + 23, true, - "16", - "16" + "scalable cloud service", + "scalable cloud service" ], [ - "numval", - "ival", - 14951391138799557075, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/83", + "#/texts/16", 1.0, - 15441160910541481861, - 1648917874734449247, + 5631916036570422679, + 18432897231273228407, 18446744073709551615, 18446744073709551615, - 56, - 58, - 54, - 56, - 35, - 36, + 132, + 156, + 132, + 156, + 26, + 28, true, - "17", - "17" + "state-of-the-art machine", + "state-of-the-art machine" ], [ - "numval", - "ival", - 14951391138799557075, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/83", + "#/texts/16", 1.0, - 15441160910541481860, - 1648917876881497232, + 3916377961847074895, + 18044235462591786071, 18446744073709551615, 18446744073709551615, - 67, - 69, - 65, - 67, - 40, - 41, + 177, + 192, + 177, + 192, + 31, + 33, true, - "16", - "16" + "complex formats", + "complex formats" ], [ - "numval", - "ival", - 14951391138799557075, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/83", + "#/texts/16", 1.0, - 15441160910541481861, - 1648917874734455276, + 2385031725262916889, + 756980895935750832, 18446744073709551615, 18446744073709551615, - 78, - 80, - 76, - 78, - 45, - 46, + 246, + 265, + 246, + 265, + 47, + 49, true, - "17", - "17" + "JSON representation", + "JSON representation" ], [ - "numval", - "ival", - 14951391138799557075, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/83", + "#/texts/16", 1.0, - 15441160910541481860, - 1648917876881494293, + 17731846125087813676, + 8455332773724787034, 18446744073709551615, 18446744073709551615, - 89, - 91, - 87, - 89, - 50, - 51, + 312, + 325, + 312, + 325, + 60, + 62, true, - "16", - "16" + "first problem", + "first problem" ], [ - "numval", - "ival", - 14951391138799557075, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/83", + "#/texts/16", 1.0, - 15441160910541481861, - 1648917874734450570, + 13692193161437794645, + 9956670629313084700, 18446744073709551615, 18446744073709551615, - 100, - 102, - 98, - 100, - 55, - 56, + 349, + 361, + 349, + 361, + 66, + 68, true, - "17", - "17" + "large corpus", + "large corpus" ], [ - "numval", - "ival", - 14951391138799557075, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/83", + "#/texts/16", 1.0, - 17767354399704235162, - 10344599291481597093, + 3488136445298151371, + 1226005527782385655, 18446744073709551615, 18446744073709551615, - 109, - 110, - 106, + 563, + 579, + 563, + 579, + 105, 107, - 59, - 60, - true, - "2", - "2" - ], - [ - "numval", - "fval", - 16602156009514813718, - "TEXT", - "#/texts/84", - 1.0, - 329104147748297973, - 7115759532919018249, - 18446744073709551615, - 18446744073709551615, - 503, - 508, - 503, - 508, - 93, - 94, true, - "15,16", - "15,16" + "publication date", + "publication date" ], [ - "numval", - "ival", - 16602156009514813718, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/84", + "#/texts/16", 1.0, - 17767354399704235161, - 18186955703423630693, + 1265616011515792632, + 14695525506544830411, 18446744073709551615, 18446744073709551615, - 76, - 77, - 76, - 77, - 13, - 14, + 587, + 601, + 587, + 601, + 110, + 112, true, - "1", - "1" + "second problem", + "second problem" ], [ - "numval", - "ival", - 16602156009514813718, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/84", + "#/texts/16", 1.0, - 17767354399704235161, - 18186955703423585534, + 13692193161437794645, + 9956670629300352938, 18446744073709551615, 18446744073709551615, - 265, - 266, - 265, - 266, - 45, - 46, + 670, + 682, + 670, + 682, + 125, + 127, true, - "1", - "1" + "large corpus", + "large corpus" ], [ - "numval", - "ival", - 16602156009514813718, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/84", + "#/texts/16", 1.0, - 17767354399704235161, - 18186955703423582023, + 12127370583554771998, + 15404787925282309163, 18446744073709551615, 18446744073709551615, - 372, - 373, - 372, - 373, - 69, - 70, + 724, + 749, + 724, + 749, + 136, + 139, true, - "1", - "1" + "corpus processing service", + "corpus processing service" ], [ - "numval", - "ival", - 16602156009514813718, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/84", + "#/texts/16", 1.0, - 17767354399704235161, - 18186955703423684934, + 11554018427294801296, + 11153106677011630080, 18446744073709551615, 18446744073709551615, - 681, - 682, - 681, - 682, - 125, - 126, + 819, + 834, + 819, + 834, + 156, + 158, true, - "1", - "1" + "knowledge bases", + "knowledge bases" ], [ - "numval", - "ival", - 16602156009514813718, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/84", + "#/texts/16", 1.0, - 17767354399704235162, - 18186955704176922280, + 11674492288517932322, + 14336676760781935976, 18446744073709551615, 18446744073709551615, - 776, - 777, - 776, - 777, - 151, - 152, + 860, + 871, + 860, + 871, + 164, + 166, true, - "2", - "2" + "JSON corpus", + "JSON corpus" ], [ - "numval", - "fval", - 15385417954505503552, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/86", + "#/texts/16", 1.0, - 12178341415896435199, - 16109275631913765862, + 15817971959542800432, + 2504485662923913654, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 900, + 932, + 900, + 932, + 171, + 175, true, - "3.2", - "3.2" + "in-memory knowledge graph stores", + "in-memory knowledge graph stores" ], [ - "numval", - "ival", - 10815650641518265876, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/87", + "#/texts/16", 1.0, - 15441160910541481861, - 93251422791520216, + 7689378915680143128, + 16879986750908948676, 18446744073709551615, 18446744073709551615, - 1214, - 1216, - 1214, - 1216, - 212, - 213, + 958, + 975, + 958, + 975, + 183, + 185, true, - "17", - "17" + "natural extension", + "natural extension" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/88", + "#/texts/16", 1.0, - 389609625548777262, - 8826555294676663632, + 5379557486728079330, + 6154195465114598565, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 1001, + 1016, + 1001, + 1016, + 192, + 194, true, - "2020", - "2020" + "express purpose", + "express purpose" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/88", + "#/texts/16", 1.0, - 389609625548777251, - 8826555296349648778, + 13671659409933113155, + 601608255800948931, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 1060, + 1081, + 1060, + 1081, + 201, + 204, true, - "2023", - "2023" + "deep data exploration", + "deep data exploration" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/88", + "#/texts/16", 1.0, - 8104408072666212335, - 13552219042525319352, + 12178341415896221596, + 13017016017066220315, 18446744073709551615, 18446744073709551615, 71, - 78, + 74, 71, - 78, - 20, - 21, + 74, + 12, + 13, true, - "10.1002", - "10.1002" + "CCS", + "CCS" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/88", + "#/texts/16", 1.0, - 389609625548868096, - 8826558551385119058, + 12178341415896221596, + 13017016017066221060, 18446744073709551615, 18446744073709551615, - 82, + 83, 86, - 82, + 83, 86, - 23, - 24, + 17, + 18, true, - "2.20", - "2.20" + "CCS", + "CCS" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/88", + "#/texts/16", 1.0, - 14654386914267794441, - 12796143052106760105, + 15441160910541487324, + 5634669655872272920, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 194, + 196, + 194, + 196, + 34, + 35, true, - "26895595", - "26895595" + "eg", + "eg" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/88", + "#/texts/16", 1.0, - 17767354399704235162, - 7753390158484899261, + 12178341415896289890, + 13017018516674550748, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 198, + 201, + 198, + 201, + 36, + 37, true, - "2", - "2" + "PDF", + "PDF" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/88", + "#/texts/16", 1.0, - 15441160910541481791, - 3518619573290839093, + 389609625525634674, + 11016105259556551385, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 203, + 207, + 203, + 207, + 38, + 39, true, - "23", - "23" + "Word", + "Word" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/88", + "#/texts/16", 1.0, - 15441160910541481543, - 3518617976696906498, + 16381206534547648658, + 12013628225183231558, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 213, + 219, + 213, + 219, + 41, + 42, true, - "08", - "08" + "Bitmap", + "Bitmap" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/88", + "#/texts/16", 1.0, - 8536069645534292969, - 16063604623463467342, + 8106398484416916345, + 12921829449712973319, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 275, + 282, + 275, + 282, + 51, + 52, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "content", + "content" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/88", + "#/texts/16", 1.0, - 594099663775968682, - 14698211805947073928, + 12178341415896221596, + 13017016017066264238, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, + 297, + 300, + 297, + 300, + 57, 58, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "CCS", + "CCS" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/88", + "#/texts/16", 1.0, - 1697220653346092555, - 8458710314769009562, + 6167933651658664291, + 8382979507824387405, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 365, + 374, + 365, + 374, + 69, + 70, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "documents", + "documents" ], [ - "numval", - "fval", - 12004249365408683930, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/89", + "#/texts/16", 1.0, - 12178341415896427344, - 10294451467892719516, + 8106398484416916345, + 12921829449712981243, 18446744073709551615, 18446744073709551615, - 302, - 305, - 294, - 297, - 68, - 69, + 394, + 401, + 394, + 401, + 76, + 77, true, - "1.5", - "1.5" + "content", + "content" ], [ - "numval", - "ival", - 12004249365408683930, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/89", + "#/texts/16", 1.0, - 17767354399704235156, - 4257155890605923351, + 6167933651658664291, + 8382979507824381420, 18446744073709551615, 18446744073709551615, - 165, - 166, - 165, - 166, - 33, - 34, + 409, + 418, + 409, + 418, + 79, + 80, true, - "4", - "4" + "documents", + "documents" ], [ - "numval", - "ival", - 12004249365408683930, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/89", + "#/texts/16", 1.0, - 17767354399704235163, - 4257155890625182636, + 14650277098690689540, + 7552940690165303911, 18446744073709551615, 18446744073709551615, - 206, - 207, - 202, - 203, - 42, - 43, + 448, + 456, + 448, + 456, + 83, + 84, true, - "3", - "3" + "Examples", + "Examples" ], [ - "numval", - "ival", - 12004249365408683930, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/89", + "#/texts/16", 1.0, - 12178341415896310600, - 10294532231444872390, + 389609625527096807, + 11016132030823058328, 18446744073709551615, 18446744073709551615, - 257, - 260, - 253, - 256, - 55, - 56, + 482, + 486, + 482, + 486, + 90, + 91, true, - "500", - "500" + "List", + "List" ], [ - "numval", - "ival", - 12004249365408683930, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/89", + "#/texts/16", 1.0, - 15441160910541481167, - 17130124993064148148, + 16381206560620045048, + 449918251045717749, 18446744073709551615, 18446744073709551615, - 267, - 269, - 261, - 263, - 58, - 59, + 491, + 497, + 491, + 497, + 92, + 93, true, - "64", - "64" + "images", + "images" ], [ - "numval", - "ival", - 12004249365408683930, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/89", + "#/texts/16", 1.0, - 17767354399704235156, - 4257155890605968002, + 8106397824302472167, + 4127392216413536700, 18446744073709551615, 18446744073709551615, - 466, - 467, - 458, - 459, - 104, - 105, + 509, + 516, + 509, + 516, + 95, + 96, true, - "4", - "4" + "caption", + "caption" ], [ - "numval", - "ival", - 12004249365408683930, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/89", + "#/texts/16", 1.0, - 17767354399704235161, - 4257155890657921360, + 16381206562408205435, + 773218971062957925, 18446744073709551615, 18446744073709551615, 526, - 527, - 518, - 519, - 118, - 119, + 532, + 526, + 532, + 98, + 99, true, - "1", - "1" + "corpus", + "corpus" ], [ - "numval", - "ival", - 12004249365408683930, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/89", + "#/texts/16", 1.0, - 17767354399704235156, - 4257155890605956247, + 389609625633315922, + 11023769407556867718, 18446744073709551615, 18446744073709551615, - 677, - 678, - 669, - 670, - 151, - 152, + 536, + 540, + 536, + 540, + 100, + 101, true, - "4", - "4" + "list", + "list" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 15441160910541481862, - 6611832599487460343, + 16381206513234356680, + 1028465141050473201, 18446744073709551615, 18446744073709551615, - 111, - 113, - 111, - 113, - 17, - 18, + 545, + 551, + 545, + 551, + 102, + 103, true, - "18", - "18" + "titles", + "titles" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 17767354399704235162, - 16086706123952683919, + 8106398484416916345, + 12921829449712933002, 18446744073709551615, 18446744073709551615, - 435, - 436, - 435, - 436, - 86, - 87, + 640, + 647, + 640, + 647, + 119, + 120, true, - "2", - "2" + "content", + "content" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 15441160910541481860, - 6611832599456912896, + 6167933651658664291, + 8382979507824362037, 18446744073709551615, 18446744073709551615, - 437, - 439, - 437, - 439, - 87, - 88, + 655, + 664, + 655, + 664, + 122, + 123, true, - "16", - "16" + "documents", + "documents" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 15441160910541481166, - 6611754875794384515, + 8106476000253296785, + 17445845102820457377, 18446744073709551615, 18446744073709551615, - 442, - 444, - 442, - 444, - 89, - 90, + 693, + 700, + 693, + 700, + 130, + 131, true, - "65", - "65" + "problem", + "problem" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 12178341415896310785, - 14422419606559923738, + 12178341415896222428, + 13017016042499633244, 18446744073709551615, 18446744073709551615, - 445, - 448, - 445, - 448, - 90, - 91, + 751, + 754, + 751, + 754, + 140, + 141, true, - "536", - "536" + "CPS", + "CPS" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 17767354399704235156, - 16086706123708070212, + 329104161668023890, + 6446354913609043760, 18446744073709551615, 18446744073709551615, - 590, - 591, - 590, - 591, - 117, - 118, + 782, + 787, + 782, + 787, + 148, + 149, true, - "4", - "4" + "paper", + "paper" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 15441160910541481849, - 6611832587831823848, + 12178341415896222428, + 13017016042499629349, 18446744073709551615, 18446744073709551615, - 622, - 624, - 622, - 624, - 124, - 125, + 793, + 796, + 793, + 796, + 151, + 152, true, - "32", - "32" + "CPS", + "CPS" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 17767354399704235152, - 16086706131746349816, + 12178341415896253732, + 13017016418109460469, 18446744073709551615, 18446744073709551615, - 676, - 677, - 676, - 677, - 137, - 138, + 836, + 839, + 836, + 839, + 159, + 160, true, - "8", - "8" + "KBs", + "KBs" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 17767354399704235156, - 16086706123708539554, + 12178341415896253732, + 13017016418109469302, 18446744073709551615, 18446744073709551615, - 756, - 757, - 756, - 757, - 152, - 153, + 888, + 891, + 888, + 891, + 169, + 170, true, - "4", - "4" + "KBs", + "KBs" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 15441160910541481854, - 6611832586181155412, + 12178341415896222428, + 13017016042499621811, 18446744073709551615, 18446744073709551615, - 786, - 788, - 786, - 788, - 159, - 160, + 947, + 950, + 947, + 950, + 180, + 181, true, - "33", - "33" + "CPS", + "CPS" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 17767354399704235152, - 16086706131746369369, + 12178341415896221596, + 13017016017072955111, 18446744073709551615, 18446744073709551615, - 878, - 879, - 872, - 873, - 181, - 182, + 983, + 986, + 983, + 986, + 187, + 188, true, - "8", - "8" + "CCS", + "CCS" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 15441160910541481849, - 6611832587831816065, + 8106398483106473371, + 12727302002187506834, 18446744073709551615, 18446744073709551615, - 920, - 922, - 914, - 916, - 189, - 190, + 1025, + 1032, + 1025, + 1032, + 196, + 197, true, - "32", - "32" + "corpora", + "corpora" ], [ - "numval", - "ival", - 7223381657047466215, + "term", + "single-term", + 8425126282903547933, "TEXT", - "#/texts/90", + "#/texts/16", 1.0, - 17767354399704235153, - 16086706131730015547, + 6167933651658664291, + 8382979507824405632, 18446744073709551615, 18446744073709551615, - 1000, - 1001, - 994, - 995, - 208, - 209, + 1036, + 1045, + 1036, + 1045, + 198, + 199, true, - "9", - "9" + "documents", + "documents" ], [ - "numval", - "ival", - 7223381657047466215, + "expression", + "word-concatenation", + 16507313240019459642, "TEXT", - "#/texts/90", + "#/texts/17", 1.0, - 17767354399704235156, - 16086706123708163426, + 6462800775355420195, + 10482892595537672495, 18446744073709551615, 18446744073709551615, - 1012, - 1013, - 1006, - 1007, - 211, - 212, + 545, + 557, + 545, + 557, + 97, + 98, true, - "4", - "4" + "state-of-the", + "state-of-the" ], [ - "numval", - "fval", - 15132906055887224772, + "expression", + "word-concatenation", + 16507313240019459642, "TEXT", - "#/texts/91", + "#/texts/17", 1.0, - 12178341415896435196, - 16211286906118314940, + 6167817039057408775, + 307499868601910971, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 712, + 721, + 712, + 721, + 128, + 129, true, - "3.3", - "3.3" + "onthe-fly", + "onthe-fly" ], [ - "numval", - "ival", - 10350406469077463155, + "sentence", + "", + 16507313240019459642, "TEXT", - "#/texts/93", + "#/texts/17", 1.0, - 17767354399704235156, - 18395627803235450761, + 886296729426492867, + 1013289275644415593, 18446744073709551615, 18446744073709551615, - 640, - 641, - 640, - 641, - 118, - 119, + 0, + 80, + 0, + 80, + 0, + 15, true, - "4", - "4" + "The purpose of CPS is to enable deep data exploration directly on large corpora.", + "The purpose of CPS is to enable deep data exploration directly on large corpora." ], [ - "numval", - "year", - 18391264192891079539, + "sentence", + "", + 16507313240019459642, "TEXT", - "#/texts/95", + "#/texts/17", 1.0, - 389609625548777262, - 8826555294676663632, + 9541411622024920334, + 12022440871342173898, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 81, + 332, + 81, + 332, + 15, + 58, true, - "2020", - "2020" + "Here, we define deep data exploration as the capability to ingest large corpora of documents into a scalable service and detect, extract and combine facts contained in these corpora in order to make new discoveries or support critical decision making.", + "Here, we define deep data exploration as the capability to ingest large corpora of documents into a scalable service and detect, extract and combine facts contained in these corpora in order to make new discoveries or support critical decision making." ], [ - "numval", - "year", - 18391264192891079539, + "sentence", + "", + 16507313240019459642, "TEXT", - "#/texts/95", + "#/texts/17", 1.0, - 389609625548777251, - 8826555296349648778, + 17816497549868937733, + 7078888263180871234, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 333, + 499, + 333, + 499, + 58, + 88, true, - "2023", - "2023" + "It is key to understand that our goal of creating and querying Knowledge Graphs to enable deep data exploration goes beyond search in the spirit of rank and retrieve.", + "It is key to understand that our goal of creating and querying Knowledge Graphs to enable deep data exploration goes beyond search in the spirit of rank and retrieve." ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "", + 16507313240019459642, "TEXT", - "#/texts/95", + "#/texts/17", 1.0, - 8104408072666212335, - 13552219042525319352, + 15573263741918160323, + 18217205935490247651, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 500, + 595, + 500, + 595, + 88, + 105, true, - "10.1002", - "10.1002" + "Although search is by no means trivial, many state-of-the art solutions exist for this purpose.", + "Although search is by no means trivial, many state-of-the art solutions exist for this purpose." ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "", + 16507313240019459642, "TEXT", - "#/texts/95", + "#/texts/17", 1.0, - 389609625548868096, - 8826558551385119058, + 10802482582364267320, + 3442896351905446061, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 598, + 786, + 598, + 786, + 106, + 140, true, - "2.20", - "2.20" + "We argue, however, that one needs query capabilities which allow for a combination of extracted facts and a fast, onthe-fly creation of new datasets to enable actual deep data exploration.", + "We argue, however, that one needs query capabilities which allow for a combination of extracted facts and a fast, onthe-fly creation of new datasets to enable actual deep data exploration." ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "enum-term-mark-1", + 16507313240019459642, "TEXT", - "#/texts/95", + "#/texts/17", 1.0, - 14654386914267794441, - 12796143052106760105, + 4628000634792382774, + 9002491739727908585, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 210, + 235, + 210, + 235, + 38, + 42, true, - "26895595", - "26895595" + "extract and combine facts", + "extract and combine facts" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "enum-term-mark-2", + 16507313240019459642, "TEXT", - "#/texts/95", + "#/texts/17", 1.0, - 17767354399704235162, - 7753390158484899261, + 7713689687566008780, + 7935004201419028727, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 190, + 208, + 190, + 208, + 34, + 37, true, - "2", - "2" + "service and detect", + "service and detect" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "enum-term-mark-2", + 16507313240019459642, "TEXT", - "#/texts/95", + "#/texts/17", 1.0, - 15441160910541481791, - 3518619573290839093, + 1284196299332013932, + 863910340348721145, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 481, + 498, + 481, + 498, + 84, + 87, true, - "23", - "23" + "rank and retrieve", + "rank and retrieve" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/95", + "#/texts/17", 1.0, - 15441160910541481543, - 3518617976696906498, + 13671659409933113155, + 8539197931072578295, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, 32, - 33, + 53, + 32, + 53, + 7, + 10, true, - "08", - "08" + "deep data exploration", + "deep data exploration" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/95", + "#/texts/17", 1.0, - 8536069645534292969, - 16063604623463467342, + 11805624510445989958, + 16713650511855322410, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 66, + 79, + 66, + 79, + 12, + 14, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "large corpora", + "large corpora" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/95", + "#/texts/17", 1.0, - 594099663775968682, - 14698211805947073928, + 13671659409933113155, + 8539197931072557639, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 97, + 118, + 97, + 118, + 19, + 22, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "deep data exploration", + "deep data exploration" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/95", + "#/texts/17", 1.0, - 1697220653346092555, - 8458710314769009562, + 11805624510445989958, + 16713650511855331654, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 147, + 160, + 147, + 160, + 27, + 29, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "large corpora", + "large corpora" ], [ - "numval", - "ival", - 4361549266593946746, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/96", + "#/texts/17", 1.0, - 17767354399704235153, - 1792635071361844496, + 16841521601048517221, + 10304735983782247376, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 181, + 197, + 181, + 197, + 33, + 35, true, - "9", - "9" + "scalable service", + "scalable service" ], [ - "numval", - "ival", - 4361549266593946746, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/96", + "#/texts/17", 1.0, - 15441160910541481979, - 7911155768595088752, + 5036868565433231343, + 8800882412221662115, 18446744073709551615, 18446744073709551615, - 3, - 5, - 3, - 5, - 2, - 3, + 222, + 235, + 222, + 235, + 40, + 42, true, - "15", - "15" + "combine facts", + "combine facts" ], [ - "numval", - "fval", - 9802652237802670052, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/97", + "#/texts/17", 1.0, - 12178341415896435196, - 198388536621247129, + 13137373831138315414, + 4045690283859393399, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 280, + 295, + 280, + 295, + 50, + 52, true, - "3.3", - "3.3" + "new discoveries", + "new discoveries" ], [ - "numval", - "ival", - 9802652237802670052, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/97", + "#/texts/17", 1.0, - 17767354399704235161, - 3052200858272860943, + 12137975992667888681, + 18210969434513448169, 18446744073709551615, 18446744073709551615, - 4, - 5, - 4, - 5, - 2, - 3, + 307, + 331, + 307, + 331, + 54, + 57, true, - "1", - "1" + "critical decision making", + "critical decision making" ], [ - "numval", - "ival", - 4043385013945968936, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/99", + "#/texts/17", 1.0, - 17767354399704235161, - 3863023118325513235, + 1360625753915430118, + 11684924719465067944, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 14, - 15, + 396, + 412, + 396, + 412, + 70, + 72, true, - "1", - "1" + "Knowledge Graphs", + "Knowledge Graphs" ], [ - "numval", - "ival", - 4043385013945968936, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/99", + "#/texts/17", 1.0, - 17767354399704235160, - 3863023118293440507, + 13671659409933113155, + 8539197931072602374, 18446744073709551615, 18446744073709551615, - 33, - 34, - 33, - 34, - 20, - 21, + 423, + 444, + 423, + 444, + 74, + 77, true, - "0", - "0" + "deep data exploration", + "deep data exploration" ], [ - "numval", - "ival", - 4043385013945968936, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/99", + "#/texts/17", 1.0, - 17767354399704235162, - 3863023118274566919, + 5917422969004799389, + 5864833839898817031, 18446744073709551615, 18446744073709551615, - 47, - 48, - 47, - 48, - 25, - 26, + 540, + 571, + 540, + 571, + 96, + 100, true, - "2", - "2" + "many state-of-the art solutions", + "many state-of-the art solutions" ], [ - "numval", - "ival", - 4043385013945968936, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/99", + "#/texts/17", 1.0, - 15441160910541481788, - 1525860005576289474, + 14669513449876101491, + 10132680267693553575, 18446744073709551615, 18446744073709551615, - 60, - 62, - 60, - 62, - 31, - 32, + 632, + 650, + 632, + 650, + 114, + 116, true, - "26", - "26" + "query capabilities", + "query capabilities" ], [ - "numval", - "ival", - 4043385013945968936, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/99", + "#/texts/17", 1.0, - 17767354399704235163, - 3863023118291550190, + 5101483109317469571, + 2542782085392923291, 18446744073709551615, 18446744073709551615, - 67, - 68, - 66, - 67, - 34, - 35, + 684, + 699, + 684, + 699, + 122, + 124, true, - "3", - "3" + "extracted facts", + "extracted facts" ], [ - "numval", - "fval", - 12875050310340408203, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/101", + "#/texts/17", 1.0, - 12178341415896435196, - 17738549797942293450, + 10073590939063022968, + 6598517023993843857, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 712, + 730, + 712, + 730, + 128, + 130, true, - "3.3", - "3.3" + "onthe-fly creation", + "onthe-fly creation" ], [ - "numval", - "ival", - 12875050310340408203, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/101", + "#/texts/17", 1.0, - 17767354399704235162, - 16045717610508207921, + 15983059512171872769, + 7129249928945687506, 18446744073709551615, 18446744073709551615, - 4, - 5, - 4, - 5, - 2, - 3, + 734, + 746, + 734, + 746, + 131, + 133, true, - "2", - "2" + "new datasets", + "new datasets" ], [ - "numval", - "fval", - 3785875504044487339, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/102", + "#/texts/17", 1.0, - 12178341415896435198, - 9356552374251491539, + 4236705882725154612, + 17202157709512677971, 18446744073709551615, 18446744073709551615, - 102, - 105, - 102, - 105, - 20, - 21, + 757, + 785, + 757, + 785, + 135, + 139, true, - "3.1", - "3.1" + "actual deep data exploration", + "actual deep data exploration" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 17767354399704235162, - 9989301225055039953, + 8156062656453467049, + 15445666889551241345, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, + 823, + 835, + 823, + 835, + 147, + 149, + true, + "further anal", + "further anal" + ], + [ + "term", + "single-term", + 16507313240019459642, + "TEXT", + "#/texts/17", + 1.0, + 8106479265948440982, + 9682744694557138097, + 18446744073709551615, + 18446744073709551615, + 4, 11, - 12, + 4, + 11, + 1, + 2, true, - "2", - "2" + "purpose", + "purpose" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 17767354399704235162, - 9989301225055040085, + 12178341415896222428, + 11528399152608403159, 18446744073709551615, 18446744073709551615, + 15, 18, - 19, + 15, 18, - 19, - 12, - 13, + 3, + 4, true, - "2", - "2" + "CPS", + "CPS" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 17767354399704235163, - 9989301225441111510, + 2873671966753113989, + 2127797010805161329, 18446744073709551615, 18446744073709551615, - 26, - 27, - 26, - 27, - 16, - 17, + 126, + 136, + 126, + 136, + 24, + 25, true, - "3", - "3" + "capability", + "capability" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 17767354399704235163, - 9989301225441111382, + 6167933651658664291, + 6247850582883629850, 18446744073709551615, 18446744073709551615, - 28, - 29, - 28, - 29, - 17, - 18, + 164, + 173, + 164, + 173, + 30, + 31, true, - "3", - "3" + "documents", + "documents" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 17767354399704235161, - 9989301227833998387, + 16381206568246497178, + 5711807947654123028, 18446744073709551615, 18446744073709551615, - 41, - 42, - 41, - 42, - 23, - 24, + 202, + 208, + 202, + 208, + 36, + 37, true, - "1", - "1" + "detect", + "detect" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 17767354399704235161, - 9989301227833995448, + 8106398483106473371, + 2575623431105004932, 18446744073709551615, 18446744073709551615, - 51, - 52, - 51, - 52, - 28, - 29, + 255, + 262, + 255, + 262, + 45, + 46, true, - "1", - "1" + "corpora", + "corpora" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 17767354399704235161, - 9989301227833985318, + 329104161571401725, + 13306796550431040446, 18446744073709551615, 18446744073709551615, - 61, - 62, - 61, - 62, - 33, - 34, + 266, + 271, + 266, + 271, + 47, + 48, true, - "1", - "1" + "order", + "order" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 15441160910541481862, - 7426216222773784579, + 389609625699055241, + 11175364452222306028, 18446744073709551615, 18446744073709551615, - 71, - 73, - 71, - 73, - 38, - 39, + 366, + 370, + 366, + 370, + 65, + 66, true, - "18", - "18" + "goal", + "goal" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 15441160910541481863, - 7426216222719391073, + 16381206577802837709, + 4141450160478429176, 18446744073709551615, 18446744073709551615, - 82, - 84, - 82, - 84, - 43, - 44, + 457, + 463, + 457, + 463, + 79, + 80, true, - "19", - "19" + "search", + "search" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 12178341415896413249, - 17313632338592011779, + 16381206579133443680, + 4301292735794752905, 18446744073709551615, 18446744073709551615, - 103, - 106, - 103, - 106, - 60, - 61, + 471, + 477, + 471, + 477, + 82, + 83, true, - "- 1", - "- 1" + "spirit", + "spirit" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 17767354399704235160, - 9989301226364846173, + 389609625632775501, + 11176946972678052671, 18446744073709551615, 18446744073709551615, - 114, - 115, - 114, - 115, - 64, - 65, + 481, + 485, + 481, + 485, + 84, + 85, true, - "0", - "0" + "rank", + "rank" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 17767354399704235161, - 9989301227833984724, + 14634109585341561832, + 13633295059146293502, 18446744073709551615, 18446744073709551615, - 124, - 125, - 124, - 125, - 69, - 70, + 490, + 498, + 490, + 498, + 86, + 87, true, - "1", - "1" + "retrieve", + "retrieve" ], [ - "numval", - "ival", - 12105626155924658285, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/103", + "#/texts/17", 1.0, - 17767354399704235156, - 9989301228016144908, + 16381206577802837709, + 4141450160478415117, 18446744073709551615, 18446744073709551615, - 140, - 141, - 139, - 140, - 80, - 81, + 509, + 515, + 509, + 515, + 89, + 90, true, - "4", - "4" + "search", + "search" ], [ - "numval", - "ival", - 16265612055607243129, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/104", + "#/texts/17", 1.0, - 17767354399704235162, - 13895472510679550781, + 8106479265948440982, + 9682744694557248835, 18446744073709551615, 18446744073709551615, - 194, - 195, - 192, - 193, - 55, - 56, + 587, + 594, + 587, + 594, + 103, + 104, true, - "2", - "2" + "purpose", + "purpose" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/105", + "#/texts/17", 1.0, - 389609625548777262, - 8826555294676663632, + 2989796650905950968, + 16528879162457070625, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 669, + 680, + 669, + 680, + 120, + 121, true, - "2020", - "2020" + "combination", + "combination" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 16507313240019459642, "TEXT", - "#/texts/105", + "#/texts/17", 1.0, - 389609625548777251, - 8826555296349648778, + 14650399832217777324, + 3746457268245887789, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 793, + 801, + 793, + 801, + 141, + 142, true, - "2023", - "2023" + "datasets", + "datasets" ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "", + 12186698460099365002, "TEXT", - "#/texts/105", + "#/texts/20", 1.0, - 8104408072666212335, - 13552219042525319352, + 1178767304508420023, + 10033646531393255296, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 3, + 49, + 3, + 49, + 2, + 8, true, - "10.1002", - "10.1002" + "Definition of high temperature superconductor.", + "Definition of high temperature superconductor." ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 12186698460099365002, "TEXT", - "#/texts/105", + "#/texts/20", 1.0, - 389609625548868096, - 8826558551385119058, + 14176504946364501255, + 11569035959886188174, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 17, + 48, + 17, + 48, + 4, + 7, true, - "2.20", - "2.20" + "high temperature superconductor", + "high temperature superconductor" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 12186698460099365002, "TEXT", - "#/texts/105", + "#/texts/20", 1.0, - 14654386914267794441, - 12796143052106760105, + 7182187449689677233, + 18267537832000191930, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 3, + 13, + 3, + 13, + 2, + 3, true, - "26895595", - "26895595" + "Definition", + "Definition" ], [ "numval", - "ival", - 18391264192891079539, + "year", + 14190244699299580163, "TEXT", - "#/texts/105", + "#/texts/21", 1.0, - 17767354399704235162, - 7753390158484899261, + 389609625548777062, + 16322066304153845812, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 40, + 44, + 40, + 44, + 7, + 8, true, - "2", - "2" + "2010", + "2010" ], [ - "numval", - "ival", - 18391264192891079539, + "expression", + "wtoken-concatenation", + 14190244699299580163, "TEXT", - "#/texts/105", + "#/texts/21", 1.0, - 15441160910541481791, - 3518619573290839093, + 14650948670182226136, + 16086954502817773001, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 19, + 27, + 19, + 27, + 4, + 5, true, - "23", - "23" + "", + "" ], [ - "numval", - "ival", - 18391264192891079539, + "sentence", + "", + 14190244699299580163, "TEXT", - "#/texts/105", + "#/texts/21", 1.0, - 15441160910541481543, - 3518617976696906498, + 17114970716067793368, + 17892671305468965680, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 3, + 45, + 3, + 45, + 2, + 9, true, - "08", - "08" + "Publications of before year 2010.", + "Publications of before year 2010." ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 14190244699299580163, "TEXT", - "#/texts/105", + "#/texts/21", 1.0, - 8536069645534292969, - 16063604623463467342, + 7049010920607555536, + 14574446082424609224, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 3, + 15, + 3, + 15, + 2, + 3, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "Publications", + "Publications" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 14190244699299580163, "TEXT", - "#/texts/105", + "#/texts/21", 1.0, - 594099663775968682, - 14698211805947073928, + 14650948670182226136, + 16086954502817773001, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 19, + 27, + 19, + 27, + 4, + 5, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "", + "" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 14190244699299580163, "TEXT", - "#/texts/105", + "#/texts/21", 1.0, - 1697220653346092555, - 8458710314769009562, + 389609625740550397, + 16512324461665891687, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 35, + 39, + 35, + 39, + 6, + 7, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "year", + "year" ], [ - "numval", - "fval", - 10252446451495472512, + "sentence", + "", + 1376279050886549305, "TEXT", - "#/texts/106", + "#/texts/22", 1.0, - 12178341415896435196, - 4867750156681578759, + 11828744795764754421, + 9031682916292278032, 18446744073709551615, 18446744073709551615, - 0, 3, - 0, + 29, 3, - 0, - 1, + 29, + 2, + 8, true, - "3.3", - "3.3" + "Maps of the Permian basin.", + "Maps of the Permian basin." ], [ - "numval", - "ival", - 10252446451495472512, + "term", + "single-term", + 1376279050886549305, "TEXT", - "#/texts/106", + "#/texts/22", 1.0, - 17767354399704235163, - 11397855393475351535, + 13962245658001463579, + 14601050113340142397, 18446744073709551615, 18446744073709551615, - 4, - 5, - 4, + 15, + 28, + 15, + 28, 5, - 2, - 3, + 7, true, - "3", - "3" + "Permian basin", + "Permian basin" ], [ - "numval", - "fval", - 16289627123982758705, + "term", + "single-term", + 1376279050886549305, "TEXT", - "#/texts/108", + "#/texts/22", 1.0, - 12178341415896435196, - 4375676351556568035, + 389609625541180066, + 844236868687538702, 18446744073709551615, 18446744073709551615, - 0, 3, - 0, + 7, + 3, + 7, + 2, 3, - 0, - 1, true, - "3.3", - "3.3" + "Maps", + "Maps" ], [ - "numval", - "ival", - 16289627123982758705, + "sentence", + "", + 10155628801693924200, "TEXT", - "#/texts/108", + "#/texts/23", 1.0, - 17767354399704235156, - 14141377842797647357, + 11529297519432858487, + 4227353319390710547, 18446744073709551615, 18446744073709551615, - 4, - 5, - 4, - 5, - 2, 3, + 112, + 3, + 112, + 2, + 20, true, - "4", - "4" + "Geological formations from the Miocene age with their depth, thickness, geographic location, and composition.", + "Geological formations from the Miocene age with their depth, thickness, geographic location, and composition." ], [ - "numval", - "ival", - 105697770555684555, + "term", + "enum-term-mark-2", + 10155628801693924200, "TEXT", - "#/texts/110", + "#/texts/23", 1.0, - 17767354399704235160, - 3668124634718140630, + 6404388065355556380, + 1414583348578000819, 18446744073709551615, 18446744073709551615, - 351, - 352, - 351, - 352, - 76, - 77, + 86, + 111, + 86, + 111, + 15, + 19, true, - "0", - "0" + "location, and composition", + "location, and composition" ], [ - "numval", - "ival", - 16505790528099785698, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/112", + "#/texts/23", 1.0, - 17767354399704235156, - 6951916224121472658, + 11536091645160224997, + 11675986273674768837, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 3, + 24, + 3, + 24, + 2, + 4, true, - "4", - "4" + "Geological formations", + "Geological formations" ], [ - "numval", - "ival", - 14738723905055920039, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/113", + "#/texts/23", 1.0, - 15441160910541481039, - 13036410263911933256, + 13913749731470667949, + 3850638292102934182, 18446744073709551615, 18446744073709551615, - 309, - 311, - 309, - 311, - 58, - 59, + 34, + 45, + 34, + 45, + 6, + 8, true, - "86", - "86" + "Miocene age", + "Miocene age" ], [ - "numval", - "ival", - 5699550326698755904, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/114", + "#/texts/23", 1.0, - 17767354399704235157, - 8852681642826623127, + 10848824456461591623, + 7386525518787609810, 18446744073709551615, 18446744073709551615, - 10, - 11, - 10, - 11, - 2, - 3, + 75, + 94, + 75, + 94, + 14, + 16, true, - "5", - "5" + "geographic location", + "geographic location" ], [ - "numval", - "ival", - 11609131422778723150, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/115", + "#/texts/23", 1.0, - 17767354399704235161, - 9537684729007623, + 329104162100250438, + 13610267414365582951, 18446744073709551615, 18446744073709551615, - 112, - 113, - 112, - 113, - 20, - 21, + 57, + 62, + 57, + 62, + 10, + 11, true, - "1", - "1" + "depth", + "depth" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/118", + "#/texts/23", 1.0, - 389609625548777262, - 8826555294676663632, + 3504050857170707483, + 596059642443336109, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 64, + 73, + 64, + 73, + 12, + 13, true, - "2020", - "2020" + "thickness", + "thickness" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/118", + "#/texts/23", 1.0, - 389609625548777251, - 8826555296349648778, + 14749101077007455096, + 6683642016798435769, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 100, + 111, + 100, + 111, + 18, + 19, true, - "2023", - "2023" + "composition", + "composition" ], [ - "numval", - "fval", - 18391264192891079539, + "expression", + "word-concatenation", + 9107499507097280105, "TEXT", - "#/texts/118", + "#/texts/24", 1.0, - 8104408072666212335, - 13552219042525319352, + 8106397471578324091, + 17544472309867440760, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 12, + 19, + 12, + 19, + 4, + 5, true, - "10.1002", - "10.1002" + "high-Tc", + "high-Tc" ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "", + 9107499507097280105, "TEXT", - "#/texts/118", + "#/texts/24", 1.0, - 389609625548868096, - 8826558551385119058, + 1974328525313479394, + 15740602897253173811, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 3, + 94, + 3, + 94, + 2, + 14, true, - "2.20", - "2.20" + "List all high-Tc superconductors with their known crystallographic and material properties?", + "List all high-Tc superconductors with their known crystallographic and material properties?" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 9107499507097280105, "TEXT", - "#/texts/118", + "#/texts/24", 1.0, - 14654386914267794441, - 12796143052106760105, + 6384409931856450279, + 18006728059469725457, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 12, + 35, + 12, + 35, + 4, + 6, true, - "26895595", - "26895595" + "high-Tc superconductors", + "high-Tc superconductors" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 9107499507097280105, "TEXT", - "#/texts/118", + "#/texts/24", 1.0, - 17767354399704235162, - 7753390158484899261, + 3841511266640975261, + 8141116605713377189, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 74, + 93, + 74, + 93, + 11, + 13, true, - "2", - "2" + "material properties", + "material properties" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 9107499507097280105, "TEXT", - "#/texts/118", + "#/texts/24", 1.0, - 15441160910541481791, - 3518619573290839093, + 389609625527096807, + 17993706797399040827, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 3, + 7, + 3, + 7, + 2, + 3, true, - "23", - "23" + "List", + "List" ], [ - "numval", - "ival", - 18391264192891079539, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/118", + "#/texts/25", 1.0, - 15441160910541481543, - 3518617976696906498, + 12178341415896394054, + 11564909962300040492, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, + 9, + 12, + 9, + 12, + 1, + 4, + true, + "(a)", + "(a)" + ], + [ + "parenthesis", + "round brackets", + 7248467870339433322, + "TEXT", + "#/texts/25", + 1.0, + 12379829975541768606, + 13865488235026578313, + 18446744073709551615, + 18446744073709551615, + 145, + 162, + 145, + 162, + 27, 32, - 33, true, - "08", - "08" + "(ie, definitions)", + "(ie, definitions)" ], [ - "link", - "url", - 18391264192891079539, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/118", + "#/texts/25", 1.0, - 8536069645534292969, - 16063604623463467342, + 12178341415896394119, + 11564909936483728813, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 183, + 186, + 183, + 186, + 36, + 39, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "(b)", + "(b)" ], [ - "link", - "url", - 18391264192891079539, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/118", + "#/texts/25", 1.0, - 594099663775968682, - 14698211805947073928, + 12178341415896393924, + 11564909955844336362, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 281, + 284, + 281, + 284, + 54, + 57, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "(c)", + "(c)" ], [ - "link", - "doi", - 18391264192891079539, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/118", + "#/texts/25", 1.0, - 1697220653346092555, - 8458710314769009562, + 12178341415896393989, + 11564909965731230263, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 497, + 500, + 497, + 500, + 95, + 98, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "(d)", + "(d)" ], [ - "numval", - "ival", - 2144926686518491811, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/119", + "#/texts/25", 1.0, - 15441160910541481983, - 7629680595941988994, + 12178341415896394307, + 11564909949268562290, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 505, + 508, + 505, + 508, + 99, + 102, true, - "11", - "11" + "(e)", + "(e)" ], [ - "numval", - "ival", - 2144926686518491811, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/119", + "#/texts/25", 1.0, - 15441160910541481979, - 7629680596056147236, + 7548253312880200059, + 14338346281668154436, 18446744073709551615, 18446744073709551615, - 4, - 6, - 4, - 6, - 2, - 3, + 750, + 868, + 750, + 868, + 141, + 163, true, - "15", - "15" + "(eg, a table in which the rows list the formations or materials while the columns contain their respective properties)", + "(eg, a table in which the rows list the formations or materials while the columns contain their respective properties)" ], [ - "numval", - "ival", - 4030998538427149966, + "sentence", + "", + 7248467870339433322, "TEXT", - "#/texts/121", + "#/texts/25", 1.0, - 17767354399704235157, - 11518089933568466075, + 11815587436253641919, + 9694283959050279543, 18446744073709551615, 18446744073709551615, 0, - 1, + 163, 0, - 1, + 163, 0, - 1, + 33, true, - "5", - "5" + "Question (a) undoubtedly fits the classic search paradigm, since here one can expect a search engine to find a number sources with exact answers (ie, definitions).", + "Question (a) undoubtedly fits the classic search paradigm, since here one can expect a search engine to find a number sources with exact answers (ie, definitions)." ], [ - "numval", - "ival", - 10633780781731536747, + "sentence", + "", + 7248467870339433322, "TEXT", - "#/texts/123", + "#/texts/25", 1.0, - 15441160910541481863, - 7242502688177594361, + 414059460071051178, + 9537463599014151627, 18446744073709551615, 18446744073709551615, - 378, - 380, - 378, - 380, - 67, - 68, + 164, + 271, + 164, + 271, + 33, + 53, true, - "19", - "19" + "Likewise, question (b) can be easily answered through metadata based filter rules on a literature database.", + "Likewise, question (b) can be easily answered through metadata based filter rules on a literature database." ], [ - "numval", - "year", - 18391264192891079539, + "sentence", + "", + 7248467870339433322, "TEXT", - "#/texts/124", + "#/texts/25", 1.0, - 389609625548777262, - 8826555294676663632, + 4249137505398623360, + 15817638664358866975, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 272, + 486, + 272, + 486, + 53, + 94, true, - "2020", - "2020" + "Question (c) already requires some extent of domain knowledge to be encoded in a model to accurately classify the relevance of all known maps to the query, at least assuming no manual curation effort has been done.", + "Question (c) already requires some extent of domain knowledge to be encoded in a model to accurately classify the relevance of all known maps to the query, at least assuming no manual curation effort has been done." ], [ - "numval", - "year", - 18391264192891079539, + "sentence", + "", + 7248467870339433322, "TEXT", - "#/texts/124", + "#/texts/25", 1.0, - 389609625548777251, - 8826555296349648778, + 16737319616048446271, + 14108564952457254186, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 487, + 674, + 487, + 674, + 94, + 129, true, - "2023", - "2023" + "Questions (d) and (e) ultimately impose query capabilities which are clearly infeasible to support through manual curation, and are very unlikely to be answered in any single data source.", + "Questions (d) and (e) ultimately impose query capabilities which are clearly infeasible to support through manual curation, and are very unlikely to be answered in any single data source." ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "", + 7248467870339433322, "TEXT", - "#/texts/124", + "#/texts/25", 1.0, - 8104408072666212335, - 13552219042525319352, + 7228458266121330253, + 7626297788632302031, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 675, + 869, + 675, + 869, + 129, + 164, true, - "10.1002", - "10.1002" + "These questions require the system to return a more complex data structure (eg, a table in which the rows list the formations or materials while the columns contain their respective properties).", + "These questions require the system to return a more complex data structure (eg, a table in which the rows list the formations or materials while the columns contain their respective properties)." ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "enum-term-mark-3", + 7248467870339433322, "TEXT", - "#/texts/124", + "#/texts/25", 1.0, - 389609625548868096, - 8826558551385119058, + 8705710812738155139, + 4173932638461788376, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 790, + 813, + 790, + 813, + 152, + 155, true, - "2.20", - "2.20" + "formations or materials", + "formations or materials" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/124", + "#/texts/25", 1.0, - 14654386914267794441, - 12796143052106760105, + 1081977986607740386, + 13369606897380109283, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 34, + 57, + 34, + 57, + 7, + 10, true, - "26895595", - "26895595" + "classic search paradigm", + "classic search paradigm" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/124", + "#/texts/25", 1.0, - 17767354399704235162, - 7753390158484899261, + 4504082466399500918, + 14073774627107365452, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, + 87, + 100, + 87, + 100, 17, - 4, - 5, + 19, true, - "2", - "2" + "search engine", + "search engine" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/124", + "#/texts/25", 1.0, - 15441160910541481791, - 3518619573290839093, + 12002758730476261783, + 6168539106973887837, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 111, + 125, + 111, + 125, + 22, + 24, true, - "23", - "23" + "number sources", + "number sources" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/124", + "#/texts/25", 1.0, - 15441160910541481543, - 3518617976696906498, + 16604364587013013096, + 5886022793160196344, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 131, + 144, + 131, + 144, + 25, + 27, true, - "08", - "08" + "exact answers", + "exact answers" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/124", + "#/texts/25", 1.0, - 8536069645534292969, - 16063604623463467342, + 11809545502212496257, + 8478007624491184080, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 233, + 245, + 233, + 245, + 46, + 48, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "filter rules", + "filter rules" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/124", + "#/texts/25", 1.0, - 594099663775968682, - 14698211805947073928, + 15951062515149504329, + 16980500711213785871, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 251, + 270, + 251, + 270, + 50, + 52, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "literature database", + "literature database" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/124", + "#/texts/25", 1.0, - 1697220653346092555, - 8458710314769009562, + 5329435588693387761, + 15530791178865888084, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, + 317, + 333, + 317, + 333, + 62, + 64, + true, + "domain knowledge", + "domain knowledge" + ], + [ + "term", + "single-term", + 7248467870339433322, + "TEXT", + "#/texts/25", + 1.0, + 11130817838525238749, + 11909014372498478623, + 18446744073709551615, + 18446744073709551615, + 449, + 471, + 449, + 471, 87, - 18, - 25, + 90, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "manual curation effort", + "manual curation effort" ], [ - "numval", - "ival", - 1080447728722590413, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/125", + "#/texts/25", 1.0, - 15441160910541481976, - 12490743152134877753, + 14669513449876101491, + 3515293922915694043, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 527, + 545, + 527, + 545, + 104, + 106, true, - "12", - "12" + "query capabilities", + "query capabilities" ], [ - "numval", - "ival", - 4361549257087816853, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/126", + "#/texts/25", 1.0, - 15441160910541481979, - 9983816787922721487, + 3392901146434670347, + 1719180218007220136, 18446744073709551615, 18446744073709551615, - 3, - 5, - 3, - 5, - 1, - 2, + 594, + 609, + 594, + 609, + 113, + 115, true, - "15", - "15" + "manual curation", + "manual curation" ], [ - "numval", - "ival", - 12426662601736619109, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/129", + "#/texts/25", 1.0, - 17767354399704235158, - 13179516689827493860, + 3099738386292325982, + 1290161288237999950, 18446744073709551615, 18446744073709551615, - 345, - 346, - 345, - 346, - 62, - 63, + 655, + 673, + 655, + 673, + 125, + 128, true, - "6", - "6" + "single data source", + "single data source" ], [ - "numval", - "ival", - 4162783521620221579, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/130", + "#/texts/25", 1.0, - 17767354399704235161, - 16668792304570951258, + 528741001868643171, + 9976817434875577411, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 727, + 749, + 727, + 749, + 138, + 141, true, - "1", - "1" + "complex data structure", + "complex data structure" ], [ - "numval", - "ival", - 5135259059216244866, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/131", + "#/texts/25", 1.0, - 17767354399704235162, - 17330663619054335778, + 10514013392853408912, + 13055603857313609190, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 846, + 867, + 846, + 867, + 160, + 162, true, - "2", - "2" + "respective properties", + "respective properties" ], [ - "numval", - "ival", - 16998817296948099535, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/132", + "#/texts/25", 1.0, - 17767354399704235163, - 14373480556157138435, + 14650942982668217094, + 17302408507521948522, 18446744073709551615, 18446744073709551615, 0, - 1, + 8, 0, - 1, + 8, 0, 1, true, - "3", - "3" + "Question", + "Question" ], [ - "numval", - "ival", - 1205649569241141618, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/133", + "#/texts/25", 1.0, - 389609625536078676, - 4142990959296314501, + 15441160910541486545, + 12227689146572455673, 18446744073709551615, 18446744073709551615, - 88, - 92, - 88, - 92, - 18, - 19, + 146, + 148, + 146, + 148, + 28, + 29, true, - "1051", - "1051" + "ie", + "ie" ], [ - "numval", - "fval", - 12257840490666828590, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/134", + "#/texts/25", 1.0, - 389609625534532312, - 11597792617376893235, + 1536294900910083314, + 8031917503160766119, 18446744073709551615, 18446744073709551615, - 230, - 234, - 230, - 234, - 47, - 48, + 150, + 161, + 150, + 161, + 30, + 31, true, - "99.7", - "99.7" + "definitions", + "definitions" ], [ - "numval", - "fval", - 12257840490666828590, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/134", + "#/texts/25", 1.0, - 389609625534532316, - 11597792631633065669, + 14637920976934857672, + 10531673380158100191, 18446744073709551615, 18446744073709551615, - 247, - 251, - 247, - 251, - 51, - 52, + 174, + 182, + 174, + 182, + 35, + 36, true, - "99.3", - "99.3" + "question", + "question" ], [ - "numval", - "ival", - 12257840490666828590, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/134", + "#/texts/25", 1.0, - 17767354399704235161, - 10009347220024759156, + 14638347573453462708, + 13381141265012229755, 18446744073709551615, 18446744073709551615, - 10, - 11, - 10, - 11, - 3, - 4, + 218, + 226, + 218, + 226, + 44, + 45, true, - "1", - "1" + "metadata", + "metadata" ], [ - "numval", - "ival", - 12257840490666828590, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/134", + "#/texts/25", 1.0, - 389609625536078676, - 11597977105526404591, + 14650942982668217094, + 17302408507521932942, 18446744073709551615, 18446744073709551615, - 46, - 50, - 46, - 50, - 12, - 13, + 272, + 280, + 272, + 280, + 53, + 54, true, - "1051", - "1051" + "Question", + "Question" ], [ - "numval", - "ival", - 12257840490666828590, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/134", + "#/texts/25", 1.0, - 12178341415896435064, - 2035594838057841276, + 16381206569053819062, + 10460140271249301149, 18446744073709551615, 18446744073709551615, - 114, - 117, - 114, - 117, - 23, - 24, + 307, + 313, + 307, + 313, + 60, + 61, true, - "300", - "300" + "extent", + "extent" ], [ - "numval", - "ival", - 12257840490666828590, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/134", + "#/texts/25", 1.0, - 15441160910541486270, - 17171794145981856951, + 329104161610777240, + 9405584604279882191, 18446744073709551615, 18446744073709551615, - 126, - 128, - 126, - 128, - 27, - 28, + 353, + 358, + 353, + 358, + 69, + 70, true, - "46", - "46" + "model", + "model" ], [ - "numval", - "ival", - 12257840490666828590, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/134", + "#/texts/25", 1.0, - 12178341415896430817, - 2035594491968753454, + 6165970819764784401, + 80764262191222596, 18446744073709551615, 18446744073709551615, - 129, - 132, - 129, - 132, - 28, - 29, + 386, + 395, + 386, + 395, + 74, + 75, true, - "019", - "019" + "relevance", + "relevance" ], [ - "numval", - "ival", - 12257840490666828590, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/134", + "#/texts/25", 1.0, - 17767354399704235161, - 10009347220024810595, + 389609625618383420, + 8585944591956156333, 18446744073709551615, 18446744073709551615, - 360, - 361, - 360, - 361, - 72, - 73, + 409, + 413, + 409, + 413, + 78, + 79, true, - "1", - "1" + "maps", + "maps" ], [ - "numval", - "ival", - 7040847965650746591, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/135", + "#/texts/25", 1.0, - 17767354399704235162, - 9584179333675572235, + 329104158730975457, + 12415524210016868054, 18446744073709551615, 18446744073709551615, - 9, - 10, - 9, - 10, - 3, - 4, + 421, + 426, + 421, + 426, + 81, + 82, true, - "2", - "2" + "query", + "query" ], [ - "numval", - "ival", - 7040847965650746591, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/135", + "#/texts/25", 1.0, - 389609625655454200, - 9968482244883150940, + 2906549781684343771, + 1487082764881216534, 18446744073709551615, 18446744073709551615, - 396, - 400, - 396, - 400, - 81, - 82, + 487, + 496, + 487, + 496, + 94, + 95, true, - "4597", - "4597" + "Questions", + "Questions" ], [ - "numval", - "ival", - 7040847965650746591, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/135", + "#/texts/25", 1.0, - 389609625533565630, - 9993658546277119180, + 6168848426972573469, + 13229835019663629347, 18446744073709551615, 18446744073709551615, - 407, - 411, - 407, - 411, - 84, - 85, + 681, + 690, + 681, + 690, + 130, + 131, true, - "8811", - "8811" + "questions", + "questions" ], [ - "numval", - "ival", - 7040847965650746591, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/135", + "#/texts/25", 1.0, - 12178341415896307158, - 3013762769356241010, + 16381206550376895780, + 9515784264781594172, 18446744073709551615, 18446744073709551615, - 424, - 427, - 424, - 427, - 87, - 88, + 703, + 709, + 703, + 709, + 133, + 134, true, - "471", - "471" + "system", + "system" ], [ - "numval", - "ival", - 7040847965650746591, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/135", + "#/texts/25", 1.0, - 15441160910541481167, - 17325109575647682885, + 15441160910541487324, + 12227689149305300000, 18446744073709551615, 18446744073709551615, - 449, - 451, - 449, - 451, - 92, - 93, + 751, + 753, + 751, + 753, + 142, + 143, true, - "64", - "64" + "eg", + "eg" ], [ - "numval", - "ival", - 7040847965650746591, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/135", + "#/texts/25", 1.0, - 12178341415896424078, - 3013760380687037623, + 329104159216638303, + 12465464266404107462, 18446744073709551615, 18446744073709551615, - 539, - 542, - 539, - 542, - 111, - 112, + 757, + 762, + 757, + 762, + 145, + 146, true, - "130", - "130" + "table", + "table" ], [ - "numval", - "ival", - 7040847965650746591, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/135", + "#/texts/25", 1.0, - 12178341415896199548, - 3013765380002608726, + 389609625632815211, + 8556557572644543816, 18446744073709551615, 18446744073709551615, - 659, - 662, - 659, - 662, - 136, - 137, + 776, + 780, + 776, + 780, + 149, + 150, true, - "679", - "679" + "rows", + "rows" ], [ - "numval", - "ival", - 7040847965650746591, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/135", + "#/texts/25", 1.0, - 12178341415896436418, - 3013773685141369379, + 16064217528453934834, + 17904668929606079942, 18446744073709551615, 18446744073709551615, - 663, - 666, - 663, - 666, - 137, - 138, + 790, + 800, + 790, + 800, + 152, + 153, true, - "296", - "296" + "formations", + "formations" ], [ - "numval", - "ival", - 7040847965650746591, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/135", + "#/texts/25", 1.0, - 12178341415896426647, - 3013760979502921720, + 6179392753523812130, + 4193644628432114698, 18446744073709551615, 18446744073709551615, - 684, - 687, - 684, - 687, - 140, - 141, + 804, + 813, + 804, + 813, + 154, + 155, true, - "116", - "116" + "materials", + "materials" ], [ - "numval", - "ival", - 7040847965650746591, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/135", + "#/texts/25", 1.0, - 12178341415896199474, - 3013765420737865825, + 8106398484785590092, + 4216933149211892873, 18446744073709551615, 18446744073709551615, - 688, - 691, - 688, - 691, - 141, - 142, + 824, + 831, + 824, + 831, + 157, + 158, true, - "662", - "662" + "columns", + "columns" ], [ "numval", "ival", - 7927601225025519287, + 1118972765223422660, "TEXT", - "#/texts/136", + "#/texts/27", 1.0, - 17767354399704235163, - 96563067760012599, + 17767354399704235161, + 16395526852875690261, 18446744073709551615, 18446744073709551615, - 9, - 10, - 9, - 10, - 3, - 4, + 0, + 1, + 0, + 1, + 0, + 1, true, - "3", - "3" + "1", + "1" ], [ - "numval", - "year", - 18391264192891079539, + "sentence", + "", + 1118972765223422660, "TEXT", - "#/texts/137", + "#/texts/27", 1.0, - 389609625548777262, - 8826555294676663632, + 14523617476315776232, + 15368741834253149117, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, 3, + 111, + 3, + 111, + 2, + 20, true, - "2020", - "2020" + "It can answer queries by combining different data elements from different sources into a new data structure.", + "It can answer queries by combining different data elements from different sources into a new data structure." ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 1118972765223422660, "TEXT", - "#/texts/137", + "#/texts/27", 1.0, - 389609625548777251, - 8826555296349648778, + 6804442699501962146, + 17203402041390290286, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 38, + 61, + 38, + 61, + 8, + 11, true, - "2023", - "2023" + "different data elements", + "different data elements" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 1118972765223422660, "TEXT", - "#/texts/137", + "#/texts/27", 1.0, - 8104408072666212335, - 13552219042525319352, + 600429551108811238, + 12839428366267894769, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 67, + 84, + 67, + 84, + 12, + 14, true, - "10.1002", - "10.1002" + "different sources", + "different sources" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 1118972765223422660, "TEXT", - "#/texts/137", + "#/texts/27", 1.0, - 389609625548868096, - 8826558551385119058, + 12659510570308685827, + 16006829420770455202, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, + 92, + 110, + 92, + 110, + 16, + 19, + true, + "new data structure", + "new data structure" + ], + [ + "term", + "single-term", + 1118972765223422660, + "TEXT", + "#/texts/27", + 1.0, + 8106477782290185579, + 4397511644820592752, + 18446744073709551615, + 18446744073709551615, + 17, + 24, + 17, 24, + 5, + 6, true, - "2.20", - "2.20" + "queries", + "queries" ], [ "numval", "ival", - 18391264192891079539, + 324023167304456371, "TEXT", - "#/texts/137", + "#/texts/28", 1.0, - 14654386914267794441, - 12796143052106760105, + 17767354399704235162, + 964743056782930174, 18446744073709551615, 18446744073709551615, 0, - 8, + 1, 0, - 8, + 1, 0, 1, true, - "26895595", - "26895595" + "2", + "2" ], [ "numval", "ival", - 18391264192891079539, + 324023167304456371, "TEXT", - "#/texts/137", + "#/texts/28", 1.0, - 17767354399704235162, - 7753390158484899261, + 17767354399704235161, + 964743056733707724, 18446744073709551615, 18446744073709551615, 16, 17, 16, 17, - 4, 5, + 6, true, - "2", - "2" + "1", + "1" ], [ - "numval", - "ival", - 18391264192891079539, + "parenthesis", + "reference", + 324023167304456371, "TEXT", - "#/texts/137", + "#/texts/28", 1.0, - 15441160910541481791, - 3518619573290839093, + 12178341415896395122, + 294993208777838466, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 15, + 18, + 15, + 18, + 4, + 7, true, - "23", - "23" + "(1)", + "(1)" ], [ - "numval", - "ival", - 18391264192891079539, + "sentence", + "", + 324023167304456371, "TEXT", - "#/texts/137", + "#/texts/28", 1.0, - 15441160910541481543, - 3518617976696906498, + 5902071177970408282, + 15210357510825208849, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 3, + 117, + 3, + 117, + 2, + 24, true, - "08", - "08" + "It supports (1) by creating a knowledge model from a controlled, unstructured corpus in a mostly unsupervised way.", + "It supports (1) by creating a knowledge model from a controlled, unstructured corpus in a mostly unsupervised way." ], [ - "link", - "url", - 18391264192891079539, + "sentence", + "", + 324023167304456371, "TEXT", - "#/texts/137", + "#/texts/28", 1.0, - 8536069645534292969, - 16063604623463467342, + 18302146700320907131, + 18137989444182809109, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 118, + 180, + 118, + 180, + 24, + 37, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "It may profit from, but not require any manually curated data.", + "It may profit from, but not require any manually curated data." ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 324023167304456371, "TEXT", - "#/texts/137", + "#/texts/28", 1.0, - 594099663775968682, - 14698211805947073928, + 11554018429271009560, + 14601176146139573086, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 33, + 48, + 33, + 48, + 10, + 12, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "knowledge model", + "knowledge model" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 324023167304456371, "TEXT", - "#/texts/137", + "#/texts/28", 1.0, - 1697220653346092555, - 8458710314769009562, + 332950716206334614, + 14329605590209969858, 18446744073709551615, 18446744073709551615, - 67, + 68, 87, - 67, + 68, 87, + 16, 18, - 25, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "unstructured corpus", + "unstructured corpus" ], [ - "numval", - "ival", - 1080447728722590402, + "term", + "single-term", + 324023167304456371, "TEXT", - "#/texts/138", + "#/texts/28", 1.0, - 15441160910541481977, - 12490742773547210041, + 11553735866943991178, + 16918357182749210763, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 100, + 116, + 100, + 116, + 21, + 23, true, - "13", - "13" + "unsupervised way", + "unsupervised way" ], [ - "numval", - "ival", - 4361549257087816853, + "term", + "single-term", + 324023167304456371, "TEXT", - "#/texts/139", + "#/texts/28", 1.0, - 15441160910541481979, - 9983816787922721487, + 389609625696431489, + 17273072847800545799, 18446744073709551615, 18446744073709551615, - 3, - 5, - 3, - 5, - 1, - 2, + 175, + 179, + 175, + 179, + 35, + 36, true, - "15", - "15" + "data", + "data" ], [ "numval", "ival", - 8207961846673301043, + 4651508276868765576, "TEXT", - "#/texts/140", + "#/texts/29", 1.0, - 17767354399704235159, - 15458436803011088578, + 17767354399704235163, + 12716136939749916250, 18446744073709551615, 18446744073709551615, - 23, - 24, - 23, - 24, - 4, - 5, + 0, + 1, + 0, + 1, + 0, + 1, true, - "7", - "7" + "3", + "3" ], [ - "numval", - "fval", - 11998199584890640594, + "parenthesis", + "round brackets", + 4651508276868765576, "TEXT", - "#/texts/141", + "#/texts/29", 1.0, - 14652250303396477617, - 6263954298368962822, + 2722729733807857233, + 7156973764189890273, 18446744073709551615, 18446744073709551615, - 457, - 465, - 457, - 465, - 94, - 95, + 58, + 81, + 58, + 81, + 11, + 18, true, - "0.75-0.9", - "0.75-0.9" + "(eg, a technical field)", + "(eg, a technical field)" ], [ - "numval", - "fval", - 11998199584890640594, + "sentence", + "", + 4651508276868765576, "TEXT", - "#/texts/141", + "#/texts/29", 1.0, - 389609625535995626, - 11162238664629223042, + 5416144356132738083, + 6452265117056514365, 18446744073709551615, 18446744073709551615, - 631, - 635, - 629, - 633, - 132, - 133, + 3, + 82, + 3, + 82, + 2, + 19, true, - "0.97", - "0.97" + "It may restrict supported queries to a specific domain (eg, a technical field).", + "It may restrict supported queries to a specific domain (eg, a technical field)." ], [ - "numval", - "ival", - 11998199584890640594, + "term", + "single-term", + 4651508276868765576, "TEXT", - "#/texts/141", + "#/texts/29", 1.0, - 17767354399704235161, - 17845175019612967856, + 2648546400259159503, + 9428685315310219813, 18446744073709551615, 18446744073709551615, - 264, - 265, - 264, - 265, - 49, - 50, + 42, + 57, + 42, + 57, + 9, + 11, true, - "1", - "1" + "specific domain", + "specific domain" ], [ - "numval", - "ival", - 11998199584890640594, + "term", + "single-term", + 4651508276868765576, "TEXT", - "#/texts/141", + "#/texts/29", 1.0, - 15441160910541482672, - 15292900460193668121, + 6630151693041027733, + 5469669854747800448, 18446744073709551615, 18446744073709551615, - 282, - 284, - 282, - 284, - 55, - 56, + 65, + 80, + 65, + 80, + 15, + 17, true, - "-1", - "-1" + "technical field", + "technical field" ], [ - "numval", - "ival", - 11998199584890640594, + "term", + "single-term", + 4651508276868765576, "TEXT", - "#/texts/141", + "#/texts/29", 1.0, - 15441160910541482673, - 15292900459317583926, + 8106477782290185579, + 4528294841171204155, 18446744073709551615, 18446744073709551615, - 289, - 291, - 289, - 291, - 58, - 59, + 29, + 36, + 29, + 36, + 6, + 7, true, - "-2", - "-2" + "queries", + "queries" ], [ - "numval", - "ival", - 11998199584890640594, + "term", + "single-term", + 4651508276868765576, "TEXT", - "#/texts/141", + "#/texts/29", 1.0, - 15441160910541482674, - 15292900461018240016, + 15441160910541487324, + 14197444882771576140, 18446744073709551615, 18446744073709551615, - 296, - 298, - 296, - 298, + 59, 61, - 62, + 59, + 61, + 12, + 13, true, - "-3", - "-3" + "eg", + "eg" ], [ "numval", "ival", - 11998199584890640594, + 3052020526349962744, "TEXT", - "#/texts/141", + "#/texts/30", 1.0, - 15441160910541482676, - 15292900461174373895, + 17767354399704235162, + 4099649421554807498, 18446744073709551615, 18446744073709551615, - 307, - 309, - 307, - 309, - 65, - 66, + 498, + 499, + 498, + 499, + 79, + 80, true, - "-5", - "-5" + "2", + "2" ], [ - "numval", - "ival", - 11998199584890640594, + "parenthesis", + "round brackets", + 3052020526349962744, "TEXT", - "#/texts/141", + "#/texts/30", 1.0, - 15441160910541482672, - 15292900460193644573, + 329104053347765356, + 2109302919745639425, 18446744073709551615, 18446744073709551615, - 426, - 428, - 426, - 428, - 87, - 88, + 309, + 314, + 309, + 314, + 45, + 48, true, - "-1", - "-1" + "(NLU)", + "(NLU)" ], [ - "numval", - "ival", - 11998199584890640594, + "expression", + "word-concatenation", + 3052020526349962744, "TEXT", - "#/texts/141", + "#/texts/30", 1.0, - 17767354399704235163, - 17845175019597634812, + 5044385734724420019, + 8851830242204350949, 18446744073709551615, 18446744073709551615, - 484, - 485, - 484, - 485, - 99, - 100, + 244, + 260, + 244, + 260, + 39, + 40, true, - "3", - "3" + "state-of-the-art", + "state-of-the-art" ], [ - "numval", - "ival", - 11998199584890640594, + "sentence", + "", + 3052020526349962744, "TEXT", - "#/texts/141", + "#/texts/30", 1.0, - 17767354399704235156, - 17845175019331480896, + 17629874561869362054, + 1364582601176274676, 18446744073709551615, 18446744073709551615, - 489, - 490, - 489, - 490, - 101, - 102, + 0, + 103, + 0, + 103, + 0, + 16, true, - "4", - "4" + "To meet the objectives defined earlier, CPS implements and tightly integrates two essential components.", + "To meet the objectives defined earlier, CPS implements and tightly integrates two essential components." ], [ - "numval", - "ival", - 11998199584890640594, + "sentence", + "", + 3052020526349962744, "TEXT", - "#/texts/141", + "#/texts/30", 1.0, - 15441160910541482676, - 15292900461174286862, + 3384851794162116958, + 6847462864995440661, 18446744073709551615, 18446744073709551615, - 601, - 603, - 601, - 603, - 125, - 126, + 104, + 371, + 104, + 371, + 16, + 57, true, - "-5", - "-5" + "The first component is a scalable Knowledge Graph creation pipeline, which is used to automatically process text, tables and images through state-of-the-art segmentation and natural language understanding (NLU) models and extract entities and relationships from them.", + "The first component is a scalable Knowledge Graph creation pipeline, which is used to automatically process text, tables and images through state-of-the-art segmentation and natural language understanding (NLU) models and extract entities and relationships from them." ], [ - "numval", - "ival", - 16446129547721407877, + "sentence", + "", + 3052020526349962744, "TEXT", - "#/texts/142", + "#/texts/30", 1.0, - 17767354399704235158, - 11362596522813034737, + 7829367821776224855, + 12429790798463300743, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 372, + 497, + 372, + 497, + 57, + 79, true, - "6", - "6" + "The second component serves the created KG, enabling users to perform deep queries and advanced graph analytics in real time.", + "The second component serves the created KG, enabling users to perform deep queries and advanced graph analytics in real time." ], [ - "numval", - "ival", - 6720443978031524294, + "sentence", + "", + 3052020526349962744, "TEXT", - "#/texts/143", + "#/texts/30", 1.0, - 17767354399704235161, - 16606870843966802051, + 854837096675760532, + 7651946466094830673, 18446744073709551615, 18446744073709551615, - 521, - 522, - 521, - 522, - 82, - 83, + 500, + 647, + 500, + 647, + 80, + 102, true, - "1", - "1" + "This is supported through an underlying, highly optimized graph engine we developed to specifically address requirements for deep data exploration.", + "This is supported through an underlying, highly optimized graph engine we developed to specifically address requirements for deep data exploration." ], [ - "numval", - "ival", - 6720443978031524294, + "term", + "enum-term-mark-3", + 3052020526349962744, "TEXT", - "#/texts/143", + "#/texts/30", 1.0, - 17767354399704235162, - 16606870838110795262, + 16462824725023446153, + 13126993570789821262, 18446744073709551615, 18446744073709551615, - 579, - 580, - 579, - 580, - 95, - 96, + 218, + 235, + 218, + 235, + 35, + 38, true, - "2", - "2" + "tables and images", + "tables and images" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "enum-term-mark-3", + 3052020526349962744, "TEXT", - "#/texts/144", + "#/texts/30", 1.0, - 389609625548777262, - 8826555294676663632, + 13335488353876392384, + 2597733537392511997, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 334, + 360, + 334, + 360, + 51, + 54, true, - "2020", - "2020" + "entities and relationships", + "entities and relationships" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/144", + "#/texts/30", 1.0, - 389609625548777251, - 8826555296349648778, + 7885245284142706193, + 4158653228934455880, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 40, + 54, + 40, + 54, + 7, + 9, true, - "2023", - "2023" + "CPS implements", + "CPS implements" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/144", + "#/texts/30", 1.0, - 8104408072666212335, - 13552219042525319352, + 1520105468889282504, + 17108999815917583587, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 82, + 102, + 82, + 102, + 13, + 15, true, - "10.1002", - "10.1002" + "essential components", + "essential components" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/144", + "#/texts/30", 1.0, - 389609625548868096, - 8826558551385119058, + 3741141293805179509, + 17420802040208319620, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 108, + 123, + 108, + 123, + 17, + 19, true, - "2.20", - "2.20" + "first component", + "first component" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/144", + "#/texts/30", 1.0, - 14654386914267794441, - 12796143052106760105, + 2704211529742541242, + 4905940686306094827, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 129, + 171, + 129, + 171, + 21, + 26, true, - "26895595", - "26895595" + "scalable Knowledge Graph creation pipeline", + "scalable Knowledge Graph creation pipeline" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/144", + "#/texts/30", 1.0, - 17767354399704235162, - 7753390158484899261, + 5129835390832145091, + 6616158247763758569, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 244, + 273, + 244, + 273, + 39, + 41, true, - "2", - "2" + "state-of-the-art segmentation", + "state-of-the-art segmentation" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/144", + "#/texts/30", 1.0, - 15441160910541481791, - 3518619573290839093, + 3070945404202872591, + 10669708669164076111, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 278, + 294, + 278, + 294, + 42, + 44, true, - "23", - "23" + "natural language", + "natural language" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/144", + "#/texts/30", 1.0, - 15441160910541481543, - 3518617976696906498, + 8746927308312045639, + 6749934577204905868, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 326, + 342, + 326, + 342, + 50, + 52, true, - "08", - "08" + "extract entities", + "extract entities" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/144", + "#/texts/30", 1.0, - 8536069645534292969, - 16063604623463467342, + 864107477833444286, + 4664743449195732093, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 376, + 392, + 376, + 392, + 58, + 60, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "second component", + "second component" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/144", + "#/texts/30", 1.0, - 594099663775968682, - 14698211805947073928, + 7076268937724050913, + 2952839948443340364, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 442, + 454, + 442, + 454, + 69, + 71, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "deep queries", + "deep queries" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/144", + "#/texts/30", 1.0, - 1697220653346092555, - 8458710314769009562, + 1325639643510008878, + 14062961483119642395, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 459, + 483, + 459, + 483, + 72, + 75, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "advanced graph analytics", + "advanced graph analytics" ], [ - "numval", - "ival", - 2144926730621142072, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/145", + "#/texts/30", 1.0, - 15441160910541481978, - 18064563043183731132, + 6165973182635301010, + 2523980927240404445, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 487, + 496, + 487, + 496, + 76, + 78, true, - "14", - "14" + "real time", + "real time" ], [ - "numval", - "ival", - 2144926730621142072, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/145", + "#/texts/30", 1.0, - 15441160910541481979, - 18064563042796865823, + 13015591071425028695, + 16405170400950667634, 18446744073709551615, 18446744073709551615, - 4, - 6, - 4, - 6, - 2, - 3, + 548, + 570, + 548, + 570, + 88, + 91, true, - "15", - "15" + "optimized graph engine", + "optimized graph engine" ], [ - "numval", - "ival", - 14222671032550229818, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/146", + "#/texts/30", 1.0, - 17767354399704235163, - 2699991593779864855, + 13671659409933113155, + 3777936483828020599, 18446744073709551615, 18446744073709551615, - 24, - 25, - 24, - 25, - 6, - 7, + 625, + 646, + 625, + 646, + 98, + 101, true, - "3", - "3" + "deep data exploration", + "deep data exploration" ], [ - "numval", - "irng", - 3523281823889115814, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/152", + "#/texts/30", 1.0, - 10302035827600178331, - 6710097973531677104, + 15868223159689591859, + 16370341749731323775, 18446744073709551615, 18446744073709551615, - 36, - 45, - 36, - 45, - 14, - 15, + 12, + 22, + 12, + 22, + 3, + 4, true, - "0000-0002", - "0000-0002" + "objectives", + "objectives" ], [ - "numval", - "irng", - 3523281823889115814, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/152", + "#/texts/30", 1.0, - 6624857390961351666, - 3541555616013892515, + 389609625631325904, + 9256317306341982494, 18446744073709551615, 18446744073709551615, - 46, - 55, - 46, - 55, - 16, - 17, + 212, + 216, + 212, + 216, + 33, + 34, true, - "8088-0823", - "8088-0823" + "text", + "text" ], [ - "numval", - "irng", - 3523281823889115814, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/152", + "#/texts/30", 1.0, - 10302035827600178332, - 6710097973532471075, + 16381206513098478539, + 15214895821358181557, 18446744073709551615, 18446744073709551615, - 88, - 97, - 88, - 97, - 27, - 28, + 218, + 224, + 218, + 224, + 35, + 36, true, - "0000-0001", - "0000-0001" + "tables", + "tables" ], [ - "numval", - "irng", - 3523281823889115814, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/152", + "#/texts/30", 1.0, - 6560223242063427106, - 13609528576140932418, + 16381206560620045048, + 15781754046980462859, 18446744073709551615, 18446744073709551615, - 98, - 107, - 98, - 107, - 29, - 30, + 229, + 235, + 229, + 235, + 37, + 38, true, - "7216-8505", - "7216-8505" + "images", + "images" ], [ - "numval", - "irng", - 3523281823889115814, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/152", + "#/texts/30", 1.0, - 10302035827600178332, - 6710097973532498930, + 12178341415896299941, + 15271888912268631188, 18446744073709551615, 18446744073709551615, - 141, - 150, - 141, - 150, - 40, - 41, + 310, + 313, + 310, + 313, + 46, + 47, true, - "0000-0001", - "0000-0001" + "NLU", + "NLU" ], [ - "numval", - "irng", - 3523281823889115814, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/152", + "#/texts/30", 1.0, - 6573923715856392023, - 13497670743408223376, + 16381206567230470443, + 13599704792953880118, 18446744073709551615, 18446744073709551615, - 151, - 160, - 151, - 160, - 42, - 43, + 315, + 321, + 315, + 321, + 48, + 49, true, - "5761-0422", - "5761-0422" + "models", + "models" ], [ - "link", - "url", - 3523281823889115814, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/152", + "#/texts/30", 1.0, - 7086030415698247677, - 10516035679311822965, + 8279380567349713241, + 11473550069815089395, 18446744073709551615, 18446744073709551615, - 18, - 55, - 18, - 55, - 6, - 17, + 347, + 360, + 347, + 360, + 53, + 54, true, - "https://orcid.org/0000-0002-8088-0823", - "https://orcid.org/0000-0002-8088-0823" + "relationships", + "relationships" ], [ - "link", - "url", - 3523281823889115814, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/152", + "#/texts/30", 1.0, - 2033258390552333901, - 14596379607593903375, + 15441160910541480204, + 11436442094831901011, 18446744073709551615, 18446744073709551615, - 70, - 107, - 70, - 107, - 19, - 30, + 412, + 414, + 412, + 414, + 63, + 64, true, - "https://orcid.org/0000-0001-7216-8505", - "https://orcid.org/0000-0001-7216-8505" + "KG", + "KG" ], [ - "link", - "url", - 3523281823889115814, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/152", + "#/texts/30", 1.0, - 2031879929749239141, - 13323569836539834175, + 329104159157820437, + 6168600621351033593, 18446744073709551615, 18446744073709551615, - 123, - 160, - 123, - 160, - 32, - 43, + 425, + 430, + 425, + 430, + 66, + 67, true, - "https://orcid.org/0000-0001-5761-0422", - "https://orcid.org/0000-0001-5761-0422" + "users", + "users" ], [ - "link", - "url", - 7813503946963688644, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/154", + "#/texts/30", 1.0, - 3527101060180289873, - 4288347075719597580, + 13240311013633905449, + 13899855425573318778, 18446744073709551615, 18446744073709551615, - 30, - 52, - 30, - 52, - 6, - 15, + 608, + 620, + 608, + 620, + 96, + 97, true, - "https://www.elastic.co", - "https://www.elastic.co" + "requirements", + "requirements" ], [ - "link", - "url", - 7813503946963688644, + "numval", + "ival", + 6725501529910185390, "TEXT", - "#/texts/154", + "#/texts/31", 1.0, - 7381438071617048818, - 3762754436696500331, + 17767354399704235163, + 14253331712813347451, 18446744073709551615, 18446744073709551615, - 72, - 97, - 72, - 97, - 19, - 28, + 171, + 172, + 171, + 172, + 26, + 27, true, - "https://lucene.apache.org", - "https://lucene.apache.org" + "3", + "3" ], [ - "link", - "url", - 7813503946963688644, + "numval", + "ival", + 6725501529910185390, "TEXT", - "#/texts/154", + "#/texts/31", 1.0, - 7699234159584878934, - 8720273332387288393, + 17767354399704235156, + 14253331712656803661, 18446744073709551615, 18446744073709551615, - 38, - 52, - 38, - 52, - 10, - 15, + 201, + 202, + 201, + 202, + 33, + 34, true, - "www.elastic.co", - "www.elastic.co" + "4", + "4" ], [ - "link", - "url", - 1997735398126013155, + "expression", + "word-concatenation", + 6725501529910185390, "TEXT", - "#/texts/156", + "#/texts/31", 1.0, - 11080755855567888942, - 12138756017738546093, + 15984801488078789848, + 14766777380716059078, 18446744073709551615, 18446744073709551615, - 4, - 24, - 2, + 130, + 140, + 130, + 140, + 21, 22, - 1, - 10, true, - "https://www.nltk.org", - "https://www.nltk.org" + "real-world", + "real-world" ], [ - "link", - "url", - 1997735398126013155, + "sentence", + "", + 6725501529910185390, "TEXT", - "#/texts/156", + "#/texts/31", 1.0, - 7030452472279930374, - 3139262024232962844, + 6831551111511447609, + 13413788254932797194, 18446744073709551615, 18446744073709551615, - 12, - 24, - 10, - 22, - 5, - 10, + 0, + 200, + 0, + 200, + 0, + 33, true, - "www.nltk.org", - "www.nltk.org" + "It is worth noting that the CPS platform is a fully functioning cloud application that has been successfully deployed in multiple real-world scenarios in material science 3 and oil and gas industries.", + "It is worth noting that the CPS platform is a fully functioning cloud application that has been successfully deployed in multiple real-world scenarios in material science 3 and oil and gas industries." ], [ - "numval", - "ival", - 4925537010788978399, + "term", + "enum-term-mark-2", + 6725501529910185390, "TEXT", - "#/texts/158", + "#/texts/31", 1.0, - 17767354399704235161, - 13902073100028876379, + 9418848057117014737, + 18338967318945171834, 18446744073709551615, 18446744073709551615, - 148, - 149, - 147, - 148, - 29, - 30, + 177, + 188, + 177, + 188, + 28, + 31, true, - "1", - "1" + "oil and gas", + "oil and gas" ], [ - "numval", - "ival", - 16552665876195410077, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/159", + "#/texts/31", 1.0, - 17767354399704235156, - 1305421191768306174, + 12779036928191531604, + 16622894821397688807, 18446744073709551615, 18446744073709551615, - 18, - 19, - 18, - 19, - 4, - 5, + 28, + 40, + 28, + 40, + 6, + 8, true, - "4", - "4" + "CPS platform", + "CPS platform" ], [ - "numval", - "year", - 17579390613842440572, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/160", + "#/texts/31", 1.0, - 389609625548777059, - 14748978429801291102, + 7724009801520989273, + 7797950652455693225, 18446744073709551615, 18446744073709551615, - 178, - 182, - 174, - 178, - 52, - 53, - true, - "2015", - "2015" - ], - [ - "numval", - "ival", - 17579390613842440572, - "TEXT", - "#/texts/160", - 1.0, - 17767354399704235163, - 14663762662264921246, - 18446744073709551615, - 18446744073709551615, - 73, - 74, - 69, - 70, - 15, - 16, + 64, + 81, + 64, + 81, + 12, + 14, true, - "3", - "3" + "cloud application", + "cloud application" ], [ - "numval", - "ival", - 17579390613842440572, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/160", + "#/texts/31", 1.0, - 17767354399704235156, - 14663762663007797994, + 16998720417278708113, + 16159794011975202711, 18446744073709551615, 18446744073709551615, - 136, - 137, - 132, - 133, - 35, - 36, + 121, + 150, + 121, + 150, + 20, + 23, true, - "4", - "4" + "multiple real-world scenarios", + "multiple real-world scenarios" ], [ - "numval", - "ival", - 17579390613842440572, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/160", + "#/texts/31", 1.0, - 15441160910541481913, - 12659057306413090614, + 10788814978233814896, + 9709242714425521456, 18446744073709551615, 18446744073709551615, - 183, - 185, - 179, - 181, - 54, - 55, + 154, + 170, + 154, + 170, + 24, + 26, true, - "02", - "02" + "material science", + "material science" ], [ - "numval", - "ival", - 17579390613842440572, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/160", + "#/texts/31", 1.0, - 17767354399704235156, - 14663762663007808920, + 9846194482272547581, + 8008602840197678050, 18446744073709551615, 18446744073709551615, - 189, - 190, 185, - 186, - 57, - 58, + 199, + 185, + 199, + 30, + 32, true, - "4", - "4" + "gas industries", + "gas industries" ], [ - "link", - "url", - 17579390613842440572, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/160", + "#/texts/31", 1.0, - 3438649888016089446, - 14315872303660489441, + 12178341415895623363, + 8134859084711314461, 18446744073709551615, 18446744073709551615, - 65, - 127, - 61, - 123, - 10, - 32, + 177, + 180, + 177, + 180, + 28, + 29, true, - "http://s3.thinkaurelius.com/docs/titan/current/data-model.html", - "http://s3.thinkaurelius.com/docs/titan/current/data-model.html" + "oil", + "oil" ], [ - "link", - "url", - 17579390613842440572, + "numval", + "ival", + 14814111183601762276, "TEXT", - "#/texts/160", + "#/texts/32", 1.0, - 9361941850829391161, - 1324878578738734655, + 17767354399704235162, + 3186926300182333312, 18446744073709551615, 18446744073709551615, - 140, - 209, - 136, - 205, - 38, - 63, + 152, + 153, + 152, + 153, + 28, + 29, true, - "http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html", - "http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html" + "2", + "2" ], [ "numval", "ival", - 722212543953276862, + 14814111183601762276, "TEXT", - "#/texts/161", + "#/texts/32", 1.0, - 17767354399704235156, - 17688058591094674309, + 17767354399704235163, + 3186926300062863412, 18446744073709551615, 18446744073709551615, - 19, - 20, - 15, - 16, - 4, - 5, + 251, + 252, + 251, + 252, + 48, + 49, true, - "4", - "4" + "3", + "3" ], [ - "link", - "url", - 722212543953276862, + "sentence", + "", + 14814111183601762276, "TEXT", - "#/texts/161", + "#/texts/32", 1.0, - 12568677210829628871, - 1680746501251640588, + 10957561452305435035, + 14135301223768703134, 18446744073709551615, 18446744073709551615, - 105, - 139, - 101, - 135, - 21, - 35, + 0, + 140, + 0, + 140, + 0, + 26, true, - "https://db-engines.com/en/ranking_", - "https://db-engines.com/en/ranking_" + "In the remainder of this paper, we discuss in detail the technical aspects and implementation details of the two main components of the CPS.", + "In the remainder of this paper, we discuss in detail the technical aspects and implementation details of the two main components of the CPS." ], [ - "numval", - "ival", - 11085577343317113173, + "sentence", + "", + 14814111183601762276, "TEXT", - "#/texts/162", + "#/texts/32", 1.0, - 12178341415896310600, - 9970685264370540412, + 13779837797648362784, + 3604173677770761086, 18446744073709551615, 18446744073709551615, - 17, - 20, - 15, - 18, - 6, - 7, + 141, + 239, + 141, + 239, + 26, + 46, true, - "500", - "500" + "In section 2, we present in depth how the platform extracts facts from corpora at a massive scale.", + "In section 2, we present in depth how the platform extracts facts from corpora at a massive scale." ], [ - "link", - "url", - 11085577343317113173, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/162", + "#/texts/32", 1.0, - 1244385257359010144, - 3127203609822040452, + 11289641655891678136, + 5471089380666220349, 18446744073709551615, 18446744073709551615, - 5, - 25, - 3, - 23, - 1, - 10, + 57, + 74, + 57, + 74, + 12, + 14, true, - "http://graph500.org/", - "http://graph500.org/" + "technical aspects", + "technical aspects" ], [ - "reference", - "url", - 1792096630133661292, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/163", + "#/texts/32", 1.0, - 16747146533825186967, - 2165348395015827092, + 1138422908050553065, + 14505894906473969976, 18446744073709551615, 18446744073709551615, - 0, - 54, - 0, - 52, - 0, - 18, + 79, + 101, + 79, + 101, + 15, + 17, true, - "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", - "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html" + "implementation details", + "implementation details" ], [ - "reference", - "url", - 16611805225457383637, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/165", + "#/texts/32", 1.0, - 4512570954370983408, - 11763158631698282386, + 3812062755894317903, + 11765129466643902740, 18446744073709551615, 18446744073709551615, - 0, - 75, - 0, - 69, - 0, - 23, + 113, + 128, + 113, + 128, + 20, + 22, true, - "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", - "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/" + "main components", + "main components" ], [ - "reference", - "url", - 1531505125666754945, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/166", + "#/texts/32", 1.0, - 16922240937803157180, - 3329452043224775053, + 15616229620328161347, + 13533638390690607221, 18446744073709551615, 18446744073709551615, - 0, + 225, + 238, + 225, + 238, 43, - 0, - 37, - 0, - 11, + 45, true, - "\u2021\u2021\u2021 https://www.naturalearthdata.com/", - "\u2021\u2021\u2021 https://www.naturalearthdata.com/" + "massive scale", + "massive scale" ], [ - "reference", - "url", - 15684389308320953629, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/167", + "#/texts/32", 1.0, - 2845896203864732456, - 4760469342904968768, + 7076268937724050913, + 11092663505100283305, 18446744073709551615, 18446744073709551615, - 0, - 36, - 0, - 33, - 0, - 11, + 285, + 297, + 285, + 297, + 56, + 58, true, - "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/", - "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/" + "deep queries", + "deep queries" ], [ - "reference", - "author", - 10480452763767134455, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/169", + "#/texts/32", 1.0, - 11879540473470058199, - 12427853451193245392, + 6165970943308474352, + 5673487969622609676, 18446744073709551615, 18446744073709551615, - 3, - 17, - 3, - 17, + 7, + 16, + 7, + 16, 2, - 5, + 3, true, - "Staar Peter WJ", - "Staar Peter WJ" + "remainder", + "remainder" ], [ - "reference", - "author", - 10480452763767134455, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/169", + "#/texts/32", 1.0, - 6613162031266505134, - 16138057201536909006, + 329104161668023890, + 17278239549203684871, 18446744073709551615, 18446744073709551615, - 19, - 28, - 19, - 28, + 25, + 30, + 25, + 30, + 5, 6, - 8, true, - "Michele D", - "Michele D" + "paper", + "paper" ], [ - "reference", - "author", - 10480452763767134455, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/169", + "#/texts/32", 1.0, - 4457167794784606628, - 16487730286724222122, + 16381206568246674273, + 5139794507444996832, 18446744073709551615, 18446744073709551615, - 30, - 41, - 30, - 41, - 9, + 46, + 52, + 46, + 52, + 10, 11, true, - "Christoph A", - "Christoph A" + "detail", + "detail" ], [ - "reference", - "author", - 10480452763767134455, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/169", + "#/texts/32", 1.0, - 6560601913145533820, - 12701816617387729389, + 12178341415896222428, + 15683249918480756789, 18446744073709551615, 18446744073709551615, - 43, - 52, - 43, - 52, - 12, - 15, + 136, + 139, + 136, + 139, + 24, + 25, true, - "Costas B.", - "Costas B." + "CPS", + "CPS" ], [ - "reference", - "citation-number", - 10480452763767134455, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/169", + "#/texts/32", 1.0, - 17767354399704235161, - 16208788960124925205, + 8106478708629288965, + 7671933616087053356, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 144, + 151, + 144, + 151, + 27, + 28, true, - "1", - "1" + "section", + "section" ], [ - "reference", - "container-title", - 10480452763767134455, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/169", + "#/texts/32", 1.0, - 8106351470704634736, - 17995829417296331915, + 329104162100250438, + 17070642353424787571, 18446744073709551615, 18446744073709551615, - 138, - 145, - 138, - 145, - 29, - 32, + 169, + 174, + 169, + 174, + 33, + 34, true, - "KDD '18", - "KDD '18" + "depth", + "depth" ], [ - "reference", - "date", - 10480452763767134455, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/169", + "#/texts/32", 1.0, - 8104408419226439021, - 7524634383995046949, + 14814125365076808131, + 17530184478432761737, 18446744073709551615, 18446744073709551615, - 164, - 171, - 164, - 171, - 39, - 42, + 183, + 191, + 183, + 191, + 36, + 37, true, - "; 2018:", - "; 2018:" + "platform", + "platform" ], [ - "reference", - "location", - 10480452763767134455, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/169", + "#/texts/32", 1.0, - 6517026456739326224, - 8283202906327186871, + 329104161809952077, + 17277977825643184942, 18446744073709551615, 18446744073709551615, - 147, - 160, - 147, - 160, - 33, + 201, + 206, + 201, + 206, 38, + 39, true, - "New York, NY:", - "New York, NY:" + "facts", + "facts" ], [ - "reference", - "pages", - 10480452763767134455, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/169", + "#/texts/32", 1.0, - 8104408789160133341, - 11698475954970405279, + 8106398483106473371, + 590821573198333812, 18446744073709551615, 18446744073709551615, - 171, - 178, - 171, - 178, - 42, - 43, + 212, + 219, + 212, + 219, + 40, + 41, true, - "774-782", - "774-782" + "corpora", + "corpora" ], [ - "reference", - "publisher", - 10480452763767134455, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/169", + "#/texts/32", 1.0, - 12178341415896228980, - 16661682738511655292, + 8106478708629288965, + 7671933616087044621, 18446744073709551615, 18446744073709551615, - 161, - 164, - 161, - 164, - 38, - 39, - true, - "ACM", - "ACM" + 243, + 250, + 243, + 250, + 47, + 48, + true, + "section", + "section" ], [ - "reference", - "title", - 10480452763767134455, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/169", + "#/texts/32", 1.0, - 3346237141252876309, - 13011534883222988606, + 16381206568246674273, + 5139794507445024315, 18446744073709551615, 18446744073709551615, + 265, + 271, + 265, + 271, 53, - 136, - 53, - 136, - 15, - 28, + 54, true, - "Corpus conversion service: a machine learning platform to ingest documents at scale", - "Corpus conversion service: a machine learning platform to ingest documents at scale" + "detail", + "detail" ], [ - "reference", - "author", - 11866471329779366855, + "numval", + "year", + 18391264192891079539, "TEXT", - "#/texts/170", + "#/texts/33", 1.0, - 11879540473470058199, - 6818801233014041471, + 389609625548777262, + 8826555294676663632, 18446744073709551615, 18446744073709551615, - 3, - 17, - 3, - 17, + 10, + 14, + 10, + 14, 2, - 5, + 3, true, - "Staar Peter WJ", - "Staar Peter WJ" + "2020", + "2020" ], [ - "reference", - "author", - 11866471329779366855, + "numval", + "year", + 18391264192891079539, "TEXT", - "#/texts/170", + "#/texts/33", 1.0, - 329104159232588720, - 1186563503698797045, + 389609625548777251, + 8826555296349648778, 18446744073709551615, 18446744073709551615, - 19, - 24, - 19, - 24, - 6, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/33", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, 8, - true, - "Kl BP", - "Kl BP" + 8, + false, + "10.1002", + "10.1002" ], [ - "reference", - "author", - 11866471329779366855, + "numval", + "fval", + 18391264192891079539, "TEXT", - "#/texts/170", + "#/texts/33", 1.0, - 14652187939873997159, - 718674333250886747, + 389609625548868096, + 8826558551385119058, 18446744073709551615, 18446744073709551615, - 26, - 34, - 26, - 34, + 82, + 86, + 82, + 86, + 8, 9, - 11, - true, - "Roxana I", - "Roxana I" + false, + "2.20", + "2.20" ], [ - "reference", - "citation-number", - 11866471329779366855, + "numval", + "ival", + 18391264192891079539, "TEXT", - "#/texts/170", + "#/texts/33", 1.0, - 17767354399704235162, - 7639029136784882071, + 14654386914267794441, + 12796143052106760105, 18446744073709551615, 18446744073709551615, 0, - 1, + 8, 0, - 1, + 8, 0, 1, true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/33", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, "2", "2" ], [ - "reference", - "date", - 11866471329779366855, + "numval", + "ival", + 18391264192891079539, "TEXT", - "#/texts/170", + "#/texts/33", 1.0, - 325347433255123998, - 9431696322833619113, + 15441160910541481791, + 3518619573290839093, 18446744073709551615, 18446744073709551615, - 150, - 162, - 150, - 162, - 36, - 39, - true, - "2016:812-821", - "2016:812-821" + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" ], [ - "reference", - "journal", - 11866471329779366855, + "numval", + "ival", + 18391264192891079539, "TEXT", - "#/texts/170", + "#/texts/33", 1.0, - 8106350741667376964, - 2037770047407614341, + 15441160910541481543, + 3518617976696906498, 18446744073709551615, 18446744073709551615, - 131, - 138, - 131, - 138, - 30, - 31, - true, - "Chicago", - "Chicago" + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" ], [ - "reference", - "publisher", - 11866471329779366855, + "link", + "url", + 18391264192891079539, "TEXT", - "#/texts/170", + "#/texts/33", 1.0, - 329104161865740710, - 2100895836958644546, + 8536069645534292969, + 16063604623463467342, 18446744073709551615, 18446744073709551615, - 144, - 149, - 144, - 149, - 34, - 36, + 35, + 87, + 35, + 87, + 8, + 10, true, - "IEEE;", - "IEEE;" + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," ], [ - "reference", - "title", - 11866471329779366855, + "link", + "url", + 18391264192891079539, "TEXT", - "#/texts/170", + "#/texts/33", 1.0, - 7105706713138331748, - 8882313339767931673, + 594099663775968682, + 14698211805947073928, 18446744073709551615, 18446744073709551615, - 43, - 129, - 43, - 129, - 15, - 29, + 156, + 208, + 156, + 208, + 22, + 37, true, - "Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance", - "Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance" + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" ], [ - "reference", - "volume", - 11866471329779366855, + "link", + "doi", + 18391264192891079539, "TEXT", - "#/texts/170", + "#/texts/33", 1.0, - 12178341415896263665, - 6233863430018819825, + 1697220653346092555, + 8458710314769009562, 18446744073709551615, 18446744073709551615, - 140, - 143, - 140, - 143, - 32, - 34, - true, - "IL:", - "IL:" + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," ], [ - "reference", - "author", - 6016885898370676469, + "parenthesis", + "round brackets", + 18391264192891079539, "TEXT", - "#/texts/171", + "#/texts/33", 1.0, - 14650311461945683358, - 1978144735469983705, + 12213187056216195918, + 14309760985361468471, 18446744073709551615, 18446744073709551615, - 3, - 11, - 3, - 11, - 2, - 4, + 155, + 209, + 155, + 209, + 21, + 38, true, - "Matteo M", - "Matteo M" + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" ], [ - "reference", - "author", - 6016885898370676469, + "parenthesis", + "square brackets", + 18391264192891079539, "TEXT", - "#/texts/171", + "#/texts/33", 1.0, - 4457167794784606628, - 3737697229009384388, + 15691754593896323724, + 15433429984583237828, 18446744073709551615, 18446744073709551615, - 13, - 24, - 13, - 24, - 5, - 7, + 112, + 124, + 112, + 124, + 14, + 15, true, - "Christoph A", - "Christoph A" + "[23/08/2023]", + "[23/08/2023]" ], [ - "reference", - "author", - 6016885898370676469, + "expression", + "wtoken-concatenation", + 18391264192891079539, "TEXT", - "#/texts/171", + "#/texts/33", 1.0, - 6183363009296336817, - 2886377010043332845, + 3856967589249015473, + 3576147774941915841, 18446744073709551615, 18446744073709551615, - 26, 35, - 26, + 86, 35, + 86, 8, - 12, + 9, true, - "Val'ery W", - "Val'ery W" + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" ], [ - "reference", - "citation-number", - 6016885898370676469, + "expression", + "wtoken-concatenation", + 18391264192891079539, "TEXT", - "#/texts/171", + "#/texts/33", 1.0, - 17767354399704235163, - 13510159049290326510, + 15691754593896323724, + 15433429984583237828, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 112, + 124, + 112, + 124, + 14, + 15, true, - "3", - "3" + "[23/08/2023]", + "[23/08/2023]" ], [ - "reference", - "date", - 6016885898370676469, + "sentence", + "", + 18391264192891079539, "TEXT", - "#/texts/171", + "#/texts/33", 1.0, - 16381206542172555288, - 10693536807570486686, + 10933383461306782608, + 10178418358179275356, 18446744073709551615, 18446744073709551615, - 161, - 167, - 161, - 167, - 35, - 37, + 19, + 125, + 19, + 125, + 6, + 16, true, - "; 2019", - "; 2019" + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." ], [ - "reference", - "journal", - 6016885898370676469, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/171", + "#/texts/33", 1.0, - 7543597897356589805, - 187532807533800461, + 12466457873768409517, + 3430070082404029638, 18446744073709551615, 18446744073709551615, - 141, - 151, - 141, - 151, - 28, - 32, + 88, + 108, + 88, + 108, + 10, + 13, true, - "ArXiv.abs/", - "ArXiv.abs/" + "Wiley Online Library", + "Wiley Online Library" ], [ - "reference", - "pages", - 6016885898370676469, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/171", + "#/texts/33", 1.0, - 329104147742543445, - 2092785390571099911, + 12466457873768409517, + 3430070082403846184, 18446744073709551615, 18446744073709551615, - 156, - 161, - 156, - 161, - 34, - 35, + 213, + 233, + 213, + 233, + 39, + 42, true, - "08400", - "08400" + "Wiley Online Library", + "Wiley Online Library" ], [ - "reference", - "title", - 6016885898370676469, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/171", + "#/texts/33", 1.0, - 14518759528420507379, - 35296972575901155, + 10086796047802705645, + 11637015082128438412, 18446744073709551615, 18446744073709551615, - 44, - 139, - 44, - 139, - 16, - 27, + 252, + 263, + 252, + 263, + 47, + 49, true, - "An information extraction and knowledge graph platform for accelerating biochemical discoveries", - "An information extraction and knowledge graph platform for accelerating biochemical discoveries" + "OA articles", + "OA articles" ], [ - "reference", - "volume", - 6016885898370676469, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/171", + "#/texts/33", 1.0, - 389609625536083120, - 2278168081688323653, + 6687370681685741393, + 17939310132506951168, 18446744073709551615, 18446744073709551615, - 151, - 155, - 151, - 155, - 32, - 33, + 284, + 319, + 284, + 319, + 53, + 57, true, - "1907", - "1907" + "applicable Creative Commons License", + "applicable Creative Commons License" ], [ - "reference", - "author", - 13946275785662847920, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/172", + "#/texts/33", 1.0, - 8106352039693059414, - 189526913306248274, + 329104161846385964, + 16017248647642597247, 18446744073709551615, 18446744073709551615, - 3, - 10, - 3, - 10, - 2, - 4, + 134, + 139, + 134, + 139, + 18, + 19, true, - "Paolo R", - "Paolo R" + "Terms", + "Terms" ], [ - "reference", - "author", - 13946275785662847920, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/172", + "#/texts/33", 1.0, - 8106471247241844081, - 12829126084417792103, + 969969168017005656, + 2961182532179915323, 18446744073709551615, 18446744073709551615, - 12, - 19, - 12, - 19, - 5, - 7, - true, - "Marco P", - "Marco P" - ], - [ - "reference", - "author", - 13946275785662847920, - "TEXT", - "#/texts/172", - 1.0, - 15356089124994678984, - 18000216761919637454, - 18446744073709551615, - 18446744073709551615, - 21, - 31, + 144, + 154, + 144, + 154, + 20, 21, - 31, - 8, - 10, true, - "Floriana B", - "Floriana B" + "Conditions", + "Conditions" ], [ - "reference", - "author", - 13946275785662847920, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/172", + "#/texts/33", 1.0, - 8106352035144611657, - 2775049790770760163, + 329104161825278214, + 16021621362593374209, 18446744073709551615, 18446744073709551615, - 33, - 40, - 33, - 40, - 11, - 13, + 238, + 243, + 238, + 243, + 43, + 44, true, - "Peter S", - "Peter S" + "rules", + "rules" ], [ - "reference", - "author", - 13946275785662847920, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/172", + "#/texts/33", 1.0, - 6560601913145533820, - 12130024709208567744, + 12178341415895516060, + 12061595171928625555, 18446744073709551615, 18446744073709551615, - 42, - 51, - 42, - 51, - 14, - 17, + 247, + 250, + 247, + 250, + 45, + 46, true, - "Costas B.", - "Costas B." + "use", + "use" ], [ - "reference", - "citation-number", - 13946275785662847920, + "numval", + "ival", + 4361549266681704196, "TEXT", - "#/texts/172", + "#/texts/34", 1.0, - 17767354399704235156, - 2787669627718018145, + 17767354399704235163, + 165380245946403556, 18446744073709551615, 18446744073709551615, 0, @@ -9398,312 +9490,375 @@ 0, 1, true, - "4", - "4" + "3", + "3" ], [ - "reference", - "container-title", - 13946275785662847920, + "numval", + "ival", + 4361549266681704196, "TEXT", - "#/texts/172", + "#/texts/34", 1.0, - 4292761212337338605, - 773134743697376497, + 15441160910541481979, + 10132017072037949157, 18446744073709551615, 18446744073709551615, - 177, - 245, - 177, - 245, - 38, - 48, + 3, + 5, + 3, + 5, + 2, + 3, true, - "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi", - "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi" + "15", + "15" ], [ - "reference", - "location", - 13946275785662847920, + "numval", + "ival", + 8043608144162608258, "TEXT", - "#/texts/172", + "#/texts/35", 1.0, - 16381206478137548706, - 9744551904329916157, + 17767354399704235156, + 18342724908476302885, 18446744073709551615, 18446744073709551615, - 247, - 253, - 247, - 253, - 49, - 52, + 62, + 63, + 62, + 63, + 11, + 12, true, - "UAE, :", - "UAE, :" + "4", + "4" ], [ - "reference", - "title", - 13946275785662847920, + "numval", + "ival", + 8043608144162608258, "TEXT", - "#/texts/172", + "#/texts/35", 1.0, - 14371818679908732529, - 10294554605073457499, + 17767354399704235157, + 18342724908489108597, 18446744073709551615, 18446744073709551615, - 52, 174, - 52, + 175, 174, - 17, - 36, + 175, + 33, + 34, true, - "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019", - "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019" + "5", + "5" ], [ - "reference", - "url", - 13946275785662847920, + "expression", + "word-concatenation", + 8043608144162608258, "TEXT", - "#/texts/172", + "#/texts/35", 1.0, - 7742135058095281026, - 17571544217117981683, + 524264419207632938, + 10547923618079885832, 18446744073709551615, 18446744073709551615, - 257, - 268, - 257, - 268, - 54, - 59, + 13, + 29, + 13, + 29, + 3, + 4, true, - "https://doi", - "https://doi" + "high-performance", + "high-performance" ], [ - "reference", - "url", - 13946275785662847920, + "sentence", + "", + 8043608144162608258, "TEXT", - "#/texts/172", + "#/texts/35", 1.0, - 14023706993569865773, - 12197548886916811054, + 5042236808703360545, + 8650712924483655573, 18446744073709551615, 18446744073709551615, - 270, - 291, - 270, - 291, - 60, - 67, + 13, + 43, + 13, + 43, + 3, + 7, true, - "org/10.2118/197610-MS", - "org/10.2118/197610-MS" + "high-performance graph engine.", + "high-performance graph engine." ], [ - "reference", - "volume", - 13946275785662847920, + "sentence", + "", + 8043608144162608258, "TEXT", - "#/texts/172", + "#/texts/35", 1.0, - 15441160910541481982, - 13393763465685487585, + 13307765353894382590, + 13661934614468722351, 18446744073709551615, 18446744073709551615, - 253, - 255, - 253, - 255, - 52, - 53, + 44, + 153, + 44, + 153, + 7, + 29, true, - "10", - "10" + "Later, in section 4, we will discuss in detail how both components are deployed and interacting on the cloud.", + "Later, in section 4, we will discuss in detail how both components are deployed and interacting on the cloud." ], [ - "reference", - "author", - 7693798302433367973, + "sentence", + "", + 8043608144162608258, "TEXT", - "#/texts/173", + "#/texts/35", 1.0, - 3027248490321213074, - 16283814403211008850, + 1047931557873137932, + 18017234894347992215, 18446744073709551615, 18446744073709551615, + 154, + 262, + 154, + 262, + 29, + 51, + true, + "Finally, in section 5, we present the complete system in a real world case study and benchmark its accuracy.", + "Finally, in section 5, we present the complete system in a real world case study and benchmark its accuracy." + ], + [ + "term", + "single-term", + 8043608144162608258, + "TEXT", + "#/texts/35", + 1.0, + 2080025608098656880, + 1971601176160671565, + 18446744073709551615, + 18446744073709551615, + 13, + 42, + 13, + 42, 3, - 14, - 3, - 14, - 2, - 4, + 6, true, - "Guillaume L", - "Guillaume L" + "high-performance graph engine", + "high-performance graph engine" ], [ - "reference", - "author", - 7693798302433367973, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/173", + "#/texts/35", 1.0, - 14650310996645589292, - 14357325801323977565, + 4743061560066569517, + 9046325022279510283, 18446744073709551615, 18446744073709551615, - 16, - 24, - 16, - 24, - 5, - 7, + 192, + 207, + 192, + 207, + 38, + 40, true, - "Miguel B", - "Miguel B" + "complete system", + "complete system" ], [ - "reference", - "author", - 7693798302433367973, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/173", + "#/texts/35", 1.0, - 6049415556904669075, - 4491667145265607561, + 12197753754046053748, + 545238828955449628, 18446744073709551615, 18446744073709551615, - 26, - 35, - 26, - 35, - 8, - 10, + 213, + 234, + 213, + 234, + 42, + 46, true, - "Sandeep S", - "Sandeep S" + "real world case study", + "real world case study" ], [ - "reference", - "author", - 7693798302433367973, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/173", + "#/texts/35", 1.0, - 14650438760956024332, - 12941354247565292233, + 12178341415895525628, + 4396159551467569503, 18446744073709551615, 18446744073709551615, - 37, - 45, - 37, - 45, + 0, + 3, + 0, + 3, + 0, + 1, + true, + "way", + "way" + ], + [ + "term", + "single-term", + 8043608144162608258, + "TEXT", + "#/texts/35", + 1.0, + 8106478708629288965, + 2690053722178369781, + 18446744073709551615, + 18446744073709551615, + 54, + 61, + 54, + 61, + 10, 11, - 13, true, - "Kazuya K", - "Kazuya K" + "section", + "section" ], [ - "reference", - "author", - 7693798302433367973, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/173", + "#/texts/35", 1.0, - 14650449385951782031, - 12018837533588020118, + 16381206568246674273, + 9950902184857858955, 18446744073709551615, 18446744073709551615, - 47, - 55, - 47, - 55, - 14, + 84, + 90, + 84, + 90, 17, + 18, true, - "Chris D.", - "Chris D." + "detail", + "detail" ], [ - "reference", - "citation-number", - 7693798302433367973, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/173", + "#/texts/35", 1.0, - 17767354399704235157, - 9080683344301571175, + 2703018952916355661, + 16475014154010855623, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 100, + 110, + 100, + 110, + 20, + 21, true, - "5", - "5" + "components", + "components" ], [ - "reference", - "author", - 3109792572574236398, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/174", + "#/texts/35", 1.0, - 12139207556299923335, - 12395232115938598978, + 329104161517016668, + 8976287706705647369, 18446744073709551615, 18446744073709551615, - 3, - 16, - 3, - 16, - 2, - 5, + 147, + 152, + 147, + 152, + 27, + 28, true, - "Chiu Jason PC", - "Chiu Jason PC" + "cloud", + "cloud" ], [ - "reference", - "author", - 3109792572574236398, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/174", + "#/texts/35", 1.0, - 8106350848262626922, - 5052428205716655678, + 8106478708629288965, + 2690053722178360825, 18446744073709551615, 18446744073709551615, - 18, - 25, - 18, - 25, - 6, - 9, + 166, + 173, + 166, + 173, + 32, + 33, true, - "Eric N.", - "Eric N." + "section", + "section" ], [ - "reference", - "citation-number", - 3109792572574236398, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/174", + "#/texts/35", 1.0, - 17767354399704235158, - 2935027410945303089, + 14650440612701450082, + 7631421264816179483, + 18446744073709551615, + 18446744073709551615, + 253, + 261, + 253, + 261, + 49, + 50, + true, + "accuracy", + "accuracy" + ], + [ + "numval", + "ival", + 7159467829896778939, + "TEXT", + "#/texts/36", + 1.0, + 17767354399704235162, + 7924620771043007977, 18446744073709551615, 18446744073709551615, 0, @@ -9713,2661 +9868,2997 @@ 0, 1, true, - "6", - "6" + "2", + "2" ], [ - "reference", - "date", - 3109792572574236398, + "parenthesis", + "round brackets", + 5617240156952377, "TEXT", - "#/texts/174", + "#/texts/37", 1.0, - 389609625548777056, - 1668465275038003542, + 2293008940386739952, + 9841196227768892901, 18446744073709551615, 18446744073709551615, - 87, - 91, - 87, - 91, - 20, - 21, + 605, + 658, + 605, + 658, + 112, + 121, true, - "2016", - "2016" + "(' has-material-property ' or ' has-geological-age ')", + "(' has-material-property ' or ' has-geological-age ')" ], [ - "reference", - "journal", - 3109792572574236398, + "expression", + "common", + 5617240156952377, "TEXT", - "#/texts/174", + "#/texts/37", 1.0, - 389609625541773713, - 1712767977156820574, + 12178341415895450733, + 3018062721998632434, 18446744073709551615, 18446744073709551615, - 81, - 85, - 81, + 450, + 454, + 450, + 454, + 84, 85, - 18, - 19, true, - "TACL", - "TACL" + "etc", + "etc." ], [ - "reference", - "title", - 3109792572574236398, + "expression", + "word-concatenation", + 5617240156952377, "TEXT", - "#/texts/174", + "#/texts/37", 1.0, - 16636370883913883252, - 5810162511985509685, + 4906245502857778203, + 10671415923670610924, 18446744073709551615, 18446744073709551615, - 26, - 79, - 26, - 79, - 9, - 17, + 608, + 629, + 608, + 629, + 114, + 115, true, - "Named entity recognition with bidirectional LSTM-CNNs", - "Named entity recognition with bidirectional LSTM-CNNs" + "has-material-property", + "has-material-property" ], [ - "reference", - "title", - 3109792572574236398, + "expression", + "word-concatenation", + 5617240156952377, "TEXT", - "#/texts/174", + "#/texts/37", 1.0, - 9584872678510603869, - 10893893406063870923, + 18200117896215154992, + 3778470233715427826, 18446744073709551615, 18446744073709551615, - 91, - 101, - 91, - 101, - 21, - 25, + 637, + 655, + 637, + 655, + 118, + 119, true, - ";4:357-370", - ";4:357-370" + "has-geological-age", + "has-geological-age" ], [ - "reference", - "author", - 8111170387462350170, + "sentence", + "", + 5617240156952377, "TEXT", - "#/texts/175", + "#/texts/37", 1.0, - 6611312511369759405, - 3019524304480366334, + 12144997519516518537, + 9639908354679176796, 18446744073709551615, 18446744073709551615, - 3, - 12, - 3, - 12, - 2, - 4, + 0, + 124, + 0, + 124, + 0, + 23, true, - "Matthew H", - "Matthew H" + "In CPS, a Knowledge Graph is defined as a collection of entities and their relationships forming the graphs nodes and edges.", + "In CPS, a Knowledge Graph is defined as a collection of entities and their relationships forming the graphs nodes and edges." ], [ - "reference", - "author", - 8111170387462350170, + "sentence", + "", + 5617240156952377, "TEXT", - "#/texts/175", + "#/texts/37", 1.0, - 8106350362383531053, - 10877267985434630613, + 15563824490297277995, + 17150277444298513036, 18446744073709551615, 18446744073709551615, - 14, - 21, - 14, - 21, - 5, - 8, + 125, + 167, + 125, + 167, + 23, + 32, true, - "Ines M.", - "Ines M." + "Entities can have a wide variety of types.", + "Entities can have a wide variety of types." ], [ - "reference", - "citation-number", - 8111170387462350170, + "sentence", + "", + 5617240156952377, "TEXT", - "#/texts/175", + "#/texts/37", 1.0, - 17767354399704235159, - 17892509173094146701, + 12582210430540746378, + 14089013500727116382, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 168, + 262, + 168, + 262, + 32, + 49, true, - "7", - "7" + "A basic scenario includes types such as documents, document components, keywords, and authors.", + "A basic scenario includes types such as documents, document components, keywords, and authors." ], [ - "reference", - "date", - 8111170387462350170, + "sentence", + "", + 5617240156952377, "TEXT", - "#/texts/175", + "#/texts/37", 1.0, - 389609625548777057, - 14192492111179186414, + 9972047232259612032, + 3846286086726380255, 18446744073709551615, 18446744073709551615, - 151, - 155, - 151, - 155, - 28, - 29, + 263, + 484, + 263, + 484, + 49, + 92, true, - "2017", - "2017" + "In addition, there can be more specific types tied to domain verticals, such as materials and properties in material science, or geological ages, formations, rocks, minerals, structures, etc., for oil and gas exploration.", + "In addition, there can be more specific types tied to domain verticals, such as materials and properties in material science, or geological ages, formations, rocks, minerals, structures, etc., for oil and gas exploration." ], [ - "reference", - "editor", - 8111170387462350170, + "sentence", + "", + 5617240156952377, "TEXT", - "#/texts/175", + "#/texts/37", 1.0, - 5944998866513528822, - 6604265927490760522, + 794739137233286117, + 9994141964316778148, 18446744073709551615, 18446744073709551615, - 140, - 149, - 140, - 149, - 25, - 27, + 485, + 551, + 485, + 551, + 92, + 103, true, - "To appear", - "To appear" + "Relationships in the KG are strictly defined between the entities.", + "Relationships in the KG are strictly defined between the entities." ], [ - "reference", - "title", - 8111170387462350170, + "sentence", + "", + 5617240156952377, "TEXT", - "#/texts/175", + "#/texts/37", 1.0, - 8673657110667713983, - 2132423457048291450, + 1452563618681853259, + 7631253544865320080, 18446744073709551615, 18446744073709551615, - 22, - 138, - 22, - 138, - 8, - 24, + 552, + 659, + 552, + 659, + 103, + 122, true, - "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing", - "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing" + "Similar to the entities, the relationships are typed (' has-material-property ' or ' has-geological-age ').", + "Similar to the entities, the relationships are typed (' has-material-property ' or ' has-geological-age ')." ], [ - "reference", - "author", - 14682702346227170925, + "sentence", + "", + 5617240156952377, "TEXT", - "#/texts/176", + "#/texts/37", 1.0, - 6627095272342846459, - 8960025720845820047, + 3192889723504224118, + 10764133440858685575, 18446744073709551615, 18446744073709551615, - 3, - 12, - 3, - 12, - 2, - 4, + 660, + 796, + 660, + 796, + 122, + 147, true, - "Magoon LB", - "Magoon LB" + "Also, relationships in the KG can be weighted, for example, to represent the trustworthiness of a fact that the relationship represents.", + "Also, relationships in the KG can be weighted, for example, to represent the trustworthiness of a fact that the relationship represents." ], [ - "reference", - "author", - 14682702346227170925, + "term", + "enum-term-mark-2", + 5617240156952377, "TEXT", - "#/texts/176", + "#/texts/37", 1.0, - 6563582333827106756, - 4026322596752919867, + 5515747999597331548, + 9117859149955612445, 18446744073709551615, 18446744073709551615, - 14, - 23, - 14, - 23, - 5, - 7, + 460, + 483, + 460, + 483, + 87, + 91, true, - "Hudson TL", - "Hudson TL" + "oil and gas exploration", + "oil and gas exploration" ], [ - "reference", - "author", - 14682702346227170925, + "term", + "enum-term-mark-3", + 5617240156952377, "TEXT", - "#/texts/176", + "#/texts/37", 1.0, - 1612814864176813785, - 12195293078214673428, + 15863098611266611689, + 11938678268878922872, 18446744073709551615, 18446744073709551615, - 25, - 35, - 25, - 35, - 8, - 11, + 101, + 123, + 101, + 123, + 18, + 22, true, - "Peters KE.", - "Peters KE." + "graphs nodes and edges", + "graphs nodes and edges" ], [ - "reference", - "citation-number", - 14682702346227170925, + "term", + "enum-term-mark-3", + 5617240156952377, "TEXT", - "#/texts/176", + "#/texts/37", 1.0, - 17767354399704235152, - 15651484829649486928, + 8274047561994409760, + 2528562038833681719, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 228, + 261, + 228, + 261, + 42, + 48, true, - "8", - "8" + "components, keywords, and authors", + "components, keywords, and authors" ], [ - "reference", - "date", - 14682702346227170925, + "term", + "enum-term-mark-3", + 5617240156952377, "TEXT", - "#/texts/176", + "#/texts/37", 1.0, - 329104147695665975, - 7749771140976442, + 15083712120508435047, + 15480403097954548676, 18446744073709551615, 18446744073709551615, - 163, - 168, - 163, - 168, - 38, - 40, + 343, + 367, + 343, + 367, + 65, + 68, true, - "2005;", - "2005;" + "materials and properties", + "materials and properties" ], [ - "reference", - "journal", - 14682702346227170925, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/176", + "#/texts/37", 1.0, - 14445748745948696227, - 6494504935180328364, + 5877539623435777295, + 3070141646605830439, 18446744073709551615, 18446744073709551615, - 139, - 161, - 139, - 161, - 32, - 37, + 10, + 25, + 10, + 25, + 4, + 6, true, - "Am Assoc Pet Geol Bull", - "Am Assoc Pet Geol Bull" + "Knowledge Graph", + "Knowledge Graph" ], [ - "reference", - "title", - 14682702346227170925, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/176", + "#/texts/37", 1.0, - 10827383077041810226, - 7289787549141850214, + 2924937330842356899, + 7802493761985505696, 18446744073709551615, 18446744073709551615, - 36, - 52, - 36, - 52, - 11, - 16, + 101, + 113, + 101, + 113, + 18, + 20, true, - "Egret-Hibernia(!", - "Egret-Hibernia(!" + "graphs nodes", + "graphs nodes" ], [ - "reference", - "title", - 14682702346227170925, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/176", + "#/texts/37", 1.0, - 8991166294068381652, - 13146587142049422219, + 5069338760716920094, + 17809400713453098686, 18446744073709551615, 18446744073709551615, - 55, - 137, - 55, - 137, - 18, - 31, + 145, + 157, + 145, + 157, + 27, + 29, true, - "a significant petroleum system, northern Grand Banks area, offshore eastern Canada", - "a significant petroleum system, northern Grand Banks area, offshore eastern Canada" + "wide variety", + "wide variety" ], [ - "reference", - "volume", - 14682702346227170925, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/176", + "#/texts/37", 1.0, - 12994571832648066926, - 17152242518570841800, + 15408825885837354070, + 15053329185263397097, 18446744073709551615, 18446744073709551615, - 168, - 183, - 168, - 183, - 40, - 46, + 170, + 184, + 170, + 184, + 33, + 35, true, - "89(9):1203-1237", - "89(9):1203-1237" + "basic scenario", + "basic scenario" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/177", + "#/texts/37", 1.0, - 389609625548777262, - 8826555294676663632, + 17524405716142769441, + 12187828972523501476, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 219, + 238, + 219, + 238, + 41, + 43, true, - "2020", - "2020" + "document components", + "document components" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/177", + "#/texts/37", 1.0, - 389609625548777251, - 8826555296349648778, + 15130402071104315819, + 14505129180480892684, 18446744073709551615, 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 294, + 308, + 294, + 308, + 56, + 58, true, - "2023", - "2023" + "specific types", + "specific types" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/177", + "#/texts/37", 1.0, - 8104408072666212335, - 13552219042525319352, + 10788814978233814896, + 1701325665325828957, 18446744073709551615, 18446744073709551615, + 371, + 387, + 371, + 387, + 69, 71, - 78, - 71, - 78, - 20, - 21, true, - "10.1002", - "10.1002" + "material science", + "material science" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/177", + "#/texts/37", 1.0, - 389609625548868096, - 8826558551385119058, + 9663226904190425014, + 11862188099935093855, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 392, + 407, + 392, + 407, + 73, + 75, true, - "2.20", - "2.20" + "geological ages", + "geological ages" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/177", + "#/texts/37", 1.0, - 14654386914267794441, - 12796143052106760105, + 10692163443301812358, + 3113975335211030761, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 468, + 483, + 468, + 483, + 89, + 91, true, - "26895595", - "26895595" + "gas exploration", + "gas exploration" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/177", + "#/texts/37", 1.0, - 17767354399704235162, - 7753390158484899261, + 12178341415896222428, + 3018050375403736631, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 3, + 6, + 3, + 6, + 1, + 2, true, - "2", - "2" + "CPS", + "CPS" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/177", + "#/texts/37", 1.0, - 15441160910541481791, - 3518619573290839093, + 2702984786539193186, + 2066105174092978753, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 42, + 52, + 42, + 52, + 10, + 11, true, - "23", - "23" + "collection", + "collection" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/177", + "#/texts/37", 1.0, - 15441160910541481543, - 3518617976696906498, + 14652256560445338257, + 14335368261363034099, 18446744073709551615, 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 56, + 64, + 56, + 64, + 12, + 13, true, - "08", - "08" + "entities", + "entities" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/177", + "#/texts/37", 1.0, - 8536069645534292969, - 16063604623463467342, + 8279380567349713241, + 12534042586919924803, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 75, + 88, + 75, + 88, + 15, + 16, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "relationships", + "relationships" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/177", + "#/texts/37", 1.0, - 594099663775968682, - 14698211805947073928, + 329104162186494203, + 15974017122495616980, 18446744073709551615, 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 118, + 123, + 118, + 123, + 21, + 22, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "edges", + "edges" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/177", + "#/texts/37", 1.0, - 1697220653346092555, - 8458710314769009562, + 14650436091620137967, + 17692238337199335254, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 125, + 133, + 125, + 133, + 23, + 24, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "Entities", + "Entities" ], [ - "reference", - "author", - 11430385775112165283, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/178", + "#/texts/37", 1.0, - 7087532328962869115, - 5488976721015347116, + 329104159243796903, + 15885015617704145871, 18446744073709551615, 18446744073709551615, - 3, - 13, - 3, - 13, - 2, - 5, + 161, + 166, + 161, + 166, + 30, + 31, true, - "Estrada E.", - "Estrada E." + "types", + "types" ], [ - "reference", - "citation-number", - 11430385775112165283, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/178", + "#/texts/37", 1.0, - 17767354399704235153, - 10433678415276841389, + 329104159243796903, + 15885015617704135557, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 194, + 199, + 194, + 199, + 36, + 37, true, - "9", - "9" + "types", + "types" ], [ - "reference", - "date", - 11430385775112165283, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/178", + "#/texts/37", 1.0, - 8104407400303630267, - 3516783299715161152, + 6167933651658664291, + 7313720567483528692, 18446744073709551615, 18446744073709551615, - 67, - 74, - 67, - 74, - 15, - 18, + 208, + 217, + 208, + 217, + 39, + 40, true, - "2005;71", - "2005;71" + "documents", + "documents" ], [ - "reference", - "journal", - 11430385775112165283, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/178", + "#/texts/37", 1.0, - 1821145667706451373, - 6349148037602643636, + 14634111734655409321, + 3345538017068759698, 18446744073709551615, 18446744073709551615, - 55, - 65, - 55, - 65, - 11, - 14, + 240, + 248, + 240, + 248, + 44, + 45, true, - "Phys Rev E", - "Phys Rev E" + "keywords", + "keywords" ], [ - "reference", - "title", - 11430385775112165283, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/178", + "#/texts/37", 1.0, - 10002059539925749429, - 4038144589619849267, + 8106397759446161562, + 18033240504910693308, 18446744073709551615, 18446744073709551615, - 14, - 53, - 14, - 53, - 5, - 10, + 254, + 261, + 254, + 261, + 47, + 48, true, - "Subgraph centrality in complex networks", - "Subgraph centrality in complex networks" + "authors", + "authors" ], [ - "reference", - "volume", - 11430385775112165283, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/178", + "#/texts/37", 1.0, - 6573854687835318787, - 906292219904540950, + 14650447861280948245, + 10555199694781207120, 18446744073709551615, 18446744073709551615, - 75, - 84, - 75, - 84, - 19, - 23, + 266, + 274, + 266, + 274, + 50, + 51, true, - "5):056103", - "5):056103" + "addition", + "addition" ], [ - "reference", - "author", - 5825495964576843004, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/179", + "#/texts/37", 1.0, - 2628812302410383486, - 8225541491002394036, + 3534273810487275626, + 4373385647668922427, 18446744073709551615, 18446744073709551615, - 4, - 19, - 4, - 19, - 2, - 4, + 324, + 333, + 324, + 333, + 61, + 62, true, - "Estrada Ernesto", - "Estrada Ernesto" + "verticals", + "verticals" ], [ - "reference", - "author", - 5825495964576843004, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/179", + "#/texts/37", 1.0, - 17728567422753594500, - 4401840231895103727, + 6179392753523812130, + 16868072188025352035, 18446744073709551615, 18446744073709551615, - 21, - 38, - 21, - 38, - 5, - 9, + 343, + 352, + 343, + 352, + 65, + 66, true, - "Higham Desmond J.", - "Higham Desmond J." + "materials", + "materials" ], [ - "reference", - "citation-number", - 5825495964576843004, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/179", + "#/texts/37", 1.0, - 15441160910541481982, - 2952327273286615865, + 14088628410271132453, + 1983771389005185922, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 357, + 367, + 357, + 367, + 67, + 68, true, - "10", - "10" + "properties", + "properties" ], [ - "reference", - "date", - 5825495964576843004, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/179", + "#/texts/37", 1.0, - 389609625548777062, - 8937154938925173833, + 16064217528453934834, + 2928977078579581381, 18446744073709551615, 18446744073709551615, - 40, - 44, - 40, - 44, - 10, - 11, + 409, + 419, + 409, + 419, + 76, + 77, true, - "2010", - "2010" + "formations", + "formations" ], [ - "reference", - "journal", - 5825495964576843004, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/179", + "#/texts/37", 1.0, - 745633759305567859, - 2105664067016610109, + 329104161637315394, + 15906833325474937465, 18446744073709551615, 18446744073709551615, - 47, - 112, - 47, - 112, - 13, - 22, + 421, + 426, + 421, + 426, + 78, + 79, true, - "Network Properties Revealed through Matrix Functions. SIAM Review", - "Network Properties Revealed through Matrix Functions. SIAM Review" + "rocks", + "rocks" ], [ - "reference", - "pages", - 5825495964576843004, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/179", + "#/texts/37", 1.0, - 8104408773920978895, - 9147525378271823463, + 14638289846375411086, + 13515658020381275329, 18446744073709551615, 18446744073709551615, - 123, - 130, - 123, - 130, - 29, - 30, + 428, + 436, + 428, + 436, + 80, + 81, true, - "696-714", - "696-714" + "minerals", + "minerals" ], [ - "reference", - "url", - 5825495964576843004, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/179", + "#/texts/37", 1.0, - 16159594323378820687, - 15692242274322104012, + 14120356269929906404, + 7929205159710255559, 18446744073709551615, 18446744073709551615, - 132, - 167, - 132, - 167, - 31, - 44, + 438, + 448, + 438, + 448, + 82, + 83, true, - "http://dx.doi.org/10.1137/090761070", - "http://dx.doi.org/10.1137/090761070" + "structures", + "structures" ], [ - "reference", - "volume", - 5825495964576843004, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/179", + "#/texts/37", 1.0, - 15441160910541486331, - 2952320863259255438, + 12178341415895623363, + 3018073362770496593, 18446744073709551615, 18446744073709551615, - 114, - 116, - 114, - 116, - 23, - 24, + 460, + 463, + 460, + 463, + 87, + 88, true, - "52", - "52" + "oil", + "oil" ], [ - "reference", - "volume", - 5825495964576843004, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/179", + "#/texts/37", 1.0, - 17767354399704235156, - 8049906977590018916, + 1808270638656316647, + 13986132968258321440, 18446744073709551615, 18446744073709551615, - 119, - 120, - 119, - 120, - 26, - 27, + 485, + 498, + 485, + 498, + 92, + 93, true, - "4", - "4" + "Relationships", + "Relationships" ], [ - "numval", - "year", - 5698421097735371040, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/180", + "#/texts/37", 1.0, - 389609625548777055, - 1517668227262464254, + 15441160910541480204, + 6094061681036227158, 18446744073709551615, 18446744073709551615, - 45, - 49, - 45, - 49, - 9, - 10, + 506, + 508, + 506, + 508, + 95, + 96, true, - "2019", - "2019" + "KG", + "KG" ], [ - "numval", - "fval", - 5698421097735371040, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/180", + "#/texts/37", 1.0, - 12178341415896427355, - 7596226314134098818, + 14652256560445338257, + 14335368261362934201, 18446744073709551615, 18446744073709551615, - 40, - 43, - 40, - 43, - 7, - 8, + 542, + 550, + 542, + 550, + 101, + 102, true, - "1.0", - "1.0" + "entities", + "entities" ], [ - "numval", - "ival", - 5698421097735371040, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/180", + "#/texts/37", 1.0, - 15441160910541481983, - 11293846485728944316, + 14652256560445338257, + 14335368261362935731, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 567, + 575, + 567, + 575, + 106, + 107, true, - "11", - "11" + "entities", + "entities" ], [ - "reference", - "author", - 5870535063942256428, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/181", + "#/texts/37", 1.0, - 15754713894443025139, - 17869835566751337591, + 8279380567349713241, + 12534042586919828830, 18446744073709551615, 18446744073709551615, - 4, - 15, - 4, - 15, - 2, - 4, + 581, + 594, + 581, + 594, + 109, + 110, true, - "TigerGraph.", - "TigerGraph." + "relationships", + "relationships" ], [ - "reference", - "citation-number", - 5870535063942256428, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/181", + "#/texts/37", 1.0, - 15441160910541481976, - 12703724519968684238, + 4906245502857778203, + 10671415923670610924, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 608, + 629, + 608, + 629, + 114, + 115, true, - "12", - "12" + "has-material-property", + "has-material-property" ], [ - "reference", - "date", - 5870535063942256428, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/181", + "#/texts/37", 1.0, - 389609625548777054, - 3194806985827377522, + 8279380567349713241, + 12534042586919764573, 18446744073709551615, 18446744073709551615, - 47, - 51, - 47, - 51, - 11, - 12, + 666, + 679, + 666, + 679, + 124, + 125, true, - "2018", - "2018" + "relationships", + "relationships" ], [ - "reference", - "title", - 5870535063942256428, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/181", + "#/texts/37", 1.0, - 17475892521501552303, - 8529795867214537154, + 15441160910541480204, + 6094061681036184736, 18446744073709551615, 18446744073709551615, - 16, - 45, - 16, - 45, - 4, - 10, + 687, + 689, + 687, + 689, + 127, + 128, true, - "Real-Time Deep Link Analytics", - "Real-Time Deep Link Analytics" + "KG", + "KG" ], [ - "reference", - "author", - 18196767266655606709, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/182", + "#/texts/37", 1.0, - 14652280730090715542, - 9368048166047908224, + 8106397496085150773, + 13219162774327540266, 18446744073709551615, 18446744073709551615, - 4, - 12, - 4, - 12, - 2, - 4, + 711, + 718, + 711, + 718, + 133, + 134, true, - "Jeremy K", - "Jeremy K" + "example", + "example" ], [ - "reference", - "author", - 18196767266655606709, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/182", + "#/texts/37", 1.0, - 8106396242733918714, - 2646308426186848374, + 16946081241153289166, + 9864466924854841899, 18446744073709551615, 18446744073709551615, - 14, - 21, - 14, - 21, - 5, - 8, + 737, + 752, + 737, + 752, + 138, + 139, true, - "John G.", - "John G." + "trustworthiness", + "trustworthiness" ], [ - "reference", - "citation-number", - 18196767266655606709, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/182", + "#/texts/37", 1.0, - 15441160910541481977, - 12462842527617278799, + 389609625697921894, + 4210996759022448259, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 758, + 762, + 758, + 762, + 141, + 142, true, - "13", - "13" + "fact", + "fact" ], [ - "reference", - "date", - 18196767266655606709, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/182", + "#/texts/37", 1.0, - 16381206542172555296, - 17521384641614480308, + 11304142420310002900, + 15341405723120219151, 18446744073709551615, 18446744073709551615, - 138, - 144, - 138, + 772, + 784, + 772, + 784, 144, - 27, - 29, + 145, true, - "; 2011", - "; 2011" + "relationship", + "relationship" ], [ - "reference", - "journal", - 18196767266655606709, + "numval", + "ival", + 3276490574487379366, "TEXT", - "#/texts/182", + "#/texts/38", 1.0, - 1813266722082342225, - 593931840598100395, + 17767354399704235161, + 7431448323123128102, 18446744073709551615, 18446744073709551615, - 74, - 86, - 74, - 86, - 17, - 18, + 390, + 391, + 390, + 391, + 56, + 57, true, - "Philadelphia", - "Philadelphia" + "1", + "1" ], [ - "reference", - "publisher", - 18196767266655606709, + "parenthesis", + "round brackets", + 3276490574487379366, "TEXT", - "#/texts/182", + "#/texts/38", 1.0, - 12316905074950798954, - 2223929060632914124, + 15394970067510817687, + 15962838465604485796, 18446744073709551615, 18446744073709551615, - 92, - 138, - 92, - 138, - 21, - 27, + 502, + 544, + 502, + 544, + 75, + 86, true, - "Society for Industrial and Applied Mathematics", - "Society for Industrial and Applied Mathematics" + "(eg, title, abstract, references, authors)", + "(eg, title, abstract, references, authors)" ], [ - "reference", - "title", - 18196767266655606709, + "parenthesis", + "round brackets", + 3276490574487379366, "TEXT", - "#/texts/182", + "#/texts/38", 1.0, - 11539515714196318944, - 4409464707523225606, + 16942421772084851866, + 558193540038999230, 18446744073709551615, 18446744073709551615, - 22, - 72, - 22, - 72, - 8, - 16, + 606, + 625, + 606, + 625, + 97, + 102, true, - "Graph Algorithms in the Language of Linear Algebra", - "Graph Algorithms in the Language of Linear Algebra" + "(of various levels)", + "(of various levels)" ], [ - "reference", - "volume", - 18196767266655606709, + "parenthesis", + "round brackets", + 3276490574487379366, "TEXT", - "#/texts/182", + "#/texts/38", 1.0, - 12178341415896290392, - 14083523807676346774, + 9856539956643571945, + 16934970679196867236, 18446744073709551615, 18446744073709551615, - 88, - 91, - 88, - 91, - 19, - 21, + 653, + 694, + 653, + 694, + 108, + 116, true, - "PA:", - "PA:" + "(with internal row and column structures)", + "(with internal row and column structures)" ], [ - "reference", - "author", - 3623403683642367845, + "parenthesis", + "round brackets", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 3893756947393595038, - 15910484170600691612, + 329104053183779089, + 6177701307152006939, 18446744073709551615, 18446744073709551615, - 4, - 17, - 4, - 17, - 2, - 4, + 1006, + 1011, + 1006, + 1011, + 176, + 179, true, - "Kepner Jeremy", - "Kepner Jeremy" + "(DAG)", + "(DAG)" ], [ - "reference", - "author", - 3623403683642367845, + "parenthesis", + "round brackets", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 4638041857648041651, - 2139644705806385528, + 389609625544714259, + 3798349935221358481, 18446744073709551615, 18446744073709551615, - 19, - 30, - 19, - 30, - 5, - 7, - true, - "Bader David", - "Bader David" + 1052, + 1056, + 1052, + 1056, + 181, + 181, + false, + "(DF)", + "(DF)" ], [ - "reference", - "author", - 3623403683642367845, + "expression", + "word-concatenation", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 9621725435760800320, - 4639858687526125642, + 15378814484553003560, + 5153477104270258604, 18446744073709551615, 18446744073709551615, - 32, - 47, - 32, - 45, - 8, - 12, + 134, + 155, + 134, + 155, + 23, + 24, true, - "Bulu\u00e7 Ayd \u0131 n", - "Bulu\u00e7 Ayd \u0131 n" + "machine-interpretable", + "machine-interpretable" ], [ - "reference", - "author", - 3623403683642367845, + "expression", + "word-concatenation", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 978039607314331382, - 9008054255178396141, + 6182928126338688945, + 18137263452770401078, 18446744073709551615, 18446744073709551615, - 49, - 61, - 47, - 59, - 13, - 15, + 492, + 501, + 492, + 501, + 74, + 75, true, - "Gilbert John", - "Gilbert John" + "meta-data", + "meta-data" ], [ - "reference", - "author", - 3623403683642367845, + "expression", + "wtoken-concatenation", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 10968707392751490476, - 11627993516556341660, + 17425405063522726763, + 3711175375270954936, 18446744073709551615, 18446744073709551615, - 63, - 78, - 61, - 76, - 16, - 18, + 156, + 210, + 156, + 210, + 24, + 25, true, - "Mattson Timothy", - "Mattson Timothy" + "formatssuchasJSON,XML,orHTML.However,inthevastmajority", + "formatssuchasJSON,XML,orHTML.However,inthevastmajority" ], [ - "reference", - "author", - 3623403683642367845, + "expression", + "wtoken-concatenation", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 3010219124533777340, - 3552467627404320563, + 12178341415895464512, + 2400205165144462589, 18446744073709551615, 18446744073709551615, - 80, - 98, - 78, - 96, - 19, - 21, + 785, + 788, + 785, + 788, + 149, + 150, true, - "Meyerhenke Henning", - "Meyerhenke Henning" + "d,m", + "d,m" ], [ - "reference", - "citation-number", - 3623403683642367845, + "expression", + "wtoken-concatenation", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 15441160910541481978, - 9067685736347109846, + 17917268842395766052, + 17602957436319151954, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 801, + 868, + 801, + 868, + 156, + 157, true, - "14", - "14" + "processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely", + "processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely" ], [ - "reference", - "date", - 3623403683642367845, + "expression", + "wtoken-concatenation", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 389609625548777059, - 3330964369910711146, + 6406713117350634128, + 3855167623003760197, 18446744073709551615, 18446744073709551615, - 100, - 104, - 98, - 102, - 22, - 23, + 881, + 895, + 881, + 895, + 159, + 160, true, - "2015", - "2015" + "annotation,and", + "annotation,and" ], [ - "reference", - "date", - 3623403683642367845, + "expression", + "wtoken-concatenation", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 389609625548777059, - 3330964369910703397, + 11781935617291778240, + 7483730464872502560, 18446744073709551615, 18446744073709551615, - 240, - 244, - 238, - 242, - 61, - 62, + 1016, + 1111, + 1016, + 1111, + 181, + 182, true, - "2015", - "2015" + "willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss", + "willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss" ], [ - "reference", - "location", - 3623403683642367845, + "sentence", + "", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 9440834537675533739, - 6746478687441634720, + 5803220549886302367, + 15412963923438297980, 18446744073709551615, 18446744073709551615, - 107, - 143, - 105, - 141, - 25, - 33, + 0, + 79, + 0, + 79, + 0, + 15, true, - "Graphs, Matrices, and the GraphBLAS:", - "Graphs, Matrices, and the GraphBLAS:" + "In typical cases, we start from a collection of documents in different formats.", + "In typical cases, we start from a collection of documents in different formats." ], [ - "reference", - "pages", - 3623403683642367845, + "sentence", + "", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 6573068860818606718, - 4687668980596472570, + 5852349916619262917, + 2271532486054143502, 18446744073709551615, 18446744073709551615, - 195, - 204, - 193, - 202, - 43, - 44, + 80, + 309, + 80, + 309, + 15, + 41, true, - "2453-2462", - "2453-2462" + "Sometimes, documents are available in semistructured, machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority of cases this does not apply, especially for proprietary documents of companies and organizations.", + "Sometimes, documents are available in semistructured, machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority of cases this does not apply, especially for proprietary documents of companies and organizations." ], [ - "reference", - "pages", - 3623403683642367845, + "sentence", + "", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 16380805713218987920, - 9575999090851094928, + 18004703537270342816, + 17398565441782732462, 18446744073709551615, 18446744073709551615, - 245, - 251, - 243, - 249, - 63, - 64, + 310, + 374, + 310, + 374, + 41, + 52, true, - "05.353", - "05.353" + "The latter are very often scanned or programmatic PDF documents.", + "The latter are very often scanned or programmatic PDF documents." ], [ - "reference", - "publisher", - 3623403683642367845, + "sentence", + "", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 10585062274889693433, - 394824704429372117, + 18263128607664038326, + 17664748450447018207, 18446744073709551615, 18446744073709551615, - 144, - 162, - 142, - 160, - 33, - 36, + 375, + 458, + 375, + 458, + 52, + 68, true, - "Seven Good Reasons", - "Seven Good Reasons" + "Using the CCS, 1 these types of documents are converted into structured JSON files.", + "Using the CCS, 1 these types of documents are converted into structured JSON files." ], [ - "reference", - "publisher", - 3623403683642367845, + "sentence", + "", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 11311803343161413167, - 2833609951174621747, + 4430064159297423066, + 1342061296626575350, 18446744073709551615, 18446744073709551615, - 164, - 189, - 162, - 187, - 37, - 40, + 459, + 567, + 459, + 567, + 68, + 91, true, - "Procedia Computer Science", - "Procedia Computer Science" + "Those provide easy access to the meta-data (eg, title, abstract, references, authors) and the document body.", + "Those provide easy access to the meta-data (eg, title, abstract, references, authors) and the document body." ], [ - "reference", - "url", - 3623403683642367845, + "sentence", + "", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 16959048237954323084, - 10596594611762835857, + 1486767917779853449, + 5612445825939840029, 18446744073709551615, 18446744073709551615, - 206, - 239, - 204, - 237, - 45, - 60, + 568, + 725, + 568, + 725, + 91, + 123, true, - "http://dx.doi.org/10.1016/j.procs", - "http://dx.doi.org/10.1016/j.procs" + "The latter is structured by subtitles (of various levels), paragraphs, lists, tables (with internal row and column structures), figures, and linked captions.", + "The latter is structured by subtitles (of various levels), paragraphs, lists, tables (with internal row and column structures), figures, and linked captions." ], [ - "reference", - "volume", - 3623403683642367845, + "sentence", + "", + 3276490574487379366, "TEXT", - "#/texts/183", + "#/texts/38", 1.0, - 15441160910541486330, - 9067694506000682765, + 14033738409479099128, + 11129512477137196725, 18446744073709551615, 18446744073709551615, - 191, - 193, - 189, - 191, - 41, - 42, + 726, + 908, + 726, + 908, + 123, + 162, true, - "51", - "51" + "O n c et h ec o r p u si sp r e s n ti nas t r u c t u r e d,m a c h i n e processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely extraction, annotation,and aggregation.", + "O n c et h ec o r p u si sp r e s n ti nas t r u c t u r e d,m a c h i n e processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely extraction, annotation,and aggregation." ], [ - "reference", - "author", - 13936866850854297069, + "sentence", + "", + 3276490574487379366, "TEXT", - "#/texts/184", + "#/texts/38", 1.0, - 8106396252822508385, - 7971302054101082514, + 12172348619957438677, + 66135446863509608, 18446744073709551615, 18446744073709551615, - 4, - 11, - 4, - 11, - 2, - 4, + 909, + 1012, + 909, + 1012, + 162, + 180, true, - "Aydin B", - "Aydin B" + "The inherent dependencies between these three tasks are defined through a directed acyclic graph (DAG).", + "The inherent dependencies between these three tasks are defined through a directed acyclic graph (DAG)." ], [ - "reference", - "author", - 13936866850854297069, + "sentence", + "", + 3276490574487379366, "TEXT", - "#/texts/184", + "#/texts/38", 1.0, - 3367556578117774584, - 5704823584998723957, + 12651939560234325286, + 7750091780475029203, 18446744073709551615, 18446744073709551615, - 13, - 28, - 13, - 28, - 5, - 9, + 1013, + 1141, + 1013, + 1141, + 180, + 189, true, - "Gilbert John R.", - "Gilbert John R." + "We willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss the details for each DF task.", + "We willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss the details for each DF task." ], [ - "reference", - "citation-number", - 13936866850854297069, + "term", + "enum-term-mark-2", + 3276490574487379366, "TEXT", - "#/texts/184", + "#/texts/38", 1.0, - 15441160910541481979, - 10213682970367471311, + 9624499087057569438, + 1557688156493519288, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 668, + 682, + 668, + 682, + 111, + 114, true, - "15", - "15" + "row and column", + "row and column" ], [ - "reference", - "date", - 13936866850854297069, + "term", + "enum-term-mark-3", + 3276490574487379366, "TEXT", - "#/texts/184", + "#/texts/38", 1.0, - 329104147695662665, - 13454856964816440075, + 12289843658171515644, + 3893311679547523383, 18446744073709551615, 18446744073709551615, - 127, - 132, - 127, - 132, - 27, - 29, + 281, + 308, + 281, + 308, + 37, + 40, true, - "2011;", - "2011;" + "companies and organizations", + "companies and organizations" ], [ - "reference", - "journal", - 13936866850854297069, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/184", + "#/texts/38", 1.0, - 15067288891537767501, - 3357793480659482128, + 471372152363700254, + 3033927950885562857, 18446744073709551615, 18446744073709551615, - 95, - 125, - 95, - 125, - 20, - 26, + 3, + 16, + 3, + 16, + 1, + 3, true, - "Int J High Perform Comput Appl", - "Int J High Perform Comput Appl" + "typical cases", + "typical cases" ], [ - "reference", - "title", - 13936866850854297069, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/184", + "#/texts/38", 1.0, - 6150328359964540652, - 10199114762007747151, + 600373524240865062, + 2399966783289403744, 18446744073709551615, 18446744073709551615, - 29, - 93, - 29, - 93, - 9, - 19, + 61, + 78, + 61, + 78, + 12, + 14, true, - "The combinatorial BLAS: design, implementation, and applications", - "The combinatorial BLAS: design, implementation, and applications" + "different formats", + "different formats" ], [ - "reference", - "volume", - 13936866850854297069, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/184", + "#/texts/38", 1.0, - 15441160910541481789, - 10213682972046866336, + 3457719533794546100, + 11639044621187327287, 18446744073709551615, 18446744073709551615, - 132, 134, - 132, + 210, 134, - 29, - 30, + 210, + 23, + 25, true, - "25", - "25" + "machine-interpretable __default__", + "machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority" ], [ - "reference", - "volume", - 13936866850854297069, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/184", + "#/texts/38", 1.0, - 10114972591203837083, - 15828968969447743865, + 2100740202135991641, + 5436840372240293767, 18446744073709551615, 18446744073709551615, - 136, - 146, - 136, - 146, - 31, - 35, + 256, + 277, + 256, + 277, + 34, + 36, true, - "4):496-509", - "4):496-509" + "proprietary documents", + "proprietary documents" ], [ - "reference", - "author", - 8497015665124263236, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/185", + "#/texts/38", 1.0, - 14652280730090715542, - 12791881049692147803, + 1482873404926828774, + 1056012052691956269, 18446744073709551615, 18446744073709551615, - 4, - 12, - 4, - 12, - 2, - 4, + 347, + 373, + 347, + 373, + 48, + 51, true, - "Jeremy K", - "Jeremy K" + "programmatic PDF documents", + "programmatic PDF documents" ], [ - "reference", - "author", - 8497015665124263236, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/185", + "#/texts/38", 1.0, - 8106352035144611671, - 4513564816050590788, + 7691191751134078884, + 8559094095047305408, 18446744073709551615, 18446744073709551615, - 14, - 21, - 14, - 21, - 5, - 7, + 436, + 457, + 436, + 457, + 64, + 67, true, - "Peter A", - "Peter A" + "structured JSON files", + "structured JSON files" ], [ - "reference", - "author", - 8497015665124263236, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/185", + "#/texts/38", 1.0, - 11373457542276896833, - 10633744312666392907, + 2015984486169108115, + 14466633496919987426, 18446744073709551615, 18446744073709551615, - 23, - 36, - 23, - 36, - 8, - 11, + 473, + 484, + 473, + 484, + 70, + 72, true, - "Bader David A", - "Bader David A" + "easy access", + "easy access" ], [ - "reference", - "citation-number", - 8497015665124263236, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/185", + "#/texts/38", 1.0, - 15441160910541481860, - 13099555958800192769, + 15559940474156832047, + 9774609109928111702, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 553, + 566, + 553, + 566, + 88, + 90, true, - "16", - "16" + "document body", + "document body" ], [ - "reference", - "container-title", - 8497015665124263236, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/185", + "#/texts/38", 1.0, - 10709633855219206820, - 961925091352749103, + 7559223825341723371, + 4627385075759212737, 18446744073709551615, 18446744073709551615, - 88, - 102, - 88, - 102, - 21, - 24, + 610, + 624, + 610, + 624, + 99, + 101, true, - "2016 IEEE HPEC", - "2016 IEEE HPEC" + "various levels", + "various levels" ], [ - "reference", - "date", - 8497015665124263236, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/185", + "#/texts/38", 1.0, - 6573474049096193902, - 2260581871937703980, + 724989623191821162, + 348519433085549433, 18446744073709551615, 18446744073709551615, - 104, - 113, - 104, - 113, - 25, - 28, + 659, + 671, + 659, + 671, + 110, + 112, true, - "2016; 1-9", - "2016; 1-9" + "internal row", + "internal row" ], [ - "reference", - "title", - 8497015665124263236, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/185", + "#/texts/38", 1.0, - 16641826418709048621, - 2282440200854755549, + 298685439673823473, + 14473962335563688005, 18446744073709551615, 18446744073709551615, - 45, - 86, - 45, - 86, - 15, - 20, + 676, + 693, + 676, + 693, + 113, + 115, true, - "Mathematical foundations of the GraphBLAS", - "Mathematical foundations of the GraphBLAS" + "column structures", + "column structures" ], [ - "reference", - "author", - 15947529491299956047, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/186", + "#/texts/38", 1.0, - 14650296444613217893, - 2015187192231796797, + 12178341415895653569, + 2400012968392811663, 18446744073709551615, 18446744073709551615, - 4, - 12, - 4, - 12, - 2, - 4, + 742, + 745, + 742, + 745, + 130, + 132, true, - "Ariful A", - "Ariful A" + "r p", + "r p" ], [ - "reference", - "author", - 15947529491299956047, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/186", + "#/texts/38", 1.0, - 6611311853662317003, - 219996680584521934, + 329104161784480753, + 7493450073534230471, 18446744073709551615, 18446744073709551615, - 14, - 23, - 14, - 23, - 5, - 7, + 754, + 759, + 754, + 759, + 135, + 138, true, - "Mathias J", - "Mathias J" + "r e s", + "r e s" ], [ - "reference", - "author", - 15947529491299956047, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/186", + "#/texts/38", 1.0, - 8106396252822508385, - 5214697480984905265, + 16381206513088282843, + 17468687230071587166, 18446744073709551615, 18446744073709551615, - 25, - 32, - 25, - 32, - 8, - 10, + 762, + 768, + 762, + 768, + 139, + 141, true, - "Aydin B", - "Aydin B" + "ti nas", + "ti nas" ], [ - "reference", - "author", - 15947529491299956047, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/186", + "#/texts/38", 1.0, - 1138450846564361539, - 13516232875802125645, + 8106475359937212610, + 17526369764917632585, 18446744073709551615, 18446744073709551615, - 34, - 46, - 34, - 46, - 11, - 15, + 771, + 778, + 771, + 778, + 142, + 146, true, - "Ng Esmond G.", - "Ng Esmond G." + "r u c t", + "r u c t" ], [ - "reference", - "citation-number", - 15947529491299956047, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/186", + "#/texts/38", 1.0, - 15441160910541481861, - 5749903657566610070, + 12178341415895653580, + 2400012973399203890, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 781, + 784, + 781, + 784, + 147, + 149, true, - "17", - "17" + "r e", + "r e" ], [ - "reference", - "container-title", - 15947529491299956047, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/186", + "#/texts/38", 1.0, - 10701056912570859123, - 6872071652706022831, + 12178341415895584463, + 2400183328813825594, 18446744073709551615, 18446744073709551615, - 106, - 175, - 106, - 175, - 26, - 34, + 791, + 794, + 791, + 794, + 151, + 153, true, - "2017 IEEE International Parallel and Distributed Processing Symposium", - "2017 IEEE International Parallel and Distributed Processing Symposium" + "c h", + "c h" ], [ - "reference", - "container-title", - 15947529491299956047, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/186", + "#/texts/38", 1.0, - 329104161866629985, - 4498077561104002021, + 15397570689800643360, + 728258568208254432, 18446744073709551615, 18446744073709551615, - 177, - 182, - 177, - 182, - 35, - 36, + 913, + 934, + 913, + 934, + 163, + 165, true, - "IPDPS", - "IPDPS" + "inherent dependencies", + "inherent dependencies" ], [ - "reference", - "date", - 15947529491299956047, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/186", + "#/texts/38", 1.0, - 7366731910384143591, - 4074534479596534226, + 3407118261441380378, + 14310748118874608151, 18446744073709551615, 18446744073709551615, - 185, - 196, - 185, - 196, - 38, - 41, + 983, + 1005, + 983, + 1005, + 173, + 176, true, - "2017: 22-31", - "2017: 22-31" + "directed acyclic graph", + "directed acyclic graph" ], [ - "reference", - "title", - 15947529491299956047, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/186", + "#/texts/38", 1.0, - 18143113072209505450, - 5317689214231344382, + 8106396823466748730, + 7340738159572681824, 18446744073709551615, 18446744073709551615, - 47, - 104, - 47, - 104, - 15, - 25, + 1133, + 1140, + 1133, + 1140, + 186, + 188, true, - "The reverse Cuthill-McKee algorithm in distributed-memory", - "The reverse Cuthill-McKee algorithm in distributed-memory" + "DF task", + "DF task" ], [ - "reference", - "author", - 14843401725435831033, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/187", + "#/texts/38", 1.0, - 9277063416399937233, - 9921862040524615824, + 2702984786539193186, + 6013382759463234661, 18446744073709551615, 18446744073709551615, - 4, - 14, - 4, - 14, - 2, - 4, + 34, + 44, + 34, + 44, + 8, + 9, true, - "Rukhsana S", - "Rukhsana S" + "collection", + "collection" ], [ - "reference", - "author", - 14843401725435831033, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/187", + "#/texts/38", 1.0, - 8106479273814684994, - 12770854321018137055, + 6167933651658664291, + 2995705359694128803, 18446744073709551615, 18446744073709551615, - 16, - 23, - 16, - 23, - 5, - 7, + 48, + 57, + 48, + 57, + 10, + 11, true, - "Anila U", - "Anila U" + "documents", + "documents" ], [ - "reference", - "author", - 14843401725435831033, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/187", + "#/texts/38", 1.0, - 16985962715048067011, - 772749724699858811, + 6167933651658664291, + 2995705359694038064, 18446744073709551615, 18446744073709551615, - 25, - 37, - 25, - 37, - 8, - 11, + 91, + 100, + 91, + 100, + 17, + 18, true, - "Chughtai IR.", - "Chughtai IR." + "documents", + "documents" ], [ - "reference", - "citation-number", - 14843401725435831033, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/187", + "#/texts/38", 1.0, - 15441160910541481862, - 17618650105274567067, + 329104161511786824, + 7360980031546797901, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 214, + 219, + 214, + 219, + 26, + 27, true, - "18", - "18" + "cases", + "cases" ], [ - "reference", - "date", - 14843401725435831033, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/187", + "#/texts/38", 1.0, - 389609625548757410, - 18165604049296771030, + 5947879506556567994, + 12367501146346618724, 18446744073709551615, 18446744073709551615, - 88, - 92, - 88, - 92, - 19, - 20, + 281, + 290, + 281, + 290, + 37, + 38, true, - "2005", - "2005" + "companies", + "companies" ], [ - "reference", - "date", - 14843401725435831033, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/187", + "#/texts/38", 1.0, - 389609625548757410, - 18165604049296772353, + 15694895771625575831, + 5124557589675872289, 18446744073709551615, 18446744073709551615, - 133, - 137, - 133, - 137, - 25, - 26, + 295, + 308, + 295, + 308, + 39, + 40, true, - "2005", - "2005" + "organizations", + "organizations" ], [ - "reference", - "pages", - 14843401725435831033, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/187", + "#/texts/38", 1.0, - 12178341415896427411, - 9464229838695116121, + 12178341415896221596, + 2400025488731380794, 18446744073709551615, 18446744073709551615, - 138, - 141, - 138, - 141, - 26, - 27, + 385, + 388, + 385, + 388, + 54, + 55, true, - "1-7", - "1-7" + "CCS", + "CCS" ], [ - "reference", - "title", - 14843401725435831033, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/187", + "#/texts/38", 1.0, - 12931819230736677229, - 14856363282836835505, + 329104159243796903, + 8933277972599288721, 18446744073709551615, 18446744073709551615, - 38, - 86, - 38, - 86, - 11, - 18, + 398, + 403, + 398, + 403, + 58, + 59, true, - "Review of storage techniques for sparse matrices", - "Review of storage techniques for sparse matrices" + "types", + "types" ], [ - "reference", - "title", - 14843401725435831033, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/187", + "#/texts/38", 1.0, - 1320248361117940781, - 5199561905441189481, + 6167933651658664291, + 2995705359694164349, 18446744073709551615, 18446744073709551615, - 93, - 131, - 93, - 131, - 20, - 24, + 407, + 416, + 407, + 416, + 60, + 61, true, - "Pakistan Section Multitopic Conference", - "Pakistan Section Multitopic Conference" + "documents", + "documents" ], [ - "reference", - "author", - 16676439669743530711, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/188", + "#/texts/38", 1.0, - 14638563242508500832, - 2752940376292253295, + 6182928126338688945, + 18137263452770401078, 18446744073709551615, 18446744073709551615, - 4, - 12, - 4, - 12, - 2, - 4, + 492, + 501, + 492, + 501, + 74, + 75, true, - "Welte DH", - "Welte DH" + "meta-data", + "meta-data" ], [ - "reference", - "author", - 16676439669743530711, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/188", + "#/texts/38", 1.0, - 1317380608127935415, - 8792991722627090893, + 15441160910541487324, + 7422793804087321065, 18446744073709551615, 18446744073709551615, - 14, - 25, - 14, - 25, - 5, - 7, + 503, + 505, + 503, + 505, + 76, + 77, true, - "Horsfield B", - "Horsfield B" + "eg", + "eg" ], [ - "reference", - "author", - 16676439669743530711, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/188", + "#/texts/38", 1.0, - 4172892994592792372, - 2160694788416159558, + 329104159220026466, + 8927506052944868849, 18446744073709551615, 18446744073709551615, - 27, - 46, - 27, - 46, - 8, - 12, + 507, + 512, + 507, + 512, + 78, + 79, true, - "Baker DR. Petroleum", - "Baker DR. Petroleum" + "title", + "title" ], [ - "reference", - "author", - 16676439669743530711, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/188", + "#/texts/38", 1.0, - 5561358046097680519, - 15395766198352277458, + 15984565858548749625, + 16963224979092543907, 18446744073709551615, 18446744073709551615, - 51, - 67, - 51, - 67, - 13, - 16, + 524, + 534, + 524, + 534, + 82, + 83, true, - "Basin Evolution:", - "Basin Evolution:" + "references", + "references" ], [ - "reference", - "citation-number", - 16676439669743530711, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/188", + "#/texts/38", 1.0, - 15441160910541481863, - 8099163979199984832, + 8106397759446161562, + 10642848847630119927, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 536, + 543, + 536, + 543, + 84, + 85, true, - "19", - "19" + "authors", + "authors" ], [ - "reference", - "date", - 16676439669743530711, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/188", + "#/texts/38", 1.0, - 16381206542172924133, - 9981189962990674937, + 16381206590630461421, + 7979888348543905422, 18446744073709551615, 18446744073709551615, - 169, - 175, - 169, - 175, - 33, - 35, + 572, + 578, + 572, + 578, + 92, + 93, true, - "; 1997", - "; 1997" + "latter", + "latter" ], [ - "reference", - "journal", - 16676439669743530711, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/188", + "#/texts/38", 1.0, - 2422127895824933260, - 7556925222758925531, + 6165459422936662913, + 156978013262325260, 18446744073709551615, 18446744073709551615, - 106, - 133, - 106, - 133, - 21, - 26, + 596, + 605, + 596, + 605, + 96, + 97, true, - "Geology, and Basin Modeling", - "Geology, and Basin Modeling" + "subtitles", + "subtitles" ], [ - "reference", - "location", - 16676439669743530711, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/188", + "#/texts/38", 1.0, - 11741555610443867475, - 15927342063432766432, + 13968965538538956038, + 3970349775283182601, 18446744073709551615, 18446744073709551615, - 135, - 153, - 135, - 153, - 27, - 30, + 627, + 637, + 627, + 637, + 103, + 104, true, - "Berlin Heidelberg:", - "Berlin Heidelberg:" + "paragraphs", + "paragraphs" ], [ - "reference", - "publisher", - 16676439669743530711, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/188", + "#/texts/38", 1.0, - 3197612152806046883, - 2512966040017790311, + 329104161597245179, + 7405248900434860581, 18446744073709551615, 18446744073709551615, - 154, - 169, - 154, - 169, - 30, - 33, + 639, + 644, + 639, + 644, + 105, + 106, true, - "Springer-Verlag", - "Springer-Verlag" + "lists", + "lists" ], [ - "reference", - "title", - 16676439669743530711, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/188", + "#/texts/38", 1.0, - 1197865287651023688, - 134234943361095181, + 16381206513098478539, + 17467059885496981342, 18446744073709551615, 18446744073709551615, - 68, - 104, - 68, - 104, - 16, - 20, + 646, + 652, + 646, + 652, + 107, + 108, true, - "Insights from Petroleum Geochemistry", - "Insights from Petroleum Geochemistry" + "tables", + "tables" ], [ - "reference", - "author", - 2986547206451163051, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/189", + "#/texts/38", 1.0, - 8106351306870445011, - 7231860053894851093, + 8106397480533647371, + 7988334766998510018, 18446744073709551615, 18446744073709551615, - 37, - 44, - 37, - 44, - 9, - 11, + 696, + 703, + 696, + 703, + 117, + 118, true, - "Dolfi M", - "Dolfi M" + "figures", + "figures" ], [ - "reference", - "author", - 2986547206451163051, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/189", + "#/texts/38", 1.0, - 8106479197488776816, - 6022123083747398357, + 14652289689770638970, + 15793206492240518081, 18446744073709551615, 18446744073709551615, - 46, - 53, - 46, - 53, - 12, - 15, + 716, + 724, + 716, + 724, + 121, + 122, true, - "Auer C.", - "Auer C." + "captions", + "captions" ], [ - "reference", - "date", - 2986547206451163051, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/189", + "#/texts/38", 1.0, - 12668563530344603848, - 14820206483220239473, + 5303544497514782120, + 15576269407827001608, 18446744073709551615, 18446744073709551615, - 173, - 183, - 173, - 183, - 35, - 41, + 869, + 879, + 869, + 879, + 157, + 158, true, - "2020;1:e20", - "2020;1:e20" + "extraction", + "extraction" ], [ - "reference", - "journal", - 2986547206451163051, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/189", + "#/texts/38", 1.0, - 10525943314116263182, - 11312474291607917611, + 844664518895955636, + 15595830566546024366, 18446744073709551615, 18446744073709551615, - 153, - 171, - 153, - 171, - 31, - 34, + 896, + 907, + 896, + 907, + 160, + 161, true, - "Applied AI Letters", - "Applied AI Letters" + "aggregation", + "aggregation" ], [ - "reference", - "title", - 2986547206451163051, + "term", + "single-term", + 3276490574487379366, "TEXT", - "#/texts/189", + "#/texts/38", 1.0, - 912378836411683307, - 17710224191321636054, + 329104159214088329, + 8900803654811599827, + 18446744073709551615, + 18446744073709551615, + 955, + 960, + 955, + 960, + 168, + 169, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 12178341415896112046, + 2400051137088161641, + 18446744073709551615, + 18446744073709551615, + 1007, + 1010, + 1007, + 1010, + 177, + 178, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 8106396517320706028, + 767766212633331407, + 18446744073709551615, + 18446744073709551615, + 1116, + 1123, + 1116, + 1123, + 183, + 184, + true, + "details", + "details" + ], + [ + "numval", + "fval", + 3367451956962330174, + "TEXT", + "#/texts/39", + 1.0, + 12178341415896439119, + 1493266672212178244, 18446744073709551615, 18446744073709551615, 0, - 35, + 3, + 0, + 3, 0, + 1, + true, + "2.1", + "2.1" + ], + [ + "numval", + "ival", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 17767354399704235161, + 13327421909992595494, + 18446744073709551615, + 18446744073709551615, + 10, + 11, + 10, + 11, + 2, + 3, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 17767354399704235161, + 13327421909992715764, + 18446744073709551615, + 18446744073709551615, + 176, + 177, + 176, + 177, + 35, + 35, + false, + "1", + "1" + ], + [ + "expression", + "wtoken-concatenation", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 17090092756343575965, + 11744760582045783073, + 18446744073709551615, + 18446744073709551615, + 170, + 218, + 170, + 218, 35, + 36, + true, + "Figure1toillustratethepurposeandimplementationof", + "Figure1toillustratethepurposeandimplementationof" + ], + [ + "sentence", + "", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 18361587473539395888, + 13758431587446230571, + 18446744073709551615, + 18446744073709551615, 0, - 8, + 157, + 0, + 157, + 0, + 32, true, - "How to cite this article: Staar PWJ", - "How to cite this article: Staar PWJ" + "In Figure 1, we sketch a minimal DF, in which each of the three tasks is used consecutively in order to generate entities and relationships for a generic KG.", + "In Figure 1, we sketch a minimal DF, in which each of the three tasks is used consecutively in order to generate entities and relationships for a generic KG." ], [ - "reference", - "title", - 2986547206451163051, + "sentence", + "", + 5509744459704235873, "TEXT", - "#/texts/189", + "#/texts/40", 1.0, - 4375081646508065875, - 5872894694925809811, + 14266973449968823144, + 5033210687475997626, 18446744073709551615, 18446744073709551615, - 54, - 151, - 54, - 151, - 15, - 30, + 158, + 232, + 158, + 232, + 32, + 40, true, - "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", - "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora" + "We will use Figure1toillustratethepurposeandimplementationof each DF task.", + "We will use Figure1toillustratethepurposeandimplementationof each DF task." ], [ - "reference", - "url", - 2986547206451163051, + "term", + "enum-term-mark-3", + 5509744459704235873, "TEXT", - "#/texts/189", + "#/texts/40", 1.0, - 751450063096904044, - 2161551171101074414, + 13335488353876392384, + 2655829317417497504, 18446744073709551615, 18446744073709551615, - 185, - 216, - 185, - 216, - 42, - 54, + 113, + 139, + 113, + 139, + 24, + 27, true, - "https://doi.org/10.1002/ail2.20", - "https://doi.org/10.1002/ail2.20" + "entities and relationships", + "entities and relationships" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 14108247744263052140, + 4614352863788930341, + 18446744073709551615, + 18446744073709551615, + 25, + 35, + 25, + 35, + 7, + 9, + true, + "minimal DF", + "minimal DF" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 2511167939540360071, + 5009883108334261823, + 18446744073709551615, + 18446744073709551615, + 146, + 156, + 146, + 156, + 29, + 31, + true, + "generic KG", + "generic KG" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 8106396823466748730, + 7802822363042755988, + 18446744073709551615, + 18446744073709551615, + 224, + 231, + 224, + 231, + 37, + 39, + true, + "DF task", + "DF task" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 16381206514091025767, + 10250804564386933303, + 18446744073709551615, + 18446744073709551615, + 3, + 9, + 3, + 9, + 1, + 2, + true, + "Figure", + "Figure" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 329104159214088329, + 13912131276346951656, + 18446744073709551615, + 18446744073709551615, + 64, + 69, + 64, + 69, + 16, + 17, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 329104161571401725, + 14203603284583761528, + 18446744073709551615, + 18446744073709551615, + 95, + 100, + 95, + 100, + 21, + 22, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 14652256560445338257, + 3592483441909519898, + 18446744073709551615, + 18446744073709551615, + 113, + 121, + 113, + 121, + 24, + 25, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 8279380567349713241, + 9458076378316233083, + 18446744073709551615, + 18446744073709551615, + 126, + 139, + 126, + 139, + 26, + 27, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 17090092756343575965, + 11744760582045783073, + 18446744073709551615, + 18446744073709551615, + 170, + 218, + 170, + 218, + 35, + 36, + true, + "Figure1toillustratethepurposeandimplementationof", + "Figure1toillustratethepurposeandimplementationof" ], [ "numval", "year", 18391264192891079539, "TEXT", - "#/texts/190", + "#/texts/41", 1.0, 389609625548777262, 8826555294676663632, @@ -12388,7 +12879,7 @@ "year", 18391264192891079539, "TEXT", - "#/texts/190", + "#/texts/41", 1.0, 389609625548777251, 8826555296349648778, @@ -12398,9 +12889,9 @@ 123, 119, 123, - 34, - 35, - true, + 14, + 14, + false, "2023", "2023" ], @@ -12409,7 +12900,7 @@ "fval", 18391264192891079539, "TEXT", - "#/texts/190", + "#/texts/41", 1.0, 8104408072666212335, 13552219042525319352, @@ -12419,9 +12910,9 @@ 78, 71, 78, - 20, - 21, - true, + 8, + 8, + false, "10.1002", "10.1002" ], @@ -12430,7 +12921,7 @@ "fval", 18391264192891079539, "TEXT", - "#/texts/190", + "#/texts/41", 1.0, 389609625548868096, 8826558551385119058, @@ -12440,9 +12931,9 @@ 86, 82, 86, - 23, - 24, - true, + 8, + 9, + false, "2.20", "2.20" ], @@ -12451,7 +12942,7 @@ "ival", 18391264192891079539, "TEXT", - "#/texts/190", + "#/texts/41", 1.0, 14654386914267794441, 12796143052106760105, @@ -12472,7 +12963,7 @@ "ival", 18391264192891079539, "TEXT", - "#/texts/190", + "#/texts/41", 1.0, 17767354399704235162, 7753390158484899261, @@ -12493,7 +12984,7 @@ "ival", 18391264192891079539, "TEXT", - "#/texts/190", + "#/texts/41", 1.0, 15441160910541481791, 3518619573290839093, @@ -12503,9 +12994,9 @@ 115, 113, 115, - 30, - 31, - true, + 14, + 14, + false, "23", "23" ], @@ -12514,7 +13005,7 @@ "ival", 18391264192891079539, "TEXT", - "#/texts/190", + "#/texts/41", 1.0, 15441160910541481543, 3518617976696906498, @@ -12524,9 +13015,9 @@ 118, 116, 118, - 32, - 33, - true, + 14, + 14, + false, "08", "08" ], @@ -12535,7 +13026,7 @@ "url", 18391264192891079539, "TEXT", - "#/texts/190", + "#/texts/41", 1.0, 8536069645534292969, 16063604623463467342, @@ -12546,7 +13037,7 @@ 35, 87, 8, - 25, + 10, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -12556,7 +13047,7 @@ "url", 18391264192891079539, "TEXT", - "#/texts/190", + "#/texts/41", 1.0, 594099663775968682, 14698211805947073928, @@ -12566,8 +13057,8 @@ 208, 156, 208, - 43, - 58, + 22, + 37, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -12577,7 +13068,7 @@ "doi", 18391264192891079539, "TEXT", - "#/texts/190", + "#/texts/41", 1.0, 1697220653346092555, 8458710314769009562, @@ -12587,2280 +13078,62115 @@ 87, 67, 87, - 18, - 25, - true, + 8, + 10, + false, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," ], [ - "numval", - "ival", - 12469893451248582632, - "TABLE", - "#/tables/0", + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 15441160910541482672, - 3558959168916500461, - 0, - 2, - 3, - 5, - 3, - 5, - 1, - 3, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, true, - "-1", - "-1" + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" ], [ - "numval", - "ival", - 12469893451248582632, - "TABLE", - "#/tables/0", + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 15441160910541482673, - 3558959168967845780, - 0, - 3, - 3, - 5, - 3, - 5, - 1, - 3, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, true, - "-2", - "-2" + "[23/08/2023]", + "[23/08/2023]" ], [ - "numval", - "ival", - 12469893451248582632, - "TABLE", - "#/tables/0", + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 15441160910541482674, - 3558959169084991311, - 0, - 4, - 3, - 5, - 3, - 5, - 1, - 3, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, true, - "-3", - "-3" + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" ], [ - "numval", - "ival", - 12469893451248582632, - "TABLE", - "#/tables/0", + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 15441160910541482676, - 3558959170275494348, - 0, - 5, - 3, - 5, - 3, - 5, - 1, - 3, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, true, - "-5", - "-5" + "[23/08/2023]", + "[23/08/2023]" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 389609625535995426, - 7990768689708475978, - 1, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, true, - "0.82", - "0.82" + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 389609625535995621, - 7990774618103388257, - 1, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, true, - "0.96", - "0.96" + "Wiley Online Library", + "Wiley Online Library" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 389609625535995627, - 7990774615713296517, - 1, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, true, - "0.98", - "0.98" + "Wiley Online Library", + "Wiley Online Library" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 389609625536250803, - 7990774066976884381, - 1, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, true, - "1.00", - "1.00" + "OA articles", + "OA articles" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 389609625535995622, - 7990774618160743993, - 2, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, true, - "0.93", - "0.93" + "applicable Creative Commons License", + "applicable Creative Commons License" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 389609625535995627, - 7990774615712524481, - 2, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, true, - "0.98", - "0.98" + "Terms", + "Terms" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 389609625536250803, - 7990774066976098009, - 2, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, true, - "1.00", - "1.00" + "Conditions", + "Conditions" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", 1.0, - 389609625536250803, - 7990774066976110280, - 2, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, true, - "1.00", - "1.00" + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" ], [ "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "ival", + 4361549176688508574, + "TEXT", + "#/texts/42", 1.0, - 389609625535995293, - 7990774599790700074, - 3, - 2, + 17767354399704235156, + 7238925036885539838, + 18446744073709551615, + 18446744073709551615, 0, - 4, + 1, 0, - 4, + 1, 0, - 4, + 1, true, - "0.62", - "0.62" + "4", + "4" ], [ "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "ival", + 4361549176688508574, + "TEXT", + "#/texts/42", 1.0, - 389609625535995424, - 7990768689730984037, + 15441160910541481979, + 7918922223876958481, + 18446744073709551615, + 18446744073709551615, 3, + 5, + 3, + 5, + 2, 3, - 0, - 4, - 0, - 4, - 0, - 4, true, - "0.80", - "0.80" + "15", + "15" ], [ "numval", "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + 12374482891052873875, + "TEXT", + "#/texts/43", 1.0, - 389609625535995433, - 7990768688117646262, + 12178341415896439119, + 1298001416237199126, + 18446744073709551615, + 18446744073709551615, + 0, 3, - 4, 0, - 4, + 3, 0, - 4, 0, - 4, - true, - "0.87", - "0.87" + false, + "2.1", + "2.1" ], [ "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "ival", + 12374482891052873875, + "TEXT", + "#/texts/43", 1.0, - 389609625535995623, - 7990774617730131452, - 3, - 5, - 0, + 17767354399704235161, + 4264503375288263632, + 18446744073709551615, + 18446744073709551615, 4, - 0, + 5, 4, + 5, 0, - 4, - true, - "0.94", - "0.94" + 1, + false, + "1", + "1" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "expression", + "wtoken-concatenation", + 12374482891052873875, + "TEXT", + "#/texts/43", 1.0, - 389609625535995492, - 7990768692352137559, - 4, - 2, + 329104147711421761, + 15707288010084820862, + 18446744073709551615, + 18446744073709551615, 0, - 4, + 5, 0, - 4, + 5, 0, - 4, + 1, true, - "0.73", - "0.73" + "2.1.1", + "2.1.1" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "parenthesis", + "round brackets", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995616, - 7990774618481181961, - 4, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 13286102413593957394, + 6381707955564535797, + 18446744073709551615, + 18446744073709551615, + 53, + 78, + 53, + 78, + 10, + 16, true, - "0.91", - "0.91" + "(eg, document components)", + "(eg, document components)" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "parenthesis", + "round brackets", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995623, - 7990774617741217753, - 4, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 10585762328342379081, + 2922498186515452860, + 18446744073709551615, + 18446744073709551615, + 119, + 134, + 119, + 134, + 23, + 28, true, - "0.94", - "0.94" + "(eg, documents)", + "(eg, documents)" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "sentence", + "", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995626, - 7990774612563908250, - 4, - 5, + 15409610016137670632, + 15197855970598893502, + 18446744073709551615, + 18446744073709551615, 0, - 4, + 135, 0, - 4, + 135, 0, - 4, + 29, true, - "0.97", - "0.97" + "In an extraction task, we generate new data entities (eg, document components) from an original set of source entities (eg, documents).", + "In an extraction task, we generate new data entities (eg, document components) from an original set of source entities (eg, documents)." ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "sentence", + "", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995426, - 7990768689764177354, - 5, + 12978591138826985854, + 5403545883930117735, + 18446744073709551615, + 18446744073709551615, + 136, + 261, + 136, + 261, + 29, + 50, + true, + "During this process, new links are created which connect these newly generated data entities to their original source entity.", + "During this process, new links are created which connect these newly generated data entities to their original source entity." + ], + [ + "sentence", + "", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 14884458065406655707, + 12844102167211528377, + 18446744073709551615, + 18446744073709551615, + 262, + 403, + 262, + 403, + 50, + 74, + true, + "Typical examples of such extraction tasks are the extraction of abstracts, paragraphs, tables, or figures from the structured document files.", + "Typical examples of such extraction tasks are the extraction of abstracts, paragraphs, tables, or figures from the structured document files." + ], + [ + "term", + "enum-term-mark-3", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 12690021242452676680, + 6810659946202167494, + 18446744073709551615, + 18446744073709551615, + 326, + 367, + 326, + 367, + 60, + 68, + true, + "abstracts, paragraphs, tables, or figures", + "abstracts, paragraphs, tables, or figures" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 5101579281631733460, + 3966842469618203217, + 18446744073709551615, + 18446744073709551615, + 6, + 21, + 6, + 21, 2, - 0, - 4, - 0, - 4, - 0, 4, true, - "0.82", - "0.82" + "extraction task", + "extraction task" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995623, - 7990774617746212517, - 5, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 8165740181202876025, + 12959134491963005199, + 18446744073709551615, + 18446744073709551615, + 35, + 52, + 35, + 52, + 7, + 10, true, - "0.94", - "0.94" + "new data entities", + "new data entities" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995626, - 7990774612589838230, - 5, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 17524405716142769441, + 8406307373037476042, + 18446744073709551615, + 18446744073709551615, + 58, + 77, + 58, + 77, + 13, + 15, true, - "0.97", - "0.97" + "document components", + "document components" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995627, - 7990774616182657591, - 5, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 5306796263967471926, + 13469424225666590652, + 18446744073709551615, + 18446744073709551615, + 87, + 99, + 87, + 99, + 18, + 20, true, - "0.98", - "0.98" + "original set", + "original set" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995426, - 7990768689764403839, - 6, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 15765380208127739160, + 18326515589191928219, + 18446744073709551615, + 18446744073709551615, + 103, + 118, + 103, + 118, + 21, + 23, true, - "0.82", - "0.82" + "source entities", + "source entities" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995617, - 7990774618567229989, - 6, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 6172031743812195918, + 18336408684850256697, + 18446744073709551615, + 18446744073709551615, + 157, + 166, + 157, + 166, + 33, + 35, true, - "0.92", - "0.92" + "new links", + "new links" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995620, - 7990774618125993935, - 6, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 5594093096302267983, + 8802313533208580560, + 18446744073709551615, + 18446744073709551615, + 215, + 228, + 215, + 228, + 42, + 44, true, - "0.95", - "0.95" + "data entities", + "data entities" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995626, - 7990774612590090226, - 6, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 17136238213570622776, + 10120757655484606397, + 18446744073709551615, + 18446744073709551615, + 238, + 260, + 238, + 260, + 46, + 49, true, - "0.97", - "0.97" + "original source entity", + "original source entity" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995494, - 7990768689217789732, - 7, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 7471629211517394017, + 10357904862024863501, + 18446744073709551615, + 18446744073709551615, + 262, + 278, + 262, + 278, + 50, + 52, true, - "0.75", - "0.75" + "Typical examples", + "Typical examples" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995617, - 7990774619359159209, - 7, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 16021145566749909698, + 8783219642454586604, + 18446744073709551615, + 18446744073709551615, + 282, + 303, + 282, + 303, + 53, + 56, true, - "0.92", - "0.92" + "such extraction tasks", + "such extraction tasks" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995621, - 7990774618108893234, - 7, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 11975718842215856689, + 7368242614057501661, + 18446744073709551615, + 18446744073709551615, + 377, + 402, + 377, + 402, + 70, + 73, true, - "0.96", - "0.96" + "structured document files", + "structured document files" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995626, - 7990774612570894765, - 7, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 15441160910541487324, + 3536104368677879456, + 18446744073709551615, + 18446744073709551615, + 54, + 56, + 54, + 56, + 11, + 12, true, - "0.97", - "0.97" + "eg", + "eg" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995435, - 7990774626011945031, - 8, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 15441160910541487324, + 3536104368677884369, + 18446744073709551615, + 18446744073709551615, + 120, + 122, + 120, + 122, + 24, + 25, true, - "0.89", - "0.89" + "eg", + "eg" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995621, - 7990774618110730915, - 8, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 6167933651658664291, + 787322433359315506, + 18446744073709551615, + 18446744073709551615, + 124, + 133, + 124, + 133, + 26, + 27, true, - "0.96", - "0.96" + "documents", + "documents" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995626, - 7990774612562839849, - 8, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 8106476000254393164, + 1225756195807708888, + 18446744073709551615, + 18446744073709551615, + 148, + 155, + 148, + 155, + 31, + 32, true, - "0.97", - "0.97" + "process", + "process" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995627, - 7990774616172489304, - 8, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 5303544497514782120, + 7449508868916247566, + 18446744073709551615, + 18446744073709551615, + 312, + 322, + 312, + 322, + 58, + 59, true, - "0.98", - "0.98" + "extraction", + "extraction" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995429, - 7990774613602439211, - 9, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 5950055304304346669, + 7040340631860090954, + 18446744073709551615, + 18446744073709551615, + 326, + 335, + 326, + 335, + 60, + 61, true, - "0.83", - "0.83" + "abstracts", + "abstracts" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995617, - 7990774619353439571, - 9, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 13968965538538956038, + 12002401925983499058, + 18446744073709551615, + 18446744073709551615, + 337, + 347, + 337, + 347, + 62, + 63, true, - "0.92", - "0.92" + "paragraphs", + "paragraphs" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", 1.0, - 389609625535995620, - 7990774618123099565, - 9, - 4, + 16381206513098478539, + 18007320769283054809, + 18446744073709551615, + 18446744073709551615, + 349, + 355, + 349, + 355, + 64, + 65, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 8106397480533647371, + 6262926971414929226, + 18446744073709551615, + 18446744073709551615, + 360, + 367, + 360, + 367, + 67, + 68, + true, + "figures", + "figures" + ], + [ + "sentence", + "", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 2147087418790148248, + 15000446724502032444, + 18446744073709551615, + 18446744073709551615, 0, - 4, + 170, 0, - 4, + 170, 0, - 4, + 29, true, - "0.95", - "0.95" + "From a scalability point of view, this task is embarrassingly parallel, which makes it extremely easy to implement on loosely interconnected environments such as a cloud.", + "From a scalability point of view, this task is embarrassingly parallel, which makes it extremely easy to implement on loosely interconnected environments such as a cloud." ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "sentence", + "", + 4698316471746130896, + "TEXT", + "#/texts/45", 1.0, - 389609625535995621, - 7990774618110462820, + 463110093402807730, + 15879147521720349842, + 18446744073709551615, + 18446744073709551615, + 171, + 359, + 171, + 359, + 29, + 61, + true, + "We simply iterate in parallel over all source entities in the backend database, extract the desired components and then insert those components as new data entities back into the database.", + "We simply iterate in parallel over all source entities in the backend database, extract the desired components and then insert those components as new data entities back into the database." + ], + [ + "sentence", + "", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 1890759309653672100, + 6265380164012186787, + 18446744073709551615, + 18446744073709551615, + 360, + 417, + 360, + 417, + 61, + 69, + true, + "Extraction tasks have no internal synchronization points.", + "Extraction tasks have no internal synchronization points." + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 15351504864593712035, + 2756443006928426392, + 18446744073709551615, + 18446744073709551615, + 7, + 24, + 7, + 24, + 2, + 4, + true, + "scalability point", + "scalability point" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 15765380208127739160, + 277110281299086050, + 18446744073709551615, + 18446744073709551615, + 210, + 225, + 210, + 225, + 36, + 38, + true, + "source entities", + "source entities" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 8498518363315513669, + 16461247402515495671, + 18446744073709551615, + 18446744073709551615, + 233, + 249, + 233, + 249, + 40, + 42, + true, + "backend database", + "backend database" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 8165740181202876025, + 16399806825987925636, + 18446744073709551615, + 18446744073709551615, + 318, + 335, + 318, + 335, + 53, + 56, + true, + "new data entities", + "new data entities" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 14447454215787633762, + 11742892003972248650, + 18446744073709551615, + 18446744073709551615, + 360, + 376, + 360, + 376, + 61, + 63, + true, + "Extraction tasks", + "Extraction tasks" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 15507406252536266458, + 3423090220874343271, + 18446744073709551615, + 18446744073709551615, + 385, + 416, + 385, + 416, + 65, + 68, + true, + "internal synchronization points", + "internal synchronization points" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 389609625619349298, + 9833507871259260213, + 18446744073709551615, + 18446744073709551615, + 28, + 32, + 28, + 32, + 5, + 6, + true, + "view", + "view" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 389609625631210899, + 9833297825584884817, + 18446744073709551615, + 18446744073709551615, + 39, + 43, + 39, + 43, + 8, 9, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 8143730520203056904, + 2959281947983247053, + 18446744073709551615, + 18446744073709551615, + 141, + 153, + 141, + 153, + 23, + 24, + true, + "environments", + "environments" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 329104161517016668, + 10439779028398331162, + 18446744073709551615, + 18446744073709551615, + 164, + 169, + 164, + 169, + 27, + 28, + true, + "cloud", + "cloud" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 14814034872218884114, + 6147860370160875462, + 18446744073709551615, + 18446744073709551615, + 192, + 200, + 192, + 200, + 33, + 34, + true, + "parallel", + "parallel" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 2703018952916355661, + 12156539097906296251, + 18446744073709551615, + 18446744073709551615, + 271, + 281, + 271, + 281, + 46, + 47, + true, + "components", + "components" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 2703018952916355661, + 12156539097906285939, + 18446744073709551615, + 18446744073709551615, + 304, + 314, + 304, + 314, + 51, + 52, + true, + "components", + "components" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 14650399832241044640, + 12452531445847951870, + 18446744073709551615, + 18446744073709551615, + 350, + 358, + 350, + 358, + 59, + 60, + true, + "database", + "database" + ], + [ + "sentence", + "", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 3181956257518905626, + 12206342658123130916, + 18446744073709551615, + 18446744073709551615, + 0, + 171, + 0, + 171, + 0, + 30, + true, + "One particular benefit of this task is to make the query capability on the Knowledge Graph more fine grained by being able to provide provenance information on the result.", + "One particular benefit of this task is to make the query capability on the Knowledge Graph more fine grained by being able to provide provenance information on the result." + ], + [ + "sentence", + "", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 6450838456884111499, + 591579186413275111, + 18446744073709551615, + 18446744073709551615, + 172, + 282, + 172, + 282, + 30, + 53, + true, + "For example, this would let the user explore all the paragraphs, tables, or figures that embed a certain fact.", + "For example, this would let the user explore all the paragraphs, tables, or figures that embed a certain fact." + ], + [ + "term", + "enum-term-mark-3", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 14465129582137182361, + 8361227597967842953, + 18446744073709551615, + 18446744073709551615, + 225, + 255, + 225, + 255, + 41, + 47, + true, + "paragraphs, tables, or figures", + "paragraphs, tables, or figures" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 10222723319085515006, + 12762428063935087370, + 18446744073709551615, + 18446744073709551615, + 0, + 22, + 0, + 22, + 0, + 3, + true, + "One particular benefit", + "One particular benefit" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 1964765537092797933, + 15905881283685597246, + 18446744073709551615, + 18446744073709551615, + 51, + 67, + 51, + 67, + 10, + 12, + true, + "query capability", + "query capability" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 5877539623435777295, + 14123988044322330006, + 18446744073709551615, + 18446744073709551615, + 75, + 90, + 75, + 90, + 14, + 16, + true, + "Knowledge Graph", + "Knowledge Graph" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 10500127719357050942, + 7860892982742338668, + 18446744073709551615, + 18446744073709551615, + 134, + 156, + 134, + 156, + 24, + 26, + true, + "provenance information", + "provenance information" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 5446369751014219582, + 4289580841675524190, + 18446744073709551615, + 18446744073709551615, + 269, + 281, + 269, + 281, + 50, + 52, + true, + "certain fact", + "certain fact" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 389609625631210899, + 8702509282366718713, + 18446744073709551615, + 18446744073709551615, + 31, + 35, + 31, + 35, + 5, + 6, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 16381206521509536706, + 1134399847748717084, + 18446744073709551615, + 18446744073709551615, + 164, + 170, + 164, + 170, + 28, + 29, + true, + "result", + "result" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 8106397496085150773, + 4220253075084279441, + 18446744073709551615, + 18446744073709551615, + 176, + 183, + 176, + 183, + 31, + 32, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 389609625632179162, + 8702524581310998150, + 18446744073709551615, + 18446744073709551615, + 204, + 208, + 204, + 208, + 37, + 38, + true, + "user", + "user" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 13968965538538956038, + 4949616636950973040, + 18446744073709551615, + 18446744073709551615, + 225, + 235, + 225, + 235, + 41, + 42, + true, + "paragraphs", + "paragraphs" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 16381206513098478539, + 11251268092763423519, + 18446744073709551615, + 18446744073709551615, + 237, + 243, + 237, + 243, + 43, + 44, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 8106397480533647371, + 9599867079335670867, + 18446744073709551615, + 18446744073709551615, + 248, + 255, + 248, + 255, + 46, + 47, + true, + "figures", + "figures" + ], + [ + "numval", + "fval", + 6297710299044869343, + "TEXT", + "#/texts/47", + 1.0, + 12178341415896439119, + 9338691878670130519, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 0, + false, + "2.1", + "2.1" + ], + [ + "numval", + "ival", + 6297710299044869343, + "TEXT", + "#/texts/47", + 1.0, + 17767354399704235162, + 17230475508982970052, + 18446744073709551615, + 18446744073709551615, + 4, + 5, + 4, + 5, + 0, + 1, + false, + "2", + "2" + ], + [ + "expression", + "wtoken-concatenation", + 6297710299044869343, + "TEXT", + "#/texts/47", + 1.0, + 329104147711421774, + 10145012391943880145, + 18446744073709551615, + 18446744073709551615, + 0, 5, 0, - 4, + 5, + 0, + 1, + true, + "2.1.2", + "2.1.2" + ], + [ + "parenthesis", + "round brackets", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16141002319857532197, + 16721784361290394175, + 18446744073709551615, + 18446744073709551615, + 184, + 201, + 184, + 201, + 35, + 41, + true, + "(eg, a paragraph)", + "(eg, a paragraph)" + ], + [ + "parenthesis", + "round brackets", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 7045104231246392140, + 4304878346281955020, + 18446744073709551615, + 18446744073709551615, + 590, + 646, + 590, + 646, + 109, + 123, + true, + "(eg, noun phrases, abbreviations, unit and values, etc.)", + "(eg, noun phrases, abbreviations, unit and values, etc.)" + ], + [ + "expression", + "common", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 12178341415895450733, + 12807202931780954464, + 18446744073709551615, + 18446744073709551615, + 641, + 645, + 641, + 645, + 121, + 122, + true, + "etc", + "etc." + ], + [ + "sentence", + "", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16887173432250904127, + 7631275740120697636, + 18446744073709551615, + 18446744073709551615, + 0, + 125, + 0, + 125, + 0, + 22, + true, + "In the annotation task, we apply NLU methods to detect language entities and their relationships within a single data entity.", + "In the annotation task, we apply NLU methods to detect language entities and their relationships within a single data entity." + ], + [ + "sentence", + "", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 10511705512724067221, + 8131498709835920590, + 18446744073709551615, + 18446744073709551615, + 126, + 255, + 126, + 255, + 22, + 51, + true, + "Here, data entities can be as simple as a snippet of text (eg, a paragraph) or more complex structures such as tables or figures.", + "Here, data entities can be as simple as a snippet of text (eg, a paragraph) or more complex structures such as tables or figures." + ], + [ + "sentence", + "", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 8398014702449778476, + 12904618777003590817, + 18446744073709551615, + 18446744073709551615, + 256, + 392, + 256, + 392, + 51, + 77, + true, + "The main goal of the annotation task is to obtain all relevant information from the data entity with regard to the domain of the corpus.", + "The main goal of the annotation task is to obtain all relevant information from the data entity with regard to the domain of the corpus." + ], + [ + "sentence", + "", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 5036973358471188301, + 5113429010382725861, + 18446744073709551615, + 18446744073709551615, + 393, + 647, + 393, + 647, + 77, + 124, + true, + "Since different technical fields require different annotations, our annotation task is modular, allowing language entities to be annotated for material science, oil and gas, or more basic entities (eg, noun phrases, abbreviations, unit and values, etc.).", + "Since different technical fields require different annotations, our annotation task is modular, allowing language entities to be annotated for material science, oil and gas, or more basic entities (eg, noun phrases, abbreviations, unit and values, etc.)." + ], + [ + "term", + "enum-term-mark-2", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 1860880111352291313, + 13970201774176510008, + 18446744073709551615, + 18446744073709551615, + 536, + 565, + 536, + 565, + 98, + 104, + true, + "material science, oil and gas", + "material science, oil and gas" + ], + [ + "term", + "enum-term-mark-3", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 18384137313945358770, + 11714307673095999951, + 18446744073709551615, + 18446744073709551615, + 237, + 254, + 237, + 254, + 47, + 50, + true, + "tables or figures", + "tables or figures" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 4147688156856812386, + 11375319308790844386, + 18446744073709551615, + 18446744073709551615, + 7, + 22, + 7, + 22, + 2, + 4, + true, + "annotation task", + "annotation task" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 4064125330427640408, + 1392875787154483270, + 18446744073709551615, + 18446744073709551615, + 33, + 44, + 33, + 44, + 7, + 9, + true, + "NLU methods", + "NLU methods" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 12159911606058366544, + 12317116291497839674, + 18446744073709551615, + 18446744073709551615, + 55, + 72, + 55, + 72, + 11, + 13, + true, + "language entities", + "language entities" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 3099738444514320422, + 11450082253858502881, + 18446744073709551615, + 18446744073709551615, + 106, + 124, + 106, + 124, + 18, + 21, + true, + "single data entity", + "single data entity" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 5594093096302267983, + 4253165684837357319, + 18446744073709551615, + 18446744073709551615, + 132, + 145, + 132, + 145, + 24, + 26, + true, + "data entities", + "data entities" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 3570234228108234057, + 10514715831603435127, + 18446744073709551615, + 18446744073709551615, + 210, + 228, + 210, + 228, + 43, + 45, + true, + "complex structures", + "complex structures" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 6179391932551989543, + 16916864605364003434, + 18446744073709551615, + 18446744073709551615, + 260, + 269, + 260, + 269, + 52, + 54, + true, + "main goal", + "main goal" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 4147688156856812386, + 11375319308790730633, + 18446744073709551615, + 18446744073709551615, + 277, + 292, + 277, + 292, + 56, + 58, + true, + "annotation task", + "annotation task" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 447877196158192114, + 9731824282974847876, + 18446744073709551615, + 18446744073709551615, + 310, + 330, + 310, + 330, + 62, + 64, + true, + "relevant information", + "relevant information" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 4106840074686891911, + 6843113354081867506, + 18446744073709551615, + 18446744073709551615, + 340, + 351, + 340, + 351, + 66, + 68, + true, + "data entity", + "data entity" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 14770886840273751060, + 2461765204774504664, + 18446744073709551615, + 18446744073709551615, + 399, + 425, + 399, + 425, + 78, + 81, + true, + "different technical fields", + "different technical fields" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 14395582171383490407, + 17447243651093722639, + 18446744073709551615, + 18446744073709551615, + 434, + 455, + 434, + 455, + 82, + 84, + true, + "different annotations", + "different annotations" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 4147688156856812386, + 11375319308790832527, + 18446744073709551615, + 18446744073709551615, + 461, + 476, + 461, + 476, + 86, + 88, + true, + "annotation task", + "annotation task" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 12159911606058366544, + 12317116291497818179, + 18446744073709551615, + 18446744073709551615, + 498, + 515, + 498, + 515, + 92, + 94, + true, + "language entities", + "language entities" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 10788814978233814896, + 4396386661032428573, + 18446744073709551615, + 18446744073709551615, + 536, + 552, + 536, + 552, + 98, + 100, + true, + "material science", + "material science" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 15408216039387112052, + 9368993532817081616, + 18446744073709551615, + 18446744073709551615, + 575, + 589, + 575, + 589, + 107, + 109, + true, + "basic entities", + "basic entities" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 14689069836122597249, + 3316427666339118054, + 18446744073709551615, + 18446744073709551615, + 595, + 607, + 595, + 607, + 112, + 114, + true, + "noun phrases", + "noun phrases" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 8279380567349713241, + 3197450440257438778, + 18446744073709551615, + 18446744073709551615, + 83, + 96, + 83, + 96, + 15, + 16, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 8106478562764653920, + 94832442363733992, + 18446744073709551615, + 18446744073709551615, + 168, + 175, + 168, + 175, + 32, + 33, + true, + "snippet", + "snippet" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 389609625631325904, + 2402759180687356962, + 18446744073709551615, + 18446744073709551615, + 179, + 183, + 179, + 183, + 34, + 35, + true, + "text", + "text" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 15441160910541487324, + 8070554360546184492, + 18446744073709551615, + 18446744073709551615, + 185, + 187, + 185, + 187, + 36, + 37, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 6169141668427353082, + 2952751502941090235, + 18446744073709551615, + 18446744073709551615, + 191, + 200, + 191, + 200, + 39, + 40, + true, + "paragraph", + "paragraph" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16381206513098478539, + 4960744687131596426, + 18446744073709551615, + 18446744073709551615, + 237, + 243, + 237, + 243, + 47, + 48, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 8106397480533647371, + 7525065665694091915, + 18446744073709551615, + 18446744073709551615, + 247, + 254, + 247, + 254, + 49, + 50, + true, + "figures", + "figures" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16381206521526353544, + 4862321863264196616, + 18446744073709551615, + 18446744073709551615, + 357, + 363, + 357, + 363, + 69, + 70, + true, + "regard", + "regard" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16381206568268873414, + 8285328794802270882, + 18446744073709551615, + 18446744073709551615, + 371, + 377, + 371, + 377, + 72, + 73, + true, + "domain", + "domain" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16381206562408205435, + 1670996501012150979, + 18446744073709551615, + 18446744073709551615, + 385, + 391, + 385, + 391, + 75, + 76, + true, + "corpus", + "corpus" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 12178341415895623363, + 12807178710519925531, + 18446744073709551615, + 18446744073709551615, + 554, + 557, + 554, + 557, + 101, + 102, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 12178341415895464135, + 12807165220377806489, + 18446744073709551615, + 18446744073709551615, + 562, + 565, + 562, + 565, + 103, + 104, + true, + "gas", + "gas" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 15441160910541487324, + 8070554360545437401, + 18446744073709551615, + 18446744073709551615, + 591, + 593, + 591, + 593, + 110, + 111, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 1348537160828121453, + 2554508032215814292, + 18446744073709551615, + 18446744073709551615, + 609, + 622, + 609, + 622, + 115, + 116, + true, + "abbreviations", + "abbreviations" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 389609625632190829, + 2402804839141248529, + 18446744073709551615, + 18446744073709551615, + 624, + 628, + 624, + 628, + 117, + 118, + true, + "unit", + "unit" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16381206519529683092, + 8830939125119703029, + 18446744073709551615, + 18446744073709551615, + 633, + 639, + 633, + 639, + 119, + 120, + true, + "values", + "values" + ], + [ + "numval", + "fval", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 12178341415896310341, + 6520357412536397527, + 18446744073709551615, + 18446744073709551615, + 170, + 173, + 168, + 171, + 27, + 28, + true, + "5,6", + "5,6" + ], + [ + "numval", + "ival", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 17767354399704235159, + 17919867067928731763, + 18446744073709551615, + 18446744073709551615, + 228, + 229, + 226, + 227, + 36, + 37, + true, + "7", + "7" + ], + [ + "numval", + "ival", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 17767354399704235162, + 17919867064012061628, + 18446744073709551615, + 18446744073709551615, + 423, + 424, + 419, + 420, + 71, + 72, + true, + "2", + "2" + ], + [ + "parenthesis", + "round brackets", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 6251248012671019556, + 8400220771602544974, + 18446744073709551615, + 18446744073709551615, + 463, + 475, + 459, + 471, + 80, + 83, + true, + "(geological)", + "(geological)" + ], + [ + "expression", + "word-concatenation", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 5044385734724420019, + 7407814039821929674, + 18446744073709551615, + 18446744073709551615, + 184, + 200, + 182, + 198, + 30, + 31, + true, + "state-of-the-art", + "state-of-the-art" + ], + [ + "sentence", + "", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 7779111357672011576, + 17653885300415270377, + 18446744073709551615, + 18446744073709551615, + 0, + 169, + 0, + 167, + 0, + 27, + true, + "From a technical perspective, the language entities are detected and annotated using multiple NLU methods, ranging from complex regular expressions \u2020 to LSTM networks.", + "From a technical perspective, the language entities are detected and annotated using multiple NLU methods, ranging from complex regular expressions \u2020 to LSTM networks." + ], + [ + "sentence", + "", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 6580876879929044039, + 6602773183063942475, + 18446744073709551615, + 18446744073709551615, + 174, + 300, + 172, + 296, + 28, + 50, + true, + "We employ state-of-the-art NLU toolkits such as Spacy 7 or NLTK \u2021 to train and apply custom named entity recognition models.", + "We employ state-of-the-art NLU toolkits such as Spacy 7 or NLTK \u2021 to train and apply custom named entity recognition models." + ], + [ + "sentence", + "", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 17212207186390634612, + 10030533856655835592, + 18446744073709551615, + 18446744073709551615, + 301, + 403, + 297, + 399, + 50, + 67, + true, + "A detailed investigation of these NLU annotators unfortunately goes beyond of the scope of this paper.", + "A detailed investigation of these NLU annotators unfortunately goes beyond of the scope of this paper." + ], + [ + "sentence", + "", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 9979157195255435816, + 5766649868617218566, + 18446744073709551615, + 18446744073709551615, + 404, + 542, + 400, + 538, + 67, + 96, + true, + "However, in Figure 2, we show the different types of named (geological) entities found in a paragraph by our oil and gas annotation model.", + "However, in Figure 2, we show the different types of named (geological) entities found in a paragraph by our oil and gas annotation model." + ], + [ + "term", + "enum-term-mark-2", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 836843906912689304, + 5336535317156291017, + 18446744073709551615, + 18446744073709551615, + 513, + 541, + 509, + 537, + 90, + 95, + true, + "oil and gas annotation model", + "oil and gas annotation model" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 9847720278307775508, + 16897299718511055664, + 18446744073709551615, + 18446744073709551615, + 7, + 28, + 7, + 28, + 2, + 4, + true, + "technical perspective", + "technical perspective" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 12159911606058366544, + 9840970767482962073, + 18446744073709551615, + 18446744073709551615, + 34, + 51, + 34, + 51, + 6, + 8, + true, + "language entities", + "language entities" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 18281631562721121276, + 4629272614700109136, + 18446744073709551615, + 18446744073709551615, + 85, + 105, + 85, + 105, + 13, + 16, + true, + "multiple NLU methods", + "multiple NLU methods" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 6765267146872757605, + 14403665241565123375, + 18446744073709551615, + 18446744073709551615, + 120, + 147, + 120, + 147, + 19, + 22, + true, + "complex regular expressions", + "complex regular expressions" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 3706863384497465111, + 8221205291804750590, + 18446744073709551615, + 18446744073709551615, + 155, + 168, + 153, + 166, + 24, + 26, + true, + "LSTM networks", + "LSTM networks" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 8578460468129151886, + 12823972662225514109, + 18446744073709551615, + 18446744073709551615, + 184, + 213, + 182, + 211, + 30, + 33, + true, + "state-of-the-art NLU toolkits", + "state-of-the-art NLU toolkits" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 8774094861370452817, + 2952593523093736834, + 18446744073709551615, + 18446744073709551615, + 274, + 299, + 270, + 295, + 46, + 49, + true, + "entity recognition models", + "entity recognition models" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 3850923311242910952, + 11129645616881861710, + 18446744073709551615, + 18446744073709551615, + 303, + 325, + 299, + 321, + 51, + 53, + true, + "detailed investigation", + "detailed investigation" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 17250040061445046934, + 11791038398155808201, + 18446744073709551615, + 18446744073709551615, + 335, + 349, + 331, + 345, + 55, + 57, + true, + "NLU annotators", + "NLU annotators" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 13127417780371024365, + 3643698460906758134, + 18446744073709551615, + 18446744073709551615, + 438, + 453, + 434, + 449, + 76, + 78, + true, + "different types", + "different types" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 6054420812037878801, + 17086623133308882247, + 18446744073709551615, + 18446744073709551615, + 521, + 541, + 517, + 537, + 92, + 95, + true, + "gas annotation model", + "gas annotation model" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 329104162342367500, + 10244334105852443240, + 18446744073709551615, + 18446744073709551615, + 222, + 227, + 220, + 225, + 35, + 36, + true, + "Spacy", + "Spacy" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 389609625695864805, + 14626057431240642030, + 18446744073709551615, + 18446744073709551615, + 233, + 237, + 231, + 235, + 38, + 39, + true, + "NLTK", + "NLTK" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 16381206559341571450, + 1447158595331306969, + 18446744073709551615, + 18446744073709551615, + 261, + 267, + 257, + 263, + 44, + 45, + true, + "custom", + "custom" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 329104161784846775, + 10257930626923248048, + 18446744073709551615, + 18446744073709551615, + 383, + 388, + 379, + 384, + 62, + 63, + true, + "scope", + "scope" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 329104161668023890, + 10258894979066678110, + 18446744073709551615, + 18446744073709551615, + 397, + 402, + 393, + 398, + 65, + 66, + true, + "paper", + "paper" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 16381206514091025767, + 251249362636336734, + 18446744073709551615, + 18446744073709551615, + 416, + 422, + 412, + 418, + 70, + 71, + true, + "Figure", + "Figure" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 14652256560445338257, + 5505418234470722173, + 18446744073709551615, + 18446744073709551615, + 476, + 484, + 472, + 480, + 83, + 84, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 6169141668427353082, + 8227784820734106454, + 18446744073709551615, + 18446744073709551615, + 496, + 505, + 492, + 501, + 87, + 88, + true, + "paragraph", + "paragraph" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 12178341415895623363, + 6519272468740598962, + 18446744073709551615, + 18446744073709551615, + 513, + 516, + 509, + 512, + 90, + 91, + true, + "oil", + "oil" + ], + [ + "numval", + "fval", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 12178341415896439119, + 15419364153911617129, + 18446744073709551615, + 18446744073709551615, + 573, + 576, + 572, + 575, + 100, + 101, + true, + "2.1", + "2.1" + ], + [ + "numval", + "ival", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 17767354399704235161, + 8171001275372472332, + 18446744073709551615, + 18446744073709551615, + 11, + 12, + 11, + 12, + 2, + 3, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 17767354399704235156, + 8171001275288926016, + 18446744073709551615, + 18446744073709551615, + 577, + 578, + 576, + 577, + 102, + 103, + true, + "4", + "4" + ], + [ + "parenthesis", + "round brackets", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 23744483499852859, + 1477981163033721681, + 18446744073709551615, + 18446744073709551615, + 61, + 103, + 61, + 103, + 13, + 20, + true, + "(both language entities and relationships)", + "(both language entities and relationships)" + ], + [ + "parenthesis", + "round brackets", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 4962669361213862895, + 5695197211497118759, + 18446744073709551615, + 18446744073709551615, + 186, + 196, + 186, + 196, + 35, + 38, + true, + "(weighted)", + "(weighted)" + ], + [ + "parenthesis", + "round brackets", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15147261052050380573, + 340133695651638722, + 18446744073709551615, + 18446744073709551615, + 560, + 579, + 559, + 578, + 97, + 104, + true, + "(see section 2.1.4)", + "(see section 2.1.4)" + ], + [ + "sentence", + "", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15219000500037869602, + 8856002696045104109, + 18446744073709551615, + 18446744073709551615, + 0, + 130, + 0, + 130, + 0, + 26, + true, + "In Listing 1, we also show an excerpt of how the annotations (both language entities and relationships) are stored in the backend.", + "In Listing 1, we also show an excerpt of how the annotations (both language entities and relationships) are stored in the backend." + ], + [ + "sentence", + "", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 16069762016341043397, + 4644962557882819656, + 18446744073709551615, + 18446744073709551615, + 131, + 233, + 131, + 233, + 26, + 44, + true, + "It is noteworthy here that relationships are stored as (weighted) links between two entity references.", + "It is noteworthy here that relationships are stored as (weighted) links between two entity references." + ], + [ + "sentence", + "", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 9876246604275471522, + 3163098707953307694, + 18446744073709551615, + 18446744073709551615, + 237, + 394, + 236, + 393, + 45, + 70, + true, + "The usage of references reduces data duplication and more importantly ensures that the relationships are always defined between two known entities in the KG.", + "The usage of references reduces data duplication and more importantly ensures that the relationships are always defined between two known entities in the KG." + ], + [ + "sentence", + "", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 16887832449551411027, + 18045699081531244891, + 18446744073709551615, + 18446744073709551615, + 395, + 580, + 394, + 579, + 70, + 105, + true, + "The latter simplifies the aggregation of the relationships significantly, since no new entities need to be created in the KG in order to aggregate the relationships (see section 2.1.4).", + "The latter simplifies the aggregation of the relationships significantly, since no new entities need to be created in the KG in order to aggregate the relationships (see section 2.1.4)." + ], + [ + "term", + "enum-term-mark-3", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 13335488353876392384, + 17546248817268837810, + 18446744073709551615, + 18446744073709551615, + 76, + 102, + 76, + 102, + 16, + 19, + true, + "entities and relationships", + "entities and relationships" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 12159911606058366544, + 17237419887761715230, + 18446744073709551615, + 18446744073709551615, + 67, + 84, + 67, + 84, + 15, + 17, + true, + "language entities", + "language entities" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 3523694329852808542, + 11676604379961027572, + 18446744073709551615, + 18446744073709551615, + 215, + 232, + 215, + 232, + 41, + 43, + true, + "entity references", + "entity references" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 18000510673442216122, + 13486560897367143376, + 18446744073709551615, + 18446744073709551615, + 269, + 285, + 268, + 284, + 50, + 52, + true, + "data duplication", + "data duplication" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15983101815597714776, + 14869920223655256325, + 18446744073709551615, + 18446744073709551615, + 478, + 490, + 477, + 489, + 82, + 84, + true, + "new entities", + "new entities" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8106397495779651824, + 14812410445927163850, + 18446744073709551615, + 18446744073709551615, + 30, + 37, + 30, + 37, + 8, + 9, + true, + "excerpt", + "excerpt" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 1037258523789473353, + 1591720788223851008, + 18446744073709551615, + 18446744073709551615, + 49, + 60, + 49, + 60, + 12, + 13, + true, + "annotations", + "annotations" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8279380567349713241, + 14280654403321989315, + 18446744073709551615, + 18446744073709551615, + 89, + 102, + 89, + 102, + 18, + 19, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8106396937135332179, + 5430416277392822917, + 18446744073709551615, + 18446744073709551615, + 122, + 129, + 122, + 129, + 24, + 25, + true, + "backend", + "backend" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8279380567349713241, + 14280654403322018040, + 18446744073709551615, + 18446744073709551615, + 158, + 171, + 158, + 171, + 31, + 32, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 329104159157898666, + 16038035701522583509, + 18446744073709551615, + 18446744073709551615, + 241, + 246, + 240, + 245, + 46, + 47, + true, + "usage", + "usage" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15984565858548749625, + 7081370485737015030, + 18446744073709551615, + 18446744073709551615, + 250, + 260, + 249, + 259, + 48, + 49, + true, + "references", + "references" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8279380567349713241, + 14280654403322071374, + 18446744073709551615, + 18446744073709551615, + 324, + 337, + 323, + 336, + 58, + 59, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 14652256560445338257, + 4854961473690419140, + 18446744073709551615, + 18446744073709551615, + 375, + 383, + 374, + 382, + 65, + 66, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15441160910541480204, + 9487537061622411973, + 18446744073709551615, + 18446744073709551615, + 391, + 393, + 390, + 392, + 68, + 69, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 16381206590630461421, + 183862245083634869, + 18446744073709551615, + 18446744073709551615, + 399, + 405, + 398, + 404, + 71, + 72, + true, + "latter", + "latter" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 844664518895955636, + 3549384269706196539, + 18446744073709551615, + 18446744073709551615, + 421, + 432, + 420, + 431, + 74, + 75, + true, + "aggregation", + "aggregation" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8279380567349713241, + 14280654403322029163, + 18446744073709551615, + 18446744073709551615, + 440, + 453, + 439, + 452, + 77, + 78, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15441160910541480204, + 9487537061622371109, + 18446744073709551615, + 18446744073709551615, + 517, + 519, + 516, + 518, + 90, + 91, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 329104161571401725, + 16112186615218301516, + 18446744073709551615, + 18446744073709551615, + 523, + 528, + 522, + 527, + 92, + 93, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8279380567349713241, + 14280654403322027617, + 18446744073709551615, + 18446744073709551615, + 546, + 559, + 545, + 558, + 96, + 97, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8106478708629288965, + 3196712847993308076, + 18446744073709551615, + 18446744073709551615, + 565, + 572, + 564, + 571, + 99, + 100, + true, + "section", + "section" + ], + [ + "numval", + "ival", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 17767354399704235162, + 11171804972701775781, + 18446744073709551615, + 18446744073709551615, + 7, + 8, + 7, + 8, + 1, + 2, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 17767354399704235152, + 11171804967920230653, + 18446744073709551615, + 18446744073709551615, + 112, + 113, + 112, + 113, + 18, + 19, + true, + "8", + "8" + ], + [ + "sentence", + "", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 11152699847125264436, + 12077505865209116602, + 18446744073709551615, + 18446744073709551615, + 0, + 111, + 0, + 111, + 0, + 18, + true, + "FIGURE 2 Illustration of various detected language entities in a particularly rich snippet of an AAPG abstract.", + "FIGURE 2 Illustration of various detected language entities in a particularly rich snippet of an AAPG abstract." + ], + [ + "term", + "enum-term-mark-2", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 5515747999597331548, + 15418084629649194710, + 18446744073709551615, + 18446744073709551615, + 197, + 220, + 197, + 220, + 33, + 37, + true, + "oil and gas exploration", + "oil and gas exploration" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 12159911606058366544, + 6926612245340290214, + 18446744073709551615, + 18446744073709551615, + 42, + 59, + 42, + 59, + 6, + 8, + true, + "language entities", + "language entities" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 18145815066853361043, + 5215848738409166677, + 18446744073709551615, + 18446744073709551615, + 78, + 90, + 78, + 90, + 11, + 13, + true, + "rich snippet", + "rich snippet" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 15509823190207649705, + 5960259217804932002, + 18446744073709551615, + 18446744073709551615, + 97, + 110, + 97, + 110, + 15, + 17, + true, + "AAPG abstract", + "AAPG abstract" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 12159911606058366544, + 6926612245340286517, + 18446744073709551615, + 18446744073709551615, + 118, + 135, + 118, + 135, + 20, + 22, + true, + "language entities", + "language entities" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 13713515574324394347, + 12590722771456480016, + 18446744073709551615, + 18446744073709551615, + 160, + 179, + 160, + 179, + 27, + 29, + true, + "geological concepts", + "geological concepts" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 10692163443301812358, + 705285979073830157, + 18446744073709551615, + 18446744073709551615, + 205, + 220, + 205, + 220, + 35, + 37, + true, + "gas exploration", + "gas exploration" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 16381206531186882505, + 282429509959017085, + 18446744073709551615, + 18446744073709551615, + 0, + 6, + 0, + 6, + 0, + 1, + true, + "FIGURE", + "FIGURE" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 11591880785922286007, + 11861251288579682434, + 18446744073709551615, + 18446744073709551615, + 9, + 21, + 9, + 21, + 2, + 3, + true, + "Illustration", + "Illustration" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 16381206568268873414, + 9023158530069285353, + 18446744073709551615, + 18446744073709551615, + 187, + 193, + 187, + 193, + 31, + 32, + true, + "domain", + "domain" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 12178341415895623363, + 5058536426346121044, + 18446744073709551615, + 18446744073709551615, + 197, + 200, + 197, + 200, + 33, + 34, + true, + "oil", + "oil" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "numval", + "ival", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 17767354399704235161, + 11087830826518420632, + 18446744073709551615, + 18446744073709551615, + 8, + 9, + 8, + 9, + 1, + 2, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 17767354399704235152, + 11087830826319423704, + 18446744073709551615, + 18446744073709551615, + 63, + 64, + 63, + 64, + 11, + 12, + true, + "8", + "8" + ], + [ + "parenthesis", + "round brackets", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 9255725192624412487, + 5908058035068237778, + 18446744073709551615, + 18446744073709551615, + 167, + 194, + 167, + 194, + 28, + 35, + true, + "(encoded in the field name)", + "(encoded in the field name)" + ], + [ + "sentence", + "", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 12645284953227579523, + 12974947662755325996, + 18446744073709551615, + 18446744073709551615, + 0, + 132, + 0, + 132, + 0, + 23, + true, + "LISTING 1 Excerpt of the annotated abstract from an AAPG paper 8 with its original text and the detected entities and relationships.", + "LISTING 1 Excerpt of the annotated abstract from an AAPG paper 8 with its original text and the detected entities and relationships." + ], + [ + "sentence", + "", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 12234165085298724165, + 11085803697781807661, + 18446744073709551615, + 18446744073709551615, + 133, + 208, + 133, + 208, + 23, + 38, + true, + "Note that relationships are typed (encoded in the field name) and weighted.", + "Note that relationships are typed (encoded in the field name) and weighted." + ], + [ + "sentence", + "", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 15149154235125366870, + 971355569745530824, + 18446744073709551615, + 18446744073709551615, + 209, + 295, + 209, + 295, + 38, + 51, + true, + "The weight reflects the confidence of the language annotation model during extraction.", + "The weight reflects the confidence of the language annotation model during extraction." + ], + [ + "term", + "enum-term-mark-3", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 13335488353876392384, + 7678075181696158825, + 18446744073709551615, + 18446744073709551615, + 105, + 131, + 105, + 131, + 19, + 22, + true, + "entities and relationships", + "entities and relationships" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 17270283073797262012, + 9056082944943925130, + 18446744073709551615, + 18446744073709551615, + 25, + 43, + 25, + 43, + 5, + 7, + true, + "annotated abstract", + "annotated abstract" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 6208047174578296014, + 11157541887347812593, + 18446744073709551615, + 18446744073709551615, + 52, + 62, + 52, + 62, + 9, + 11, + true, + "AAPG paper", + "AAPG paper" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 10146060738548390539, + 10395901779297563099, + 18446744073709551615, + 18446744073709551615, + 74, + 87, + 74, + 87, + 14, + 16, + true, + "original text", + "original text" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 5376193215510313776, + 2300794343146009047, + 18446744073709551615, + 18446744073709551615, + 183, + 193, + 183, + 193, + 32, + 34, + true, + "field name", + "field name" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 11168190807530039563, + 9616121456751772425, + 18446744073709551615, + 18446744073709551615, + 251, + 276, + 251, + 276, + 45, + 48, + true, + "language annotation model", + "language annotation model" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 7548687258225017395, + 17484470566150792413, + 18446744073709551615, + 18446744073709551615, + 332, + 349, + 332, + 349, + 56, + 58, + true, + "detected entities", + "detected entities" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 8106396829827034261, + 10257142782963866660, + 18446744073709551615, + 18446744073709551615, + 10, + 17, + 10, + 17, + 2, + 3, + true, + "Excerpt", + "Excerpt" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 14652256560445338257, + 23671674995088716, + 18446744073709551615, + 18446744073709551615, + 105, + 113, + 105, + 113, + 19, + 20, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 8279380567349713241, + 2766472369779690779, + 18446744073709551615, + 18446744073709551615, + 118, + 131, + 118, + 131, + 21, + 22, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 8279380567349713241, + 2766472369779690668, + 18446744073709551615, + 18446744073709551615, + 143, + 156, + 143, + 156, + 25, + 26, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 16381206557786164800, + 9609671604520235392, + 18446744073709551615, + 18446744073709551615, + 213, + 219, + 213, + 219, + 39, + 40, + true, + "weight", + "weight" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 2702871111219879214, + 4793337639583017260, + 18446744073709551615, + 18446744073709551615, + 233, + 243, + 233, + 243, + 42, + 43, + true, + "confidence", + "confidence" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 5303544497514782120, + 15086525081846414293, + 18446744073709551615, + 18446744073709551615, + 284, + 294, + 284, + 294, + 49, + 50, + true, + "extraction", + "extraction" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 1808270638656316647, + 6568689023336282632, + 18446744073709551615, + 18446744073709551615, + 296, + 309, + 296, + 309, + 51, + 52, + true, + "Relationships", + "Relationships" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 15984565858548749625, + 13397156481796417033, + 18446744073709551615, + 18446744073709551615, + 374, + 384, + 374, + 384, + 63, + 64, + true, + "references", + "references" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 389609625633316251, + 15262543275847637295, + 18446744073709551615, + 18446744073709551615, + 396, + 400, + 396, + 400, + 66, + 67, + true, + "link", + "link" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 14652256560445338257, + 23671674995075921, + 18446744073709551615, + 18446744073709551615, + 413, + 421, + 413, + 421, + 69, + 70, + true, + "entities", + "entities" + ], + [ + "numval", + "ival", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 12178341415896426714, + 14365907824633173416, + 18446744073709551615, + 18446744073709551615, + 503, + 506, + 503, + 506, + 84, + 85, + true, + "100", + "100" + ], + [ + "numval", + "ival", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 12178341415896430891, + 14365907899184508224, + 18446744073709551615, + 18446744073709551615, + 507, + 510, + 507, + 510, + 85, + 86, + true, + "000", + "000" + ], + [ + "numval", + "ival", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 17767354399704235163, + 2072240023181579806, + 18446744073709551615, + 18446744073709551615, + 549, + 550, + 549, + 550, + 92, + 93, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 17767354399704235163, + 2072240023181582239, + 18446744073709551615, + 18446744073709551615, + 657, + 658, + 657, + 658, + 108, + 109, + true, + "3", + "3" + ], + [ + "parenthesis", + "round brackets", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 8218859132330537689, + 15199448891854533062, + 18446744073709551615, + 18446744073709551615, + 316, + 342, + 316, + 342, + 52, + 59, + true, + "(paragraphs, tables, etc.)", + "(paragraphs, tables, etc.)" + ], + [ + "expression", + "common", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 12178341415895450733, + 14365751987021702484, + 18446744073709551615, + 18446744073709551615, + 337, + 341, + 337, + 341, + 57, + 58, + true, + "etc", + "etc." + ], + [ + "sentence", + "", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 3379820760040661991, + 9630040689393067965, + 18446744073709551615, + 18446744073709551615, + 0, + 71, + 0, + 71, + 0, + 12, + true, + "From a scaling perspective, this task is again embarrassingly parallel.", + "From a scaling perspective, this task is again embarrassingly parallel." + ], + [ + "sentence", + "", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 9719846915474708286, + 1724806033594978345, + 18446744073709551615, + 18446744073709551615, + 72, + 225, + 72, + 225, + 12, + 39, + true, + "Unlike the extraction task, the annotation task is not creating new data entities, but rather appending new data associated with an existing data entity.", + "Unlike the extraction task, the annotation task is not creating new data entities, but rather appending new data associated with an existing data entity." + ], + [ + "sentence", + "", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 6497043505161373931, + 3986341826774012937, + 18446744073709551615, + 18446744073709551615, + 226, + 421, + 226, + 421, + 39, + 71, + true, + "We simply apply the desired entity and relationship annotators on all document components (paragraphs, tables, etc.) in parallel by distributing the operations on all available compute resources.", + "We simply apply the desired entity and relationship annotators on all document components (paragraphs, tables, etc.) in parallel by distributing the operations on all available compute resources." + ], + [ + "sentence", + "", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 12102171329672562237, + 1970937600463832378, + 18446744073709551615, + 18446744073709551615, + 422, + 479, + 422, + 479, + 71, + 79, + true, + "Annotation tasks have no internal synchronization points.", + "Annotation tasks have no internal synchronization points." + ], + [ + "sentence", + "", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 13423572652191948422, + 13417187127495456897, + 18446744073709551615, + 18446744073709551615, + 480, + 570, + 480, + 570, + 79, + 96, + true, + "From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs.", + "From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs." + ], + [ + "sentence", + "", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 10872421192555193150, + 7255995964890387843, + 18446744073709551615, + 18446744073709551615, + 571, + 687, + 571, + 687, + 96, + 113, + true, + "Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers.", + "Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers." + ], + [ + "term", + "enum-term-mark-2", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 15819622148818218229, + 9356434918116130285, + 18446744073709551615, + 18446744073709551615, + 254, + 277, + 254, + 277, + 44, + 47, + true, + "entity and relationship", + "entity and relationship" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 14759509177149592461, + 3924802885839203615, + 18446744073709551615, + 18446744073709551615, + 7, + 26, + 7, + 26, + 2, + 4, + true, + "scaling perspective", + "scaling perspective" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 5101579281631733460, + 16368993288317499605, + 18446744073709551615, + 18446744073709551615, + 83, + 98, + 83, + 98, + 14, + 16, + true, + "extraction task", + "extraction task" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 4147688156856812386, + 10246888153284075300, + 18446744073709551615, + 18446744073709551615, + 104, + 119, + 104, + 119, + 18, + 20, + true, + "annotation task", + "annotation task" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 8165740181202876025, + 2261200330077561310, + 18446744073709551615, + 18446744073709551615, + 136, + 153, + 136, + 153, + 23, + 26, + true, + "new data entities", + "new data entities" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 14814151107054759097, + 7178764862843143005, + 18446744073709551615, + 18446744073709551615, + 176, + 184, + 176, + 184, + 30, + 32, + true, + "new data", + "new data" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 4106840074686891911, + 5880300922269562472, + 18446744073709551615, + 18446744073709551615, + 213, + 224, + 213, + 224, + 36, + 38, + true, + "data entity", + "data entity" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 398121334352453215, + 4070873708922197400, + 18446744073709551615, + 18446744073709551615, + 265, + 288, + 265, + 288, + 46, + 48, + true, + "relationship annotators", + "relationship annotators" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 17524405716142769441, + 4212954867232897467, + 18446744073709551615, + 18446744073709551615, + 296, + 315, + 296, + 315, + 50, + 52, + true, + "document components", + "document components" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 11188780108166616166, + 18209882539381064623, + 18446744073709551615, + 18446744073709551615, + 393, + 420, + 393, + 420, + 67, + 70, + true, + "available compute resources", + "available compute resources" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 10983505580038966516, + 1311386809729224802, + 18446744073709551615, + 18446744073709551615, + 422, + 438, + 422, + 438, + 71, + 73, + true, + "Annotation tasks", + "Annotation tasks" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 15507406252536266458, + 8871487447783311304, + 18446744073709551615, + 18446744073709551615, + 447, + 478, + 447, + 478, + 75, + 78, + true, + "internal synchronization points", + "internal synchronization points" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 8014457366322397053, + 205843253777526494, + 18446744073709551615, + 18446744073709551615, + 551, + 569, + 551, + 569, + 93, + 95, + true, + "million paragraphs", + "million paragraphs" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 12229168076678998995, + 12510724719610977574, + 18446744073709551615, + 18446744073709551615, + 580, + 599, + 580, + 599, + 97, + 99, + true, + "unlimited resources", + "unlimited resources" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 4147688156856812386, + 10246888153283929577, + 18446744073709551615, + 18446744073709551615, + 605, + 620, + 605, + 620, + 101, + 103, + true, + "annotation task", + "annotation task" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 13272446480061434936, + 11845447386382530535, + 18446744073709551615, + 18446744073709551615, + 659, + 686, + 659, + 686, + 109, + 112, + true, + "million independent workers", + "million independent workers" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 389609625631210899, + 12900735516089250680, + 18446744073709551615, + 18446744073709551615, + 33, + 37, + 33, + 37, + 6, + 7, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 16381206564577775616, + 1763085460560834451, + 18446744073709551615, + 18446744073709551615, + 254, + 260, + 254, + 260, + 44, + 45, + true, + "entity", + "entity" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 13968965538538956038, + 5978660594977828560, + 18446744073709551615, + 18446744073709551615, + 317, + 327, + 317, + 327, + 53, + 54, + true, + "paragraphs", + "paragraphs" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 16381206513098478539, + 17629065645663027433, + 18446744073709551615, + 18446744073709551615, + 329, + 335, + 329, + 335, + 55, + 56, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 14814034872218884114, + 8691748728752973432, + 18446744073709551615, + 18446744073709551615, + 346, + 354, + 346, + 354, + 60, + 61, + true, + "parallel", + "parallel" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 13985988710970420061, + 11652619962770153512, + 18446744073709551615, + 18446744073709551615, + 375, + 385, + 375, + 385, + 64, + 65, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 16381206562408205435, + 7059006330358361907, + 18446744073709551615, + 18446744073709551615, + 487, + 493, + 487, + 493, + 81, + 82, + true, + "corpus", + "corpus" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 6167933651658664291, + 5381718186587203635, + 18446744073709551615, + 18446744073709551615, + 511, + 520, + 511, + 520, + 86, + 87, + true, + "documents", + "documents" + ], + [ + "numval", + "fval", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 12178341415896439119, + 18028276311967117811, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 0, + false, + "2.1", + "2.1" + ], + [ + "numval", + "ival", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 17767354399704235163, + 11990453707355571146, + 18446744073709551615, + 18446744073709551615, + 4, + 5, + 4, + 5, + 0, + 1, + false, + "3", + "3" + ], + [ + "expression", + "wtoken-concatenation", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 329104147711421775, + 6272026767940968313, + 18446744073709551615, + 18446744073709551615, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "2.1.3", + "2.1.3" + ], + [ + "numval", + "ival", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 17767354399704235161, + 6534238515883477149, + 18446744073709551615, + 18446744073709551615, + 488, + 489, + 487, + 488, + 88, + 89, + true, + "1", + "1" + ], + [ + "parenthesis", + "round brackets", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 13741485124789240495, + 7389280086660259889, + 18446744073709551615, + 18446744073709551615, + 1392, + 1412, + 1391, + 1411, + 240, + 246, + true, + "(or a newly created)", + "(or a newly created)" + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 11367271216130118432, + 10099806209711852677, + 18446744073709551615, + 18446744073709551615, + 0, + 165, + 0, + 165, + 0, + 32, + true, + "The aggregation task for entities is similar to an extraction task, in the sense that we create new entities and link them each to the source they were mentioned in.", + "The aggregation task for entities is similar to an extraction task, in the sense that we create new entities and link them each to the source they were mentioned in." + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 13872144822242467415, + 12063372182631901768, + 18446744073709551615, + 18446744073709551615, + 166, + 296, + 166, + 295, + 32, + 53, + true, + "In addition to extraction, the entity aggregation task also applies a similarity metric \u00b6 between the entities during extraction.", + "In addition to extraction, the entity aggregation task also applies a similarity metric \u00b6 between the entities during extraction." + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 5037884732683442691, + 10436193456753512017, + 18446744073709551615, + 18446744073709551615, + 297, + 477, + 296, + 476, + 53, + 86, + true, + "This similarity metric will define if two entities refer to the same language concept and thus need to be represented by a single entity in the KG, rather than remaining separated.", + "This similarity metric will define if two entities refer to the same language concept and thus need to be represented by a single entity in the KG, rather than remaining separated." + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14134432982956085165, + 12558270821582062646, + 18446744073709551615, + 18446744073709551615, + 478, + 600, + 477, + 599, + 86, + 107, + true, + "In Figure 1, we have illustrated the aggregation task for two types of entities across many different document components.", + "In Figure 1, we have illustrated the aggregation task for two types of entities across many different document components." + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14785096463026995035, + 7734917022209301213, + 18446744073709551615, + 18446744073709551615, + 601, + 711, + 600, + 710, + 107, + 124, + true, + "These entity types could be for example materials and properties or geological formations and geological ages.", + "These entity types could be for example materials and properties or geological formations and geological ages." + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 2993305092365996286, + 17194587297594850406, + 18446744073709551615, + 18446744073709551615, + 712, + 912, + 711, + 911, + 124, + 162, + true, + "The links connecting the new entities to their source entity are weighted according to the frequency of the match, that is, we set a higher weight if the language entity has been found multiple times.", + "The links connecting the new entities to their source entity are weighted according to the frequency of the match, that is, we set a higher weight if the language entity has been found multiple times." + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 8311220471757940334, + 2005274358531069313, + 18446744073709551615, + 18446744073709551615, + 913, + 999, + 912, + 998, + 162, + 177, + true, + "From an implementation point of view, the aggregation task for entities is nontrivial.", + "From an implementation point of view, the aggregation task for entities is nontrivial." + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15423286190643719046, + 11690064648926497115, + 18446744073709551615, + 18446744073709551615, + 1000, + 1066, + 999, + 1065, + 177, + 188, + true, + "In distributed computing, it corresponds to a reduction operation.", + "In distributed computing, it corresponds to a reduction operation." + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16888006153925329926, + 2859445480279325516, + 18446744073709551615, + 18446744073709551615, + 1067, + 1179, + 1066, + 1178, + 188, + 203, + true, + "Our implementation distributes the iteration of the source elements among all available computational resources.", + "Our implementation distributes the iteration of the source elements among all available computational resources." + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 10754148152517303717, + 16101411456265296552, + 18446744073709551615, + 18446744073709551615, + 1180, + 1323, + 1179, + 1322, + 203, + 229, + true, + "The aggregation is first performed in a local buffer, which is then synchronized with the backend database only when it reaches a maximum size.", + "The aggregation is first performed in a local buffer, which is then synchronized with the backend database only when it reaches a maximum size." + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14105785377218888565, + 4351972894165463037, + 18446744073709551615, + 18446744073709551615, + 1324, + 1429, + 1323, + 1428, + 229, + 249, + true, + "The synchronization step is a simple atomic update into an existing (or a newly created) database object.", + "The synchronization step is a simple atomic update into an existing (or a newly created) database object." + ], + [ + "sentence", + "", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 12813104783910172537, + 2053442892096489490, + 18446744073709551615, + 18446744073709551615, + 1430, + 1517, + 1429, + 1516, + 249, + 264, + true, + "The synchronization for updates from each worker task does not collide with the others.", + "The synchronization for updates from each worker task does not collide with the others." + ], + [ + "term", + "enum-term-mark-3", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15083712120508435047, + 2420676786096945602, + 18446744073709551615, + 18446744073709551615, + 641, + 665, + 640, + 664, + 114, + 117, + true, + "materials and properties", + "materials and properties" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9614479359601927568, + 14941729413387470974, + 18446744073709551615, + 18446744073709551615, + 4, + 20, + 4, + 20, + 1, + 3, + true, + "aggregation task", + "aggregation task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 5101579281631733460, + 17565787919127588672, + 18446744073709551615, + 18446744073709551615, + 51, + 66, + 51, + 66, + 9, + 11, + true, + "extraction task", + "extraction task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15983101815597714776, + 70459589720017255, + 18446744073709551615, + 18446744073709551615, + 96, + 108, + 96, + 108, + 18, + 20, + true, + "new entities", + "new entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 6746037511376410145, + 11728944427356384927, + 18446744073709551615, + 18446744073709551615, + 197, + 220, + 197, + 220, + 38, + 41, + true, + "entity aggregation task", + "entity aggregation task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14708728462097652999, + 15263769781535282609, + 18446744073709551615, + 18446744073709551615, + 236, + 256, + 236, + 255, + 44, + 47, + true, + "similarity metric \u00b6", + "similarity metric \u00b6" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 12525375003379905483, + 6952514242140296043, + 18446744073709551615, + 18446744073709551615, + 361, + 382, + 360, + 381, + 64, + 67, + true, + "same language concept", + "same language concept" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 7459064322160295058, + 16552287479938986451, + 18446744073709551615, + 18446744073709551615, + 420, + 433, + 419, + 432, + 75, + 77, + true, + "single entity", + "single entity" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9614479359601927568, + 14941729413387177248, + 18446744073709551615, + 18446744073709551615, + 515, + 531, + 514, + 530, + 94, + 96, + true, + "aggregation task", + "aggregation task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 12805042060785076198, + 13666698149907793309, + 18446744073709551615, + 18446744073709551615, + 565, + 599, + 564, + 598, + 102, + 106, + true, + "many different document components", + "many different document components" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 7246756799784975282, + 3536616615701721498, + 18446744073709551615, + 18446744073709551615, + 607, + 619, + 606, + 618, + 108, + 110, + true, + "entity types", + "entity types" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 8873555710417801716, + 11367606112035812080, + 18446744073709551615, + 18446744073709551615, + 633, + 650, + 632, + 649, + 113, + 115, + true, + "example materials", + "example materials" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9648537698556423826, + 5670564797859801155, + 18446744073709551615, + 18446744073709551615, + 669, + 690, + 668, + 689, + 118, + 120, + true, + "geological formations", + "geological formations" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9663226904190425014, + 7389081649832461915, + 18446744073709551615, + 18446744073709551615, + 695, + 710, + 694, + 709, + 121, + 123, + true, + "geological ages", + "geological ages" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15983101815597714776, + 70459589720141381, + 18446744073709551615, + 18446744073709551615, + 737, + 749, + 736, + 748, + 128, + 130, + true, + "new entities", + "new entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 17621545813270270871, + 12868479750299862546, + 18446744073709551615, + 18446744073709551615, + 759, + 772, + 758, + 771, + 132, + 134, + true, + "source entity", + "source entity" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 11068190489576906314, + 3485226378154122039, + 18446744073709551615, + 18446744073709551615, + 866, + 881, + 865, + 880, + 154, + 156, + true, + "language entity", + "language entity" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 17200993877087579427, + 12244708574954642458, + 18446744073709551615, + 18446744073709551615, + 897, + 911, + 896, + 910, + 159, + 161, + true, + "multiple times", + "multiple times" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 469538851381879616, + 14202402575145459277, + 18446744073709551615, + 18446744073709551615, + 921, + 941, + 920, + 940, + 164, + 166, + true, + "implementation point", + "implementation point" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9614479359601927568, + 14941729413387159766, + 18446744073709551615, + 18446744073709551615, + 955, + 971, + 954, + 970, + 170, + 172, + true, + "aggregation task", + "aggregation task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9283617342877675041, + 15873184834718781399, + 18446744073709551615, + 18446744073709551615, + 1046, + 1065, + 1045, + 1064, + 185, + 187, + true, + "reduction operation", + "reduction operation" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15765378636786371769, + 15305975287346165990, + 18446744073709551615, + 18446744073709551615, + 1119, + 1134, + 1118, + 1133, + 195, + 197, + true, + "source elements", + "source elements" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14336868642439968330, + 10406918958800501535, + 18446744073709551615, + 18446744073709551615, + 1145, + 1178, + 1144, + 1177, + 199, + 202, + true, + "available computational resources", + "available computational resources" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 4976485978415387103, + 17001722156673714678, + 18446744073709551615, + 18446744073709551615, + 1220, + 1232, + 1219, + 1231, + 210, + 212, + true, + "local buffer", + "local buffer" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 8498518363315513669, + 5745264438629505035, + 18446744073709551615, + 18446744073709551615, + 1270, + 1286, + 1269, + 1285, + 219, + 221, + true, + "backend database", + "backend database" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 11218037041441406912, + 11691128466607537398, + 18446744073709551615, + 18446744073709551615, + 1310, + 1322, + 1309, + 1321, + 226, + 228, + true, + "maximum size", + "maximum size" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9096796183603350034, + 9964838906647012705, + 18446744073709551615, + 18446744073709551615, + 1328, + 1348, + 1327, + 1347, + 230, + 232, + true, + "synchronization step", + "synchronization step" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 10270216220990789707, + 15932172102865871238, + 18446744073709551615, + 18446744073709551615, + 1354, + 1374, + 1353, + 1373, + 234, + 237, + true, + "simple atomic update", + "simple atomic update" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 13739718539243203510, + 2110945273060730382, + 18446744073709551615, + 18446744073709551615, + 1413, + 1428, + 1412, + 1427, + 246, + 248, + true, + "database object", + "database object" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9601975787072124180, + 17970814035593313155, + 18446744073709551615, + 18446744073709551615, + 1472, + 1483, + 1471, + 1482, + 255, + 257, + true, + "worker task", + "worker task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14652256560445338257, + 5402311031787560493, + 18446744073709551615, + 18446744073709551615, + 25, + 33, + 25, + 33, + 4, + 5, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 329104161787480235, + 14057633201980204607, + 18446744073709551615, + 18446744073709551615, + 75, + 80, + 75, + 80, + 14, + 15, + true, + "sense", + "sense" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16381206579112188113, + 11083441268366703806, + 18446744073709551615, + 18446744073709551615, + 135, + 141, + 135, + 141, + 26, + 27, + true, + "source", + "source" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14650447861280948245, + 2874163810587468663, + 18446744073709551615, + 18446744073709551615, + 169, + 177, + 169, + 177, + 33, + 34, + true, + "addition", + "addition" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 5303544497514782120, + 1457149129547678585, + 18446744073709551615, + 18446744073709551615, + 181, + 191, + 181, + 191, + 35, + 36, + true, + "extraction", + "extraction" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14652256560445338257, + 5402311031787578846, + 18446744073709551615, + 18446744073709551615, + 269, + 277, + 268, + 276, + 49, + 50, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 5303544497514782120, + 1457149129547684243, + 18446744073709551615, + 18446744073709551615, + 285, + 295, + 284, + 294, + 51, + 52, + true, + "extraction", + "extraction" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14087387480976987019, + 17412828544627723541, + 18446744073709551615, + 18446744073709551615, + 302, + 312, + 301, + 311, + 54, + 55, + true, + "similarity", + "similarity" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14652256560445338257, + 5402311031787574259, + 18446744073709551615, + 18446744073709551615, + 339, + 347, + 338, + 346, + 60, + 61, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15441160910541480204, + 12103870215605198083, + 18446744073709551615, + 18446744073709551615, + 441, + 443, + 440, + 442, + 79, + 80, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16381206514091025767, + 5867212895741027859, + 18446744073709551615, + 18446744073709551615, + 481, + 487, + 480, + 486, + 87, + 88, + true, + "Figure", + "Figure" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 329104159243796903, + 5660915257099444274, + 18446744073709551615, + 18446744073709551615, + 540, + 545, + 539, + 544, + 98, + 99, + true, + "types", + "types" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14652256560445338257, + 5402311031787526504, + 18446744073709551615, + 18446744073709551615, + 549, + 557, + 548, + 556, + 100, + 101, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14088628410271132453, + 2902683368439961232, + 18446744073709551615, + 18446744073709551615, + 655, + 665, + 654, + 664, + 116, + 117, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 329104161597470987, + 14056796539026990417, + 18446744073709551615, + 18446744073709551615, + 716, + 721, + 715, + 720, + 125, + 126, + true, + "links", + "links" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 6184772648035902755, + 13644276956832070572, + 18446744073709551615, + 18446744073709551615, + 803, + 812, + 802, + 811, + 139, + 140, + true, + "frequency", + "frequency" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 329104161505834046, + 14058488092625120224, + 18446744073709551615, + 18446744073709551615, + 820, + 825, + 819, + 824, + 142, + 143, + true, + "match", + "match" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16381206557786164800, + 13942859270411079637, + 18446744073709551615, + 18446744073709551615, + 852, + 858, + 851, + 857, + 151, + 152, + true, + "weight", + "weight" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 389609625619349298, + 2424396184693639955, + 18446744073709551615, + 18446744073709551615, + 945, + 949, + 944, + 948, + 167, + 168, + true, + "view", + "view" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14652256560445338257, + 5402311031783443666, + 18446744073709551615, + 18446744073709551615, + 976, + 984, + 975, + 983, + 173, + 174, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 5947879507992892292, + 10649999492811896566, + 18446744073709551615, + 18446744073709551615, + 1015, + 1024, + 1014, + 1023, + 179, + 180, + true, + "computing", + "computing" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16770038681622514616, + 4247418780695300556, + 18446744073709551615, + 18446744073709551615, + 1071, + 1085, + 1070, + 1084, + 189, + 190, + true, + "implementation", + "implementation" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 6182474587515941250, + 2243782707457437586, + 18446744073709551615, + 18446744073709551615, + 1102, + 1111, + 1101, + 1110, + 192, + 193, + true, + "iteration", + "iteration" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 844664518895955636, + 7006404085999929209, + 18446744073709551615, + 18446744073709551615, + 1184, + 1195, + 1183, + 1194, + 204, + 205, + true, + "aggregation", + "aggregation" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 17812108117212345078, + 17863514596000506382, + 18446744073709551615, + 18446744073709551615, + 1434, + 1449, + 1433, + 1448, + 250, + 251, + true, + "synchronization", + "synchronization" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 8106478011916155592, + 11450752277701638419, + 18446744073709551615, + 18446744073709551615, + 1454, + 1461, + 1453, + 1460, + 252, + 253, + true, + "updates", + "updates" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16381206566198176359, + 13844350319778229358, + 18446744073709551615, + 18446744073709551615, + 1510, + 1516, + 1509, + 1515, + 262, + 263, + true, + "others", + "others" + ], + [ + "numval", + "fval", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 12178341415896439119, + 9356899144609064731, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 0, + false, + "2.1", + "2.1" + ], + [ + "numval", + "ival", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 17767354399704235156, + 5166044511235843509, + 18446744073709551615, + 18446744073709551615, + 4, + 5, + 4, + 5, + 0, + 1, + false, + "4", + "4" + ], + [ + "expression", + "wtoken-concatenation", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 329104147711421772, + 3440265738096889757, + 18446744073709551615, + 18446744073709551615, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "2.1.4", + "2.1.4" + ], + [ + "numval", + "ival", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 17767354399704235161, + 6700456192654799825, + 18446744073709551615, + 18446744073709551615, + 145, + 146, + 145, + 146, + 21, + 22, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 17767354399704235161, + 6700456192654780632, + 18446744073709551615, + 18446744073709551615, + 382, + 383, + 382, + 383, + 63, + 64, + true, + "1", + "1" + ], + [ + "name", + "name-concatenation", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 2223855877560943312, + 1919053918596705356, + 18446744073709551615, + 18446744073709551615, + 314, + 328, + 314, + 328, + 52, + 55, + true, + "Egret-Hibernia", + "Egret-Hibernia" + ], + [ + "expression", + "wtoken-concatenation", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 329104147807359846, + 8954447317539411217, + 18446744073709551615, + 18446744073709551615, + 357, + 362, + 357, + 362, + 59, + 60, + true, + "D'Arc", + "D'Arc" + ], + [ + "sentence", + "", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 5318507464051547992, + 15229729095805195430, + 18446744073709551615, + 18446744073709551615, + 0, + 134, + 0, + 134, + 0, + 19, + true, + "The aggregation of relationships introduces new links between the entities that were aggregated in the previous aggregation operation.", + "The aggregation of relationships introduces new links between the entities that were aggregated in the previous aggregation operation." + ], + [ + "sentence", + "", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 7339777553356328117, + 17064922654164515372, + 18446744073709551615, + 18446744073709551615, + 135, + 270, + 135, + 270, + 19, + 43, + true, + "In Figure 1, this task is depicted as the last operation, where entities with an annotated relationship are explicitly linked together.", + "In Figure 1, this task is depicted as the last operation, where entities with an annotated relationship are explicitly linked together." + ], + [ + "sentence", + "", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 16272411550008296662, + 11555295498174731192, + 18446744073709551615, + 18446744073709551615, + 271, + 384, + 271, + 384, + 43, + 65, + true, + "For example, we create an edge between the Egret-Hibernia Petroleum System and Jeanne D'Arc Basin from Listing 1.", + "For example, we create an edge between the Egret-Hibernia Petroleum System and Jeanne D'Arc Basin from Listing 1." + ], + [ + "term", + "enum-term-mark-4", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 16039672247523329505, + 15779820031469871158, + 18446744073709551615, + 18446744073709551615, + 320, + 368, + 320, + 368, + 54, + 61, + true, + "Hibernia Petroleum System and Jeanne D'Arc Basin", + "Hibernia Petroleum System and Jeanne D'Arc Basin" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 6172031743812195918, + 8603792663584797402, + 18446744073709551615, + 18446744073709551615, + 44, + 53, + 44, + 53, + 5, + 7, + true, + "new links", + "new links" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 17436612889402329741, + 257686206738688629, + 18446744073709551615, + 18446744073709551615, + 103, + 133, + 103, + 133, + 15, + 18, + true, + "previous aggregation operation", + "previous aggregation operation" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 4041064346196287786, + 12193344528431002782, + 18446744073709551615, + 18446744073709551615, + 177, + 191, + 177, + 191, + 29, + 31, + true, + "last operation", + "last operation" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 17634022196856315426, + 12810331327669295159, + 18446744073709551615, + 18446744073709551615, + 216, + 238, + 216, + 238, + 36, + 38, + true, + "annotated relationship", + "annotated relationship" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 8984547291290070810, + 10785884644016224053, + 18446744073709551615, + 18446744073709551615, + 320, + 345, + 320, + 345, + 54, + 57, + true, + "Hibernia Petroleum System", + "Hibernia Petroleum System" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 17296406944230595237, + 9123925137580829147, + 18446744073709551615, + 18446744073709551615, + 350, + 368, + 350, + 368, + 58, + 61, + true, + "Jeanne D'Arc Basin", + "Jeanne D'Arc Basin" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 844664518895955636, + 4707743503550014047, + 18446744073709551615, + 18446744073709551615, + 4, + 15, + 4, + 15, + 1, + 2, + true, + "aggregation", + "aggregation" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 8279380567349713241, + 4993685421577464738, + 18446744073709551615, + 18446744073709551615, + 19, + 32, + 19, + 32, + 3, + 4, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 14652256560445338257, + 12349417693150165423, + 18446744073709551615, + 18446744073709551615, + 66, + 74, + 66, + 74, + 9, + 10, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 16381206514091025767, + 1041958781247576177, + 18446744073709551615, + 18446744073709551615, + 138, + 144, + 138, + 144, + 20, + 21, + true, + "Figure", + "Figure" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 389609625631210899, + 4226532899380792628, + 18446744073709551615, + 18446744073709551615, + 153, + 157, + 153, + 157, + 24, + 25, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 14652256560445338257, + 12349417693150135191, + 18446744073709551615, + 18446744073709551615, + 199, + 207, + 199, + 207, + 33, + 34, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 8106397496085150773, + 13532347169213908612, + 18446744073709551615, + 18446744073709551615, + 275, + 282, + 275, + 282, + 44, + 45, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 389609625699630670, + 4202499610317382249, + 18446744073709551615, + 18446744073709551615, + 297, + 301, + 297, + 301, + 49, + 50, + true, + "edge", + "edge" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "numval", + "ival", + 4361549266576336732, + "TEXT", + "#/texts/60", + 1.0, + 17767354399704235158, + 5655206626033153623, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "6", + "6" + ], + [ + "numval", + "ival", + 4361549266576336732, + "TEXT", + "#/texts/60", + 1.0, + 15441160910541481979, + 15406507443958837158, + 18446744073709551615, + 18446744073709551615, + 3, + 5, + 3, + 5, + 2, + 3, + true, + "15", + "15" + ], + [ + "sentence", + "", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 1989015336043033185, + 17681587218053733993, + 18446744073709551615, + 18446744073709551615, + 0, + 104, + 0, + 104, + 0, + 17, + true, + "Similar to the aggregation of entities, the aggregation task for relationships is a reduction operation.", + "Similar to the aggregation of entities, the aggregation task for relationships is a reduction operation." + ], + [ + "sentence", + "", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 7530777968862609021, + 1101347999983368416, + 18446744073709551615, + 18446744073709551615, + 105, + 199, + 105, + 199, + 17, + 30, + true, + "Two independent document components could describe the same relationship between two entities.", + "Two independent document components could describe the same relationship between two entities." + ], + [ + "sentence", + "", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 18319377738763554945, + 17590146835780286845, + 18446744073709551615, + 18446744073709551615, + 200, + 405, + 200, + 405, + 30, + 65, + true, + "To minimize the synchronization lookup operation with the backend database, this task also utilizes a local buffer which accumulates the changes to be committed to the KG until the maximum size is reached.", + "To minimize the synchronization lookup operation with the backend database, this task also utilizes a local buffer which accumulates the changes to be committed to the KG until the maximum size is reached." + ], + [ + "sentence", + "", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 13293192132043146016, + 5012103471707666411, + 18446744073709551615, + 18446744073709551615, + 406, + 564, + 406, + 564, + 65, + 89, + true, + "This approach allows to distribute the computation among all the source document components and performs very few blocking operations in the backend database.", + "This approach allows to distribute the computation among all the source document components and performs very few blocking operations in the backend database." + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 9614479359601927568, + 11015701264450843984, + 18446744073709551615, + 18446744073709551615, + 44, + 60, + 44, + 60, + 8, + 10, + true, + "aggregation task", + "aggregation task" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 9283617342877675041, + 1437171259999206723, + 18446744073709551615, + 18446744073709551615, + 84, + 103, + 84, + 103, + 14, + 16, + true, + "reduction operation", + "reduction operation" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 1050457497437719405, + 14594207665656202151, + 18446744073709551615, + 18446744073709551615, + 105, + 140, + 105, + 140, + 17, + 21, + true, + "Two independent document components", + "Two independent document components" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 11267724614336150621, + 8843317855331358448, + 18446744073709551615, + 18446744073709551615, + 160, + 177, + 160, + 177, + 24, + 26, + true, + "same relationship", + "same relationship" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 11690899603853993269, + 1940567799173253407, + 18446744073709551615, + 18446744073709551615, + 216, + 248, + 216, + 248, + 33, + 36, + true, + "synchronization lookup operation", + "synchronization lookup operation" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 8498518363315513669, + 11874681077770548005, + 18446744073709551615, + 18446744073709551615, + 258, + 274, + 258, + 274, + 38, + 40, + true, + "backend database", + "backend database" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 4976485978415387103, + 13157458319015694846, + 18446744073709551615, + 18446744073709551615, + 302, + 314, + 302, + 314, + 46, + 48, + true, + "local buffer", + "local buffer" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 11218037041441406912, + 710943453708216350, + 18446744073709551615, + 18446744073709551615, + 381, + 393, + 381, + 393, + 60, + 62, + true, + "maximum size", + "maximum size" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 13327256571445639908, + 12955231630551139133, + 18446744073709551615, + 18446744073709551615, + 471, + 497, + 471, + 497, + 75, + 78, + true, + "source document components", + "source document components" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 8498518363315513669, + 11874681077770959213, + 18446744073709551615, + 18446744073709551615, + 547, + 563, + 547, + 563, + 86, + 88, + true, + "backend database", + "backend database" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 844664518895955636, + 6942483173811422672, + 18446744073709551615, + 18446744073709551615, + 15, + 26, + 15, + 26, + 3, + 4, + true, + "aggregation", + "aggregation" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 14652256560445338257, + 16076905631701172695, + 18446744073709551615, + 18446744073709551615, + 30, + 38, + 30, + 38, + 5, + 6, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 8279380567349713241, + 4443872868806033431, + 18446744073709551615, + 18446744073709551615, + 65, + 78, + 65, + 78, + 11, + 12, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 14652256560445338257, + 16076905631701100415, + 18446744073709551615, + 18446744073709551615, + 190, + 198, + 190, + 198, + 28, + 29, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 389609625631210899, + 6515706229842801309, + 18446744073709551615, + 18446744073709551615, + 281, + 285, + 281, + 285, + 42, + 43, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 8106396967856974361, + 16862662699097952271, + 18446744073709551615, + 18446744073709551615, + 337, + 344, + 337, + 344, + 51, + 52, + true, + "changes", + "changes" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 15441160910541480204, + 7076021896677418682, + 18446744073709551615, + 18446744073709551615, + 368, + 370, + 368, + 370, + 57, + 58, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 14650448032998792781, + 18358605601302965052, + 18446744073709551615, + 18446744073709551615, + 411, + 419, + 411, + 419, + 66, + 67, + true, + "approach", + "approach" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 14747625504171261759, + 12271672629498529699, + 18446744073709551615, + 18446744073709551615, + 445, + 456, + 445, + 456, + 71, + 72, + true, + "computation", + "computation" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 13985988710970420061, + 7242007196001838848, + 18446744073709551615, + 18446744073709551615, + 529, + 539, + 529, + 539, + 83, + 84, + true, + "operations", + "operations" + ], + [ + "numval", + "fval", + 5371685212527510397, + "TEXT", + "#/texts/62", + 1.0, + 12178341415896439118, + 9239884836110286517, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 1, + true, + "2.2", + "2.2" + ], + [ + "sentence", + "", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 5027814746067812274, + 8880743583508752189, + 18446744073709551615, + 18446744073709551615, + 0, + 144, + 0, + 144, + 0, + 29, + true, + "The purpose of a DF is to provide an execution plan for the task types detailed above in a meaningful order to generate or update a specific KG.", + "The purpose of a DF is to provide an execution plan for the task types detailed above in a meaningful order to generate or update a specific KG." + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 513252523484387603, + 17258289923281663294, + 18446744073709551615, + 18446744073709551615, + 37, + 51, + 37, + 51, + 9, + 11, + true, + "execution plan", + "execution plan" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 4681591099656035072, + 3420565556676131642, + 18446744073709551615, + 18446744073709551615, + 60, + 70, + 60, + 70, + 13, + 15, + true, + "task types", + "task types" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 15530988091855779508, + 13374334168429685199, + 18446744073709551615, + 18446744073709551615, + 91, + 107, + 91, + 107, + 19, + 21, + true, + "meaningful order", + "meaningful order" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 541002758701937407, + 10425137461922241957, + 18446744073709551615, + 18446744073709551615, + 132, + 143, + 132, + 143, + 26, + 28, + true, + "specific KG", + "specific KG" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 2892304827914802359, + 2785411801236762324, + 18446744073709551615, + 18446744073709551615, + 209, + 224, + 209, + 224, + 42, + 44, + true, + "declarative way", + "declarative way" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 8106479265948440982, + 781741948405146011, + 18446744073709551615, + 18446744073709551615, + 4, + 11, + 4, + 11, + 1, + 2, + true, + "purpose", + "purpose" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 15441160910541480770, + 14767814329685856037, + 18446744073709551615, + 18446744073709551615, + 17, + 19, + 17, + 19, + 4, + 5, + true, + "DF", + "DF" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 15441160910541480770, + 14767814329685844554, + 18446744073709551615, + 18446744073709551615, + 166, + 168, + 166, + 168, + 32, + 33, + true, + "DF", + "DF" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 17078598475728807446, + 1685386194017502435, + 18446744073709551615, + 18446744073709551615, + 182, + 193, + 182, + 193, + 37, + 38, + true, + "possibility", + "possibility" + ], + [ + "numval", + "ival", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 17767354399704235161, + 9308892477550455324, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "1", + "1" + ], + [ + "parenthesis", + "round brackets", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 6687613673164405501, + 3718808189748185983, + 18446744073709551615, + 18446744073709551615, + 97, + 178, + 97, + 178, + 16, + 33, + true, + "(eg, extract all paragraphs, tables, figures and captions from the AAPG articles)", + "(eg, extract all paragraphs, tables, figures and captions from the AAPG articles)" + ], + [ + "sentence", + "", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 17952852338970756919, + 13111419393486766397, + 18446744073709551615, + 18446744073709551615, + 3, + 179, + 3, + 179, + 2, + 34, + true, + "Which document components should be extracted from a converted corpus to form source entities (eg, extract all paragraphs, tables, figures and captions from the AAPG articles)?", + "Which document components should be extracted from a converted corpus to form source entities (eg, extract all paragraphs, tables, figures and captions from the AAPG articles)?" + ], + [ + "term", + "enum-term-mark-3", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 13927242563355017790, + 11111361558765550691, + 18446744073709551615, + 18446744073709551615, + 114, + 154, + 114, + 154, + 21, + 28, + true, + "paragraphs, tables, figures and captions", + "paragraphs, tables, figures and captions" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 17524405716142769441, + 17590820034902442787, + 18446744073709551615, + 18446744073709551615, + 9, + 28, + 9, + 28, + 3, + 5, + true, + "document components", + "document components" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 12990634353973901002, + 15215534364197625311, + 18446744073709551615, + 18446744073709551615, + 56, + 72, + 56, + 72, + 10, + 12, + true, + "converted corpus", + "converted corpus" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 15765380208127739160, + 3487088127591388980, + 18446744073709551615, + 18446744073709551615, + 81, + 96, + 81, + 96, + 14, + 16, + true, + "source entities", + "source entities" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 15509825031107342057, + 16430485838307002175, + 18446744073709551615, + 18446744073709551615, + 164, + 177, + 164, + 177, + 30, + 32, + true, + "AAPG articles", + "AAPG articles" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 15441160910541487324, + 14099211802745898044, + 18446744073709551615, + 18446744073709551615, + 98, + 100, + 98, + 100, + 17, + 18, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 13968965538538956038, + 15874836512031319554, + 18446744073709551615, + 18446744073709551615, + 114, + 124, + 114, + 124, + 21, + 22, + true, + "paragraphs", + "paragraphs" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 16381206513098478539, + 16007955502257386472, + 18446744073709551615, + 18446744073709551615, + 126, + 132, + 126, + 132, + 23, + 24, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 8106397480533647371, + 5407999228276195896, + 18446744073709551615, + 18446744073709551615, + 134, + 141, + 134, + 141, + 25, + 26, + true, + "figures", + "figures" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 14652289689770638970, + 3082690678157308456, + 18446744073709551615, + 18446744073709551615, + 146, + 154, + 146, + 154, + 27, + 28, + true, + "captions", + "captions" + ], + [ + "numval", + "ival", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 17767354399704235162, + 8832343908208005813, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "2", + "2" + ], + [ + "parenthesis", + "round brackets", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 12178341415896391104, + 1916219311504389810, + 18446744073709551615, + 18446744073709551615, + 24, + 27, + 24, + 27, + 4, + 5, + false, + "(s)", + "(s)" + ], + [ + "parenthesis", + "round brackets", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 14471732333604091421, + 18137583642465182298, + 18446744073709551615, + 18446744073709551615, + 66, + 132, + 66, + 132, + 13, + 26, + true, + "(eg, run the geology or material science annotators on paragraphs)", + "(eg, run the geology or material science annotators on paragraphs)" + ], + [ + "expression", + "wtoken-concatenation", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 14638289750758744304, + 5209943650119548686, + 18446744073709551615, + 18446744073709551615, + 19, + 27, + 19, + 27, + 4, + 5, + true, + "model(s)", + "model(s)" + ], + [ + "sentence", + "", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 5555439802994169065, + 12582050013497350866, + 18446744073709551615, + 18446744073709551615, + 3, + 133, + 3, + 133, + 2, + 27, + true, + "Which annotator model(s) to use on which type of source entity (eg, run the geology or material science annotators on paragraphs)?", + "Which annotator model(s) to use on which type of source entity (eg, run the geology or material science annotators on paragraphs)?" + ], + [ + "term", + "enum-term-mark-2", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 8092669759698512989, + 2405114237402716424, + 18446744073709551615, + 18446744073709551615, + 79, + 106, + 79, + 106, + 18, + 22, + true, + "geology or material science", + "geology or material science" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 13982724739224224965, + 14788269044995765456, + 18446744073709551615, + 18446744073709551615, + 9, + 27, + 9, + 27, + 3, + 5, + true, + "annotator model(s)", + "annotator model(s)" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 17621545813270270871, + 9209277177813395468, + 18446744073709551615, + 18446744073709551615, + 52, + 65, + 52, + 65, + 11, + 13, + true, + "source entity", + "source entity" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 8077232144599436920, + 3097313284964407388, + 18446744073709551615, + 18446744073709551615, + 90, + 117, + 90, + 117, + 20, + 23, + true, + "material science annotators", + "material science annotators" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 389609625631434316, + 1689548585610910345, + 18446744073709551615, + 18446744073709551615, + 44, + 48, + 44, + 48, + 9, + 10, + true, + "type", + "type" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 15441160910541487324, + 11728379687465099158, + 18446744073709551615, + 18446744073709551615, + 67, + 69, + 67, + 69, + 14, + 15, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 8106396492330410986, + 14716777031470552583, + 18446744073709551615, + 18446744073709551615, + 79, + 86, + 79, + 86, + 18, + 19, + true, + "geology", + "geology" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 13968965538538956038, + 9693440826264227420, + 18446744073709551615, + 18446744073709551615, + 121, + 131, + 121, + 131, + 24, + 25, + true, + "paragraphs", + "paragraphs" + ], + [ + "numval", + "ival", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 17767354399704235163, + 4307298561096377444, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "3", + "3" + ], + [ + "sentence", + "", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 5536330998509339910, + 8869846018318508353, + 18446744073709551615, + 18446744073709551615, + 3, + 101, + 3, + 101, + 2, + 17, + true, + "Which entity and relationship aggregations to perform on which set of annotated language entities?", + "Which entity and relationship aggregations to perform on which set of annotated language entities?" + ], + [ + "term", + "enum-term-mark-2", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 15819622148818218229, + 3247544755530782749, + 18446744073709551615, + 18446744073709551615, + 9, + 32, + 9, + 32, + 3, + 6, + true, + "entity and relationship", + "entity and relationship" + ], + [ + "term", + "single-term", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 10375574698259277266, + 6831591855006875406, + 18446744073709551615, + 18446744073709551615, + 20, + 45, + 20, + 45, + 5, + 7, + true, + "relationship aggregations", + "relationship aggregations" + ], + [ + "term", + "single-term", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 13768846528430928163, + 13590159867312231253, + 18446744073709551615, + 18446744073709551615, + 73, + 100, + 73, + 100, + 13, + 16, + true, + "annotated language entities", + "annotated language entities" + ], + [ + "term", + "single-term", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 16381206564577775616, + 17802925239924973922, + 18446744073709551615, + 18446744073709551615, + 9, + 15, + 9, + 15, + 3, + 4, + true, + "entity", + "entity" + ], + [ + "term", + "single-term", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 12178341415895638602, + 11898208455583787541, + 18446744073709551615, + 18446744073709551615, + 66, + 69, + 66, + 69, + 11, + 12, + true, + "set", + "set" + ], + [ + "expression", + "word-concatenation", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 8411957399126827159, + 17553608664055113416, + 18446744073709551615, + 18446744073709551615, + 216, + 226, + 216, + 226, + 38, + 39, + true, + "well-known", + "well-known" + ], + [ + "sentence", + "", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 17890377425977316094, + 4134511052230682896, + 18446744073709551615, + 18446744073709551615, + 0, + 95, + 0, + 95, + 0, + 18, + true, + "The DFs can thus be seen as blueprints for processing the corpus into a defined graph topology.", + "The DFs can thus be seen as blueprints for processing the corpus into a defined graph topology." + ], + [ + "sentence", + "", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 3849602608152927799, + 5933795491641734356, + 18446744073709551615, + 18446744073709551615, + 96, + 385, + 96, + 385, + 18, + 66, + true, + "Notably, our implementation of DFs and their tasks retains the flexibility of processing not only source documents of a well-known data schema such as from CCS, but virtually any structure that can be transformed to a JSON representation, including data entities from precurated databases.", + "Notably, our implementation of DFs and their tasks retains the flexibility of processing not only source documents of a well-known data schema such as from CCS, but virtually any structure that can be transformed to a JSON representation, including data entities from precurated databases." + ], + [ + "sentence", + "", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 6233663802035603519, + 12646838603453794377, + 18446744073709551615, + 18446744073709551615, + 386, + 525, + 386, + 525, + 66, + 91, + true, + "We designed the CPS platform to support export and import of DFs on entirely new datasets without the burden of recreating it from scratch.", + "We designed the CPS platform to support export and import of DFs on entirely new datasets without the burden of recreating it from scratch." + ], + [ + "term", + "enum-term-mark-2", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 11121084906404368828, + 14065085376394746955, + 18446744073709551615, + 18446744073709551615, + 426, + 443, + 426, + 443, + 73, + 76, + true, + "export and import", + "export and import" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 15908928394544452409, + 3315967902757812332, + 18446744073709551615, + 18446744073709551615, + 80, + 94, + 80, + 94, + 15, + 17, + true, + "graph topology", + "graph topology" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 11461340646147400027, + 5645701091668897811, + 18446744073709551615, + 18446744073709551615, + 194, + 210, + 194, + 210, + 34, + 36, + true, + "source documents", + "source documents" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 3243430563889378128, + 13795413780304913754, + 18446744073709551615, + 18446744073709551615, + 216, + 238, + 216, + 238, + 38, + 41, + true, + "well-known data schema", + "well-known data schema" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 2385031725262916889, + 12425871286607371512, + 18446744073709551615, + 18446744073709551615, + 314, + 333, + 314, + 333, + 56, + 58, + true, + "JSON representation", + "JSON representation" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 5594093096302267983, + 5645763396809619566, + 18446744073709551615, + 18446744073709551615, + 345, + 358, + 345, + 358, + 60, + 62, + true, + "data entities", + "data entities" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 5384513680513712549, + 1613404999844258732, + 18446744073709551615, + 18446744073709551615, + 364, + 384, + 364, + 384, + 63, + 65, + true, + "precurated databases", + "precurated databases" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 12779036928191531604, + 17810175013447901093, + 18446744073709551615, + 18446744073709551615, + 402, + 414, + 402, + 414, + 69, + 71, + true, + "CPS platform", + "CPS platform" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 15983059512171872769, + 12420728277455273707, + 18446744073709551615, + 18446744073709551615, + 463, + 475, + 463, + 475, + 80, + 82, + true, + "new datasets", + "new datasets" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 12178341415896110548, + 16353810209333011444, + 18446744073709551615, + 18446744073709551615, + 4, + 7, + 4, + 7, + 1, + 2, + true, + "DFs", + "DFs" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 7073262388879647009, + 1185102068772771476, + 18446744073709551615, + 18446744073709551615, + 28, + 38, + 28, + 38, + 7, + 8, + true, + "blueprints", + "blueprints" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 16381206562408205435, + 5202994988327969889, + 18446744073709551615, + 18446744073709551615, + 58, + 64, + 58, + 64, + 11, + 12, + true, + "corpus", + "corpus" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 16770038681622514616, + 8650515165933909157, + 18446744073709551615, + 18446744073709551615, + 109, + 123, + 109, + 123, + 21, + 22, + true, + "implementation", + "implementation" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 12178341415896110548, + 16353810209333018399, + 18446744073709551615, + 18446744073709551615, + 127, + 130, + 127, + 130, + 23, + 24, + true, + "DFs", + "DFs" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 329104159214088329, + 3704404855780482013, + 18446744073709551615, + 18446744073709551615, + 141, + 146, + 141, + 146, + 26, + 27, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 1588332591737418271, + 11333604657040713834, + 18446744073709551615, + 18446744073709551615, + 159, + 170, + 159, + 170, + 29, + 30, + true, + "flexibility", + "flexibility" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 12178341415896221596, + 16353806306611832484, + 18446744073709551615, + 18446744073709551615, + 252, + 255, + 252, + 255, + 44, + 45, + true, + "CCS", + "CCS" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 6168083952332832164, + 11352255425650886421, + 18446744073709551615, + 18446744073709551615, + 275, + 284, + 275, + 284, + 49, + 50, + true, + "structure", + "structure" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 16381206569048371007, + 12545384013888449433, + 18446744073709551615, + 18446744073709551615, + 426, + 432, + 426, + 432, + 73, + 74, + true, + "export", + "export" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 16381206560633506211, + 6020333096561425742, + 18446744073709551615, + 18446744073709551615, + 437, + 443, + 437, + 443, + 75, + 76, + true, + "import", + "import" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 12178341415896110548, + 16353810209332997583, + 18446744073709551615, + 18446744073709551615, + 447, + 450, + 447, + 450, + 77, + 78, + true, + "DFs", + "DFs" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 16381206569552972313, + 5307296425469714515, + 18446744073709551615, + 18446744073709551615, + 488, + 494, + 488, + 494, + 84, + 85, + true, + "burden", + "burden" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 8106475349459363877, + 15665911979141161163, + 18446744073709551615, + 18446744073709551615, + 517, + 524, + 517, + 524, + 89, + 90, + true, + "scratch", + "scratch" + ], + [ + "sentence", + "", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 14390776963480530757, + 14144391792905836053, + 18446744073709551615, + 18446744073709551615, + 0, + 230, + 0, + 230, + 0, + 39, + true, + "Our backend engine can exploit the DAG defined through the DF to massively distribute the individual tasks on all compute resources, because independent branches of the DAG each containing a chain of tasks can execute in parallel.", + "Our backend engine can exploit the DAG defined through the DF to massively distribute the individual tasks on all compute resources, because independent branches of the DAG each containing a chain of tasks can execute in parallel." + ], + [ + "sentence", + "", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 10530703581906041107, + 10566686277400129697, + 18446744073709551615, + 18446744073709551615, + 231, + 300, + 231, + 300, + 39, + 49, + true, + "The achievable level of parallelism changes throughout the execution.", + "The achievable level of parallelism changes throughout the execution." + ], + [ + "sentence", + "", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 15328322475466549779, + 6356331500693372898, + 18446744073709551615, + 18446744073709551615, + 301, + 459, + 301, + 459, + 49, + 76, + true, + "A practical example is a DF which extracts paragraphs and abstracts from all documents in the corpus, then annotates them and finally aggregates all entities.", + "A practical example is a DF which extracts paragraphs and abstracts from all documents in the corpus, then annotates them and finally aggregates all entities." + ], + [ + "sentence", + "", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 8672431139074644597, + 15077769571580712776, + 18446744073709551615, + 18446744073709551615, + 460, + 614, + 460, + 614, + 76, + 104, + true, + "Here, the extraction tasks are distributed only over all documents; then, in the annotation tasks, we increase the parallelism to all document components.", + "Here, the extraction tasks are distributed only over all documents; then, in the annotation tasks, we increase the parallelism to all document components." + ], + [ + "sentence", + "", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 13667161688560255794, + 11144968750389162501, + 18446744073709551615, + 18446744073709551615, + 615, + 693, + 615, + 693, + 104, + 117, + true, + "Any synchronization points thus can be pushed back into the aggregation tasks.", + "Any synchronization points thus can be pushed back into the aggregation tasks." + ], + [ + "term", + "enum-term-mark-3", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 857608957643671729, + 8789227088448338439, + 18446744073709551615, + 18446744073709551615, + 344, + 368, + 344, + 368, + 57, + 60, + true, + "paragraphs and abstracts", + "paragraphs and abstracts" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 15829965978251528098, + 1372718158532474013, + 18446744073709551615, + 18446744073709551615, + 4, + 18, + 4, + 18, + 1, + 3, + true, + "backend engine", + "backend engine" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 4465888657594319459, + 9689611985399197926, + 18446744073709551615, + 18446744073709551615, + 90, + 106, + 90, + 106, + 15, + 17, + true, + "individual tasks", + "individual tasks" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 4421383392096991748, + 16000306739803699679, + 18446744073709551615, + 18446744073709551615, + 114, + 131, + 114, + 131, + 19, + 21, + true, + "compute resources", + "compute resources" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 13920350439839821083, + 16955722751473761512, + 18446744073709551615, + 18446744073709551615, + 141, + 161, + 141, + 161, + 23, + 25, + true, + "independent branches", + "independent branches" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 10252716422187396036, + 11724540237306114869, + 18446744073709551615, + 18446744073709551615, + 235, + 251, + 235, + 251, + 40, + 42, + true, + "achievable level", + "achievable level" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 4840493146633456446, + 4990244669328773678, + 18446744073709551615, + 18446744073709551615, + 255, + 274, + 255, + 274, + 43, + 45, + true, + "parallelism changes", + "parallelism changes" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 16494665832924434534, + 11964331841030860490, + 18446744073709551615, + 18446744073709551615, + 303, + 320, + 303, + 320, + 50, + 52, + true, + "practical example", + "practical example" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 16789581223630763462, + 11144393400275200701, + 18446744073709551615, + 18446744073709551615, + 470, + 486, + 470, + 486, + 79, + 81, + true, + "extraction tasks", + "extraction tasks" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 2926971850916888288, + 11862075283092330987, + 18446744073709551615, + 18446744073709551615, + 541, + 557, + 541, + 557, + 92, + 94, + true, + "annotation tasks", + "annotation tasks" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 17524405716142769441, + 2707237517370053311, + 18446744073709551615, + 18446744073709551615, + 594, + 613, + 594, + 613, + 101, + 103, + true, + "document components", + "document components" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 4432720038651401912, + 7824075473089442522, + 18446744073709551615, + 18446744073709551615, + 619, + 641, + 619, + 641, + 105, + 107, + true, + "synchronization points", + "synchronization points" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 12420511630999364190, + 12482143430448245195, + 18446744073709551615, + 18446744073709551615, + 675, + 692, + 675, + 692, + 114, + 116, + true, + "aggregation tasks", + "aggregation tasks" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 12178341415896112046, + 7013214852574030934, + 18446744073709551615, + 18446744073709551615, + 35, + 38, + 35, + 38, + 6, + 7, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 15441160910541480770, + 448741175652348348, + 18446744073709551615, + 18446744073709551615, + 59, + 61, + 59, + 61, + 10, + 11, + true, + "DF", + "DF" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 12178341415896112046, + 7013214852574069656, + 18446744073709551615, + 18446744073709551615, + 169, + 172, + 169, + 172, + 27, + 28, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 329104161556625920, + 10005480021798899834, + 18446744073709551615, + 18446744073709551615, + 191, + 196, + 191, + 196, + 31, + 32, + true, + "chain", + "chain" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 329104159214088329, + 9882082029920318768, + 18446744073709551615, + 18446744073709551615, + 200, + 205, + 200, + 205, + 33, + 34, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 14814034872218884114, + 4851277065202120364, + 18446744073709551615, + 18446744073709551615, + 221, + 229, + 221, + 229, + 37, + 38, + true, + "parallel", + "parallel" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 6168355606348623882, + 6552081320825944349, + 18446744073709551615, + 18446744073709551615, + 290, + 299, + 290, + 299, + 47, + 48, + true, + "execution", + "execution" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 15441160910541480770, + 448741175652334105, + 18446744073709551615, + 18446744073709551615, + 326, + 328, + 326, + 328, + 54, + 55, + true, + "DF", + "DF" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 13968965538538956038, + 5696982554616545212, + 18446744073709551615, + 18446744073709551615, + 344, + 354, + 344, + 354, + 57, + 58, + true, + "paragraphs", + "paragraphs" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 5950055304304346669, + 13420779396952801457, + 18446744073709551615, + 18446744073709551615, + 359, + 368, + 359, + 368, + 59, + 60, + true, + "abstracts", + "abstracts" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 6167933651658664291, + 12910647832495839984, + 18446744073709551615, + 18446744073709551615, + 378, + 387, + 378, + 387, + 62, + 63, + true, + "documents", + "documents" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 16381206562408205435, + 10625918246812111142, + 18446744073709551615, + 18446744073709551615, + 395, + 401, + 395, + 401, + 65, + 66, + true, + "corpus", + "corpus" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 15389240612279378533, + 10613103374378756986, + 18446744073709551615, + 18446744073709551615, + 435, + 445, + 435, + 445, + 72, + 73, + true, + "aggregates", + "aggregates" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 14652256560445338257, + 12562074549158643256, + 18446744073709551615, + 18446744073709551615, + 450, + 458, + 450, + 458, + 74, + 75, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 6167933651658664291, + 12910647832496717158, + 18446744073709551615, + 18446744073709551615, + 517, + 526, + 517, + 526, + 86, + 87, + true, + "documents", + "documents" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 18223316012831076048, + 9614179813513706564, + 18446744073709551615, + 18446744073709551615, + 575, + 586, + 575, + 586, + 98, + 99, + true, + "parallelism", + "parallelism" + ], + [ + "numval", + "ival", + 11590138063543342276, + "TEXT", + "#/texts/69", + 1.0, + 17767354399704235163, + 13032776934094914368, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "3", + "3" + ], + [ + "sentence", + "", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 15139433187664310492, + 4695469573464464784, + 18446744073709551615, + 18446744073709551615, + 0, + 103, + 0, + 103, + 0, + 18, + true, + "We will now look into the requirements to perform deep data exploration on a populated Knowledge Graph.", + "We will now look into the requirements to perform deep data exploration on a populated Knowledge Graph." + ], + [ + "term", + "single-term", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 13671659409933113155, + 4173424983166285630, + 18446744073709551615, + 18446744073709551615, + 50, + 71, + 50, + 71, + 9, + 12, + true, + "deep data exploration", + "deep data exploration" + ], + [ + "term", + "single-term", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 4605433253513798881, + 12143484967953891920, + 18446744073709551615, + 18446744073709551615, + 77, + 102, + 77, + 102, + 14, + 17, + true, + "populated Knowledge Graph", + "populated Knowledge Graph" + ], + [ + "term", + "single-term", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 13671659409933113155, + 4173424983166279992, + 18446744073709551615, + 18446744073709551615, + 106, + 127, + 106, + 127, + 19, + 22, + true, + "deep data exploration", + "deep data exploration" + ], + [ + "term", + "single-term", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 16355783708075937518, + 758382672449514167, + 18446744073709551615, + 18446744073709551615, + 141, + 165, + 141, + 165, + 24, + 26, + true, + "fundamental capabilities", + "fundamental capabilities" + ], + [ + "term", + "single-term", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 13240311013633905449, + 17335418227251459731, + 18446744073709551615, + 18446744073709551615, + 26, + 38, + 26, + 38, + 6, + 7, + true, + "requirements", + "requirements" + ], + [ + "numval", + "ival", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 17767354399704235161, + 14832870493709788748, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "1", + "1" + ], + [ + "expression", + "word-concatenation", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 6180052837118668048, + 5133447445234002483, + 18446744073709551615, + 18446744073709551615, + 68, + 77, + 68, + 77, + 15, + 16, + true, + "multi-hop", + "multi-hop" + ], + [ + "numval", + "ival", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 17767354399704235162, + 6940844591694806953, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "2", + "2" + ], + [ + "expression", + "word-concatenation", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 14042859618039714361, + 17116381936376022837, + 18446744073709551615, + 18446744073709551615, + 62, + 72, + 62, + 72, + 13, + 14, + true, + "on-the-fly", + "on-the-fly" + ], + [ + "sentence", + "", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 9325671930388359069, + 16472228699755687388, + 18446744073709551615, + 18446744073709551615, + 62, + 73, + 62, + 73, + 13, + 15, + true, + "on-the-fly.", + "on-the-fly." + ], + [ + "term", + "single-term", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 4237976234056442894, + 17998220857740869205, + 18446744073709551615, + 18446744073709551615, + 11, + 26, + 11, + 26, + 3, + 5, + true, + "graph analytics", + "graph analytics" + ], + [ + "term", + "single-term", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 16104705485729814979, + 4683468690442044165, + 18446744073709551615, + 18446744073709551615, + 34, + 44, + 34, + 44, + 7, + 9, + true, + "full graph", + "full graph" + ], + [ + "term", + "single-term", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 8106478685921145000, + 17803707003211492579, + 18446744073709551615, + 18446744073709551615, + 48, + 55, + 48, + 55, + 10, + 11, + true, + "subsets", + "subsets" + ], + [ + "numval", + "fval", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 389609625533812191, + 1960551977415557980, + 18446744073709551615, + 18446744073709551615, + 409, + 413, + 409, + 413, + 67, + 68, + true, + "9,10", + "9,10" + ], + [ + "numval", + "ival", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 17767354399704235157, + 8804028754113186404, + 18446744073709551615, + 18446744073709551615, + 251, + 252, + 251, + 252, + 40, + 41, + true, + "5", + "5" + ], + [ + "parenthesis", + "round brackets", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 13881206615676963801, + 14805625262429425181, + 18446744073709551615, + 18446744073709551615, + 238, + 253, + 238, + 253, + 37, + 42, + true, + "(see section 5)", + "(see section 5)" + ], + [ + "sentence", + "", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 11326071441728208854, + 9963382484873233605, + 18446744073709551615, + 18446744073709551615, + 0, + 104, + 0, + 104, + 0, + 16, + true, + "Deep queries are essential to dynamically combine independent facts together in the given query context.", + "Deep queries are essential to dynamically combine independent facts together in the given query context." + ], + [ + "sentence", + "", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 10328879830457680735, + 16040219150643269355, + 18446744073709551615, + 18446744073709551615, + 105, + 254, + 105, + 254, + 16, + 43, + true, + "This would apply for example to explorational queries aimed to characterize petroleum system elements, as detailed in our case study (see section 5).", + "This would apply for example to explorational queries aimed to characterize petroleum system elements, as detailed in our case study (see section 5)." + ], + [ + "sentence", + "", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 10167104780571413476, + 11008366146913000479, + 18446744073709551615, + 18446744073709551615, + 255, + 326, + 255, + 326, + 43, + 55, + true, + "Graph analytics can further reveal hidden structure in the KG topology.", + "Graph analytics can further reveal hidden structure in the KG topology." + ], + [ + "sentence", + "", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 11539294678044249131, + 18223140021759824115, + 18446744073709551615, + 18446744073709551615, + 327, + 472, + 327, + 472, + 55, + 78, + true, + "Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation.", + "Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation." + ], + [ + "term", + "enum-term-mark-2", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 14039610523492290379, + 8337307582732886868, + 18446744073709551615, + 18446744073709551615, + 440, + 471, + 440, + 471, + 72, + 77, + true, + "analysis, and label propagation", + "analysis, and label propagation" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 11805369560476404678, + 15092745044709578481, + 18446744073709551615, + 18446744073709551615, + 0, + 12, + 0, + 12, + 0, + 2, + true, + "Deep queries", + "Deep queries" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 10975373624156076918, + 11938916105068148807, + 18446744073709551615, + 18446744073709551615, + 50, + 67, + 50, + 67, + 7, + 9, + true, + "independent facts", + "independent facts" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 16172198469897816706, + 17761989334252452030, + 18446744073709551615, + 18446744073709551615, + 90, + 103, + 90, + 103, + 13, + 15, + true, + "query context", + "query context" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 13481069231801630849, + 12775598423777163361, + 18446744073709551615, + 18446744073709551615, + 137, + 158, + 137, + 158, + 22, + 24, + true, + "explorational queries", + "explorational queries" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 2735423192832266389, + 2837241149989494242, + 18446744073709551615, + 18446744073709551615, + 181, + 206, + 181, + 206, + 27, + 30, + true, + "petroleum system elements", + "petroleum system elements" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 2873469788203493819, + 10206430255073580142, + 18446744073709551615, + 18446744073709551615, + 227, + 237, + 227, + 237, + 35, + 37, + true, + "case study", + "case study" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 968326748584202361, + 1506605499402907857, + 18446744073709551615, + 18446744073709551615, + 255, + 270, + 255, + 270, + 43, + 45, + true, + "Graph analytics", + "Graph analytics" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 8767693934171074605, + 9137059685047075674, + 18446744073709551615, + 18446744073709551615, + 290, + 306, + 290, + 306, + 48, + 50, + true, + "hidden structure", + "hidden structure" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 6992599754813268015, + 399356179529846231, + 18446744073709551615, + 18446744073709551615, + 314, + 325, + 314, + 325, + 52, + 54, + true, + "KG topology", + "KG topology" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 3093669945723512593, + 14416489053928682644, + 18446744073709551615, + 18446744073709551615, + 339, + 374, + 339, + 374, + 57, + 60, + true, + "advanced graphanalytical operations", + "advanced graphanalytical operations" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 6184977920700221726, + 16322060306778153855, + 18446744073709551615, + 18446744073709551615, + 379, + 388, + 379, + 388, + 61, + 63, + true, + "page rank", + "page rank" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 4977542118862070209, + 8478594702751520278, + 18446744073709551615, + 18446744073709551615, + 390, + 407, + 390, + 407, + 64, + 66, + true, + "node centralities", + "node centralities" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 9441128013663076980, + 4724848430636712553, + 18446744073709551615, + 18446744073709551615, + 414, + 429, + 414, + 429, + 68, + 70, + true, + "node clustering", + "node clustering" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 9079575722732701095, + 9418445845300672534, + 18446744073709551615, + 18446744073709551615, + 431, + 448, + 431, + 448, + 71, + 73, + true, + "spectral analysis", + "spectral analysis" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 4996066085078527360, + 7290288987028820693, + 18446744073709551615, + 18446744073709551615, + 454, + 471, + 454, + 471, + 75, + 77, + true, + "label propagation", + "label propagation" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 8106397496085150773, + 4162592152493996363, + 18446744073709551615, + 18446744073709551615, + 126, + 133, + 126, + 133, + 20, + 21, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 8106478708629288965, + 4163190785571344199, + 18446744073709551615, + 18446744073709551615, + 243, + 250, + 243, + 250, + 39, + 40, + true, + "section", + "section" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 14650277098690689540, + 7868203806272457, + 18446744073709551615, + 18446744073709551615, + 327, + 335, + 327, + 335, + 55, + 56, + true, + "Examples", + "Examples" + ], + [ + "numval", + "fval", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 329104147749426795, + 8846331132305971160, + 18446744073709551615, + 18446744073709551615, + 463, + 468, + 463, + 468, + 78, + 79, + true, + "11,12", + "11,12" + ], + [ + "numval", + "ival", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 17767354399704235162, + 13308611356903088115, + 18446744073709551615, + 18446744073709551615, + 657, + 658, + 657, + 658, + 111, + 112, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 17767354399704235161, + 13308611356852178135, + 18446744073709551615, + 18446744073709551615, + 736, + 737, + 736, + 737, + 126, + 127, + true, + "1", + "1" + ], + [ + "parenthesis", + "round brackets", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 3451858675896043664, + 6364806088702961820, + 18446744073709551615, + 18446744073709551615, + 734, + 745, + 734, + 745, + 124, + 130, + true, + "(>1B edges)", + "(>1B edges)" + ], + [ + "expression", + "word-concatenation", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 8759553427650775934, + 8064528190798949556, + 18446744073709551615, + 18446744073709551615, + 209, + 224, + 209, + 224, + 37, + 38, + true, + "graph-traversal", + "graph-traversal" + ], + [ + "expression", + "word-concatenation", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 6285955549867796622, + 3357902448538227798, + 18446744073709551615, + 18446744073709551615, + 281, + 297, + 281, + 297, + 48, + 49, + true, + "time-to-solution", + "time-to-solution" + ], + [ + "expression", + "word-concatenation", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 18014693639644065312, + 3586133881645139780, + 18446744073709551615, + 18446744073709551615, + 641, + 656, + 641, + 656, + 110, + 111, + true, + "graph-analytics", + "graph-analytics" + ], + [ + "expression", + "word-concatenation", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 6180052837118668048, + 18178055246764646091, + 18446744073709551615, + 18446744073709551615, + 697, + 706, + 697, + 706, + 119, + 120, + true, + "multi-hop", + "multi-hop" + ], + [ + "sentence", + "", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 3847089632682750791, + 9531589699293554617, + 18446744073709551615, + 18446744073709551615, + 0, + 231, + 0, + 231, + 0, + 40, + true, + "Both deep queries and graph analytics have in common that they are inherently expensive to compute on conventional graph databases, due to a rapid expansion of the number of visited nodes as a function of the graph-traversal depth.", + "Both deep queries and graph analytics have in common that they are inherently expensive to compute on conventional graph databases, due to a rapid expansion of the number of visited nodes as a function of the graph-traversal depth." + ], + [ + "sentence", + "", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 12949087731978363617, + 17941143682626011533, + 18446744073709551615, + 18446744073709551615, + 232, + 326, + 232, + 326, + 40, + 54, + true, + "This is a major obstacle in providing reasonable time-to-solution in the aforementioned cases.", + "This is a major obstacle in providing reasonable time-to-solution in the aforementioned cases." + ], + [ + "sentence", + "", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 5048161526303226736, + 8851548305893694604, + 18446744073709551615, + 18446744073709551615, + 327, + 462, + 327, + 462, + 54, + 78, + true, + "Virtually all established graph database products on the market today ** fall victim to this, as was also reported in multiple sources.", + "Virtually all established graph database products on the market today ** fall victim to this, as was also reported in multiple sources." + ], + [ + "sentence", + "", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 6385932786136317833, + 1838392916180151399, + 18446744073709551615, + 18446744073709551615, + 469, + 594, + 469, + 594, + 79, + 102, + true, + "Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform.", + "Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform." + ], + [ + "sentence", + "", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 905311317995826078, + 15561031818904732595, + 18446744073709551615, + 18446744073709551615, + 595, + 761, + 595, + 761, + 102, + 133, + true, + "This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast.", + "This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast." + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 7076268937724050913, + 5590677365945223320, + 18446744073709551615, + 18446744073709551615, + 5, + 17, + 5, + 17, + 1, + 3, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 4237976234056442894, + 18290860271812073513, + 18446744073709551615, + 18446744073709551615, + 22, + 37, + 22, + 37, + 4, + 6, + true, + "graph analytics", + "graph analytics" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 9197647117066059649, + 7265372759594219155, + 18446744073709551615, + 18446744073709551615, + 102, + 130, + 102, + 130, + 17, + 20, + true, + "conventional graph databases", + "conventional graph databases" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 12732351021804658370, + 10575701010966589198, + 18446744073709551615, + 18446744073709551615, + 141, + 156, + 141, + 156, + 24, + 26, + true, + "rapid expansion", + "rapid expansion" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 10847427237839014643, + 6591514626586371035, + 18446744073709551615, + 18446744073709551615, + 174, + 187, + 174, + 187, + 30, + 32, + true, + "visited nodes", + "visited nodes" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 5112125995780679480, + 620973546713892862, + 18446744073709551615, + 18446744073709551615, + 209, + 230, + 209, + 230, + 37, + 39, + true, + "graph-traversal depth", + "graph-traversal depth" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 16419030849690516097, + 3943363386701357449, + 18446744073709551615, + 18446744073709551615, + 242, + 256, + 242, + 256, + 43, + 45, + true, + "major obstacle", + "major obstacle" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 6845289887138259707, + 14575597687784061544, + 18446744073709551615, + 18446744073709551615, + 270, + 297, + 270, + 297, + 47, + 49, + true, + "reasonable time-to-solution", + "reasonable time-to-solution" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 7636868254411294311, + 5058362325883158613, + 18446744073709551615, + 18446744073709551615, + 305, + 325, + 305, + 325, + 51, + 53, + true, + "aforementioned cases", + "aforementioned cases" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 5347546348172783098, + 11784313264771460442, + 18446744073709551615, + 18446744073709551615, + 353, + 376, + 353, + 376, + 57, + 60, + true, + "graph database products", + "graph database products" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 9507265079216451910, + 8216015788353070056, + 18446744073709551615, + 18446744073709551615, + 384, + 396, + 384, + 396, + 62, + 64, + true, + "market today", + "market today" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 2183265725869201141, + 4685897896439150964, + 18446744073709551615, + 18446744073709551615, + 445, + 461, + 445, + 461, + 75, + 77, + true, + "multiple sources", + "multiple sources" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 5254873347880861734, + 17629927352964185068, + 18446744073709551615, + 18446744073709551615, + 480, + 496, + 480, + 496, + 82, + 84, + true, + "poor performance", + "poor performance" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 3613728750551607700, + 11957105209137106607, + 18446744073709551615, + 18446744073709551615, + 514, + 539, + 514, + 539, + 87, + 90, + true, + "available graph databases", + "available graph databases" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 15602286436515055909, + 6910187018044769346, + 18446744073709551615, + 18446744073709551615, + 556, + 572, + 556, + 572, + 94, + 97, + true, + "new graph engine", + "new graph engine" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 12779036928191531604, + 9018866942868051774, + 18446744073709551615, + 18446744073709551615, + 581, + 593, + 581, + 593, + 99, + 101, + true, + "CPS platform", + "CPS platform" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 2924972194163802578, + 14724717228620861174, + 18446744073709551615, + 18446744073709551615, + 600, + 612, + 600, + 612, + 103, + 105, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 7576847127496348416, + 12388724362174645845, + 18446744073709551615, + 18446744073709551615, + 632, + 656, + 632, + 656, + 109, + 111, + true, + "advanced graph-analytics", + "advanced graph-analytics" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 18215467199078216516, + 4540466417441672306, + 18446744073709551615, + 18446744073709551615, + 670, + 691, + 670, + 691, + 115, + 118, + true, + "evaluate deep queries", + "evaluate deep queries" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 17398559026357729506, + 8060255572172587236, + 18446744073709551615, + 18446744073709551615, + 697, + 717, + 697, + 717, + 119, + 121, + true, + "multi-hop traversals", + "multi-hop traversals" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 13692197547477852594, + 14292005679357148705, + 18446744073709551615, + 18446744073709551615, + 721, + 733, + 721, + 733, + 122, + 124, + true, + "large graphs", + "large graphs" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 8106350691486682096, + 14145095568144885133, + 18446744073709551615, + 18446744073709551615, + 737, + 744, + 737, + 744, + 127, + 129, + true, + "B edges", + "B edges" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 16381206574973295053, + 13593287647362680849, + 18446744073709551615, + 18446744073709551615, + 164, + 170, + 164, + 170, + 28, + 29, + true, + "number", + "number" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 14637915316557309079, + 4828060395391576425, + 18446744073709551615, + 18446744073709551615, + 193, + 201, + 193, + 201, + 34, + 35, + true, + "function", + "function" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 16381206566370240312, + 12606485506230202974, + 18446744073709551615, + 18446744073709551615, + 405, + 411, + 405, + 411, + 66, + 67, + true, + "victim", + "victim" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "numval", + "ival", + 4361549257370278754, + "TEXT", + "#/texts/76", + 1.0, + 17767354399704235159, + 18348318207235940730, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "7", + "7" + ], + [ + "numval", + "ival", + 4361549257370278754, + "TEXT", + "#/texts/76", + 1.0, + 15441160910541481979, + 2772124700731079428, + 18446744073709551615, + 18446744073709551615, + 3, + 5, + 3, + 5, + 2, + 3, + true, + "15", + "15" + ], + [ + "numval", + "fval", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 12178341415896435198, + 14026574630810798704, + 18446744073709551615, + 18446744073709551615, + 100, + 103, + 100, + 103, + 19, + 20, + true, + "3.1", + "3.1" + ], + [ + "numval", + "fval", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 12178341415896435199, + 14026574630786503486, + 18446744073709551615, + 18446744073709551615, + 154, + 157, + 154, + 157, + 29, + 30, + true, + "3.2", + "3.2" + ], + [ + "numval", + "fval", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 12178341415896435196, + 14026574630635842602, + 18446744073709551615, + 18446744073709551615, + 233, + 236, + 233, + 236, + 45, + 46, + true, + "3.3", + "3.3" + ], + [ + "numval", + "ival", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 17767354399704235156, + 5196757730407799108, + 18446744073709551615, + 18446744073709551615, + 211, + 212, + 211, + 212, + 39, + 39, + false, + "4", + "4" + ], + [ + "name", + "person-name", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 8106351243953564135, + 5354954892260486825, + 18446744073709551615, + 18446744073709551615, + 212, + 220, + 212, + 220, + 39, + 42, + false, + "J Later", + "J. Later" + ], + [ + "expression", + "wtoken-concatenation", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 329104162105779366, + 17726293975143479221, + 18446744073709551615, + 18446744073709551615, + 208, + 213, + 208, + 213, + 39, + 40, + true, + "Neo4J", + "Neo4J" + ], + [ + "sentence", + "", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 15169275422072685506, + 8924490843340168699, + 18446744073709551615, + 18446744073709551615, + 0, + 88, + 0, + 88, + 0, + 17, + true, + "In the remaining part of this section, we elaborate on our newly developed graph engine.", + "In the remaining part of this section, we elaborate on our newly developed graph engine." + ], + [ + "sentence", + "", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 10870737769007965775, + 8022598482893376902, + 18446744073709551615, + 18446744073709551615, + 89, + 142, + 89, + 142, + 17, + 27, + true, + "In section 3.1, we discuss the implementation design.", + "In section 3.1, we discuss the implementation design." + ], + [ + "sentence", + "", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 16126804403293380759, + 942571218184287509, + 18446744073709551615, + 18446744073709551615, + 143, + 324, + 143, + 324, + 27, + 63, + true, + "In section 3.2, we discuss performance results and compare it to Neo4J. Later, in section 3.3, we will explain how the deep queries are formulated and evaluated in the graph engine.", + "In section 3.2, we discuss performance results and compare it to Neo4J. Later, in section 3.3, we will explain how the deep queries are formulated and evaluated in the graph engine." + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 2924972194163802578, + 1343185001122892048, + 18446744073709551615, + 18446744073709551615, + 75, + 87, + 75, + 87, + 14, + 16, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 5689391492622578219, + 4178339675981596420, + 18446744073709551615, + 18446744073709551615, + 120, + 141, + 120, + 141, + 24, + 26, + true, + "implementation design", + "implementation design" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 7309351122725453953, + 1424245629440322320, + 18446744073709551615, + 18446744073709551615, + 170, + 189, + 170, + 189, + 33, + 35, + true, + "performance results", + "performance results" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 7076268937724050913, + 17647570497214443245, + 18446744073709551615, + 18446744073709551615, + 262, + 274, + 262, + 274, + 52, + 54, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 2924972194163802578, + 1343185001122940285, + 18446744073709551615, + 18446744073709551615, + 311, + 323, + 311, + 323, + 60, + 62, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 389609625632304952, + 17729073815639454901, + 18446744073709551615, + 18446744073709551615, + 17, + 21, + 17, + 21, + 3, + 4, + true, + "part", + "part" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 8106478708629288965, + 9706904241751620002, + 18446744073709551615, + 18446744073709551615, + 30, + 37, + 30, + 37, + 6, + 7, + true, + "section", + "section" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 8106478708629288965, + 9706904241751616082, + 18446744073709551615, + 18446744073709551615, + 92, + 99, + 92, + 99, + 18, + 19, + true, + "section", + "section" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 8106478708629288965, + 9706904241751613295, + 18446744073709551615, + 18446744073709551615, + 146, + 153, + 146, + 153, + 28, + 29, + true, + "section", + "section" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 329104162105779366, + 17726293975143479221, + 18446744073709551615, + 18446744073709551615, + 208, + 213, + 208, + 213, + 39, + 40, + true, + "Neo4J", + "Neo4J" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 8106478708629288965, + 9706904241751509683, + 18446744073709551615, + 18446744073709551615, + 225, + 232, + 225, + 232, + 44, + 45, + true, + "section", + "section" + ], + [ + "numval", + "fval", + 13428900458866068249, + "TEXT", + "#/texts/78", + 1.0, + 12178341415896435198, + 3629736405801839701, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 1, + true, + "3.1", + "3.1" + ], + [ + "numval", + "fval", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 329104147748831777, + 2423845697831217766, + 18446744073709551615, + 18446744073709551615, + 148, + 153, + 148, + 153, + 24, + 25, + true, + "13,14", + "13,14" + ], + [ + "parenthesis", + "round brackets", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 10362086140065524622, + 13319597512915342985, + 18446744073709551615, + 18446744073709551615, + 410, + 441, + 406, + 437, + 71, + 77, + true, + "(typically an unsigned integer)", + "(typically an unsigned integer)" + ], + [ + "sentence", + "", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 11829310439673149559, + 1096556366946463125, + 18446744073709551615, + 18446744073709551615, + 0, + 147, + 0, + 147, + 0, + 24, + true, + "In computer science, two prevalent implementation schemes for graphs have emerged, one using adjacency lists and one relying on adjacency matrices.", + "In computer science, two prevalent implementation schemes for graphs have emerged, one using adjacency lists and one relying on adjacency matrices." + ], + [ + "sentence", + "", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 6164571272575257207, + 6881600727512470107, + 18446744073709551615, + 18446744073709551615, + 154, + 279, + 154, + 279, + 25, + 47, + true, + "In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors.", + "In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors." + ], + [ + "sentence", + "", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 1300122478685074787, + 11675804466477020290, + 18446744073709551615, + 18446744073709551615, + 287, + 344, + 283, + 340, + 48, + 60, + true, + "The edges are therefore stored as a property of the node.", + "The edges are therefore stored as a property of the node." + ], + [ + "sentence", + "", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 17653262178725528111, + 14906657644492502176, + 18446744073709551615, + 18446744073709551615, + 345, + 502, + 341, + 498, + 60, + 89, + true, + "In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples.", + "In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples." + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 4736549060189165039, + 5665060641995713376, + 18446744073709551615, + 18446744073709551615, + 3, + 19, + 3, + 19, + 1, + 3, + true, + "computer science", + "computer science" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 9799555633140790718, + 11746588474588557255, + 18446744073709551615, + 18446744073709551615, + 25, + 57, + 25, + 57, + 5, + 8, + true, + "prevalent implementation schemes", + "prevalent implementation schemes" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 3120046212755594191, + 15111940143832491527, + 18446744073709551615, + 18446744073709551615, + 93, + 108, + 93, + 108, + 15, + 17, + true, + "adjacency lists", + "adjacency lists" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 16579929503880818246, + 9745979266607673578, + 18446744073709551615, + 18446744073709551615, + 128, + 146, + 128, + 146, + 21, + 23, + true, + "adjacency matrices", + "adjacency matrices" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 8120096385401382748, + 6847735266077093, + 18446744073709551615, + 18446744073709551615, + 161, + 182, + 161, + 182, + 27, + 30, + true, + "adjacency list format", + "adjacency list format" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 6848778759897299700, + 6443024831232567950, + 18446744073709551615, + 18446744073709551615, + 352, + 377, + 348, + 373, + 62, + 65, + true, + "adjacency matrix approach", + "adjacency matrix approach" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 11971109509670241646, + 13587803609989610789, + 18446744073709551615, + 18446744073709551615, + 424, + 440, + 420, + 436, + 74, + 76, + true, + "unsigned integer", + "unsigned integer" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 16381206539879417749, + 6940358010355959125, + 18446744073709551615, + 18446744073709551615, + 62, + 68, + 62, + 68, + 9, + 10, + true, + "graphs", + "graphs" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 389609625621164460, + 15514573413788730542, + 18446744073709551615, + 18446744073709551615, + 190, + 194, + 190, + 194, + 32, + 33, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 16381206566431505764, + 3115955576590759354, + 18446744073709551615, + 18446744073709551615, + 213, + 219, + 213, + 219, + 36, + 37, + true, + "object", + "object" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 12178341415895638602, + 7154652167856968436, + 18446744073709551615, + 18446744073709551615, + 237, + 240, + 237, + 240, + 40, + 41, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 8106398345633211267, + 17718037347447073081, + 18446744073709551615, + 18446744073709551615, + 244, + 251, + 244, + 251, + 42, + 43, + true, + "indices", + "indices" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 6169326768163434458, + 7636521745233070077, + 18446744073709551615, + 18446744073709551615, + 269, + 278, + 269, + 278, + 45, + 46, + true, + "neighbors", + "neighbors" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 329104162186494203, + 7406953761343574094, + 18446744073709551615, + 18446744073709551615, + 291, + 296, + 287, + 292, + 49, + 50, + true, + "edges", + "edges" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 14814125841683215315, + 12241347891873585821, + 18446744073709551615, + 18446744073709551615, + 323, + 331, + 319, + 327, + 55, + 56, + true, + "property", + "property" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 389609625621164460, + 15514573413788712138, + 18446744073709551615, + 18446744073709551615, + 339, + 343, + 335, + 339, + 58, + 59, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 329104161758737773, + 7410031061966183022, + 18446744073709551615, + 18446744073709551615, + 383, + 388, + 379, + 384, + 67, + 68, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 15995920061809434509, + 1660220959282764167, + 18446744073709551615, + 18446744073709551615, + 399, + 409, + 395, + 405, + 70, + 71, + true, + "identifier", + "identifier" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 329104162186494203, + 7406953761389627803, + 18446744073709551615, + 18446744073709551615, + 450, + 455, + 446, + 451, + 79, + 80, + true, + "edges", + "edges" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 389609625633315922, + 15508611803302681211, + 18446744073709551615, + 18446744073709551615, + 472, + 476, + 468, + 472, + 84, + 85, + true, + "list", + "list" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 16381206516227726330, + 7881028199436551027, + 18446744073709551615, + 18446744073709551615, + 495, + 501, + 491, + 497, + 87, + 88, + true, + "tuples", + "tuples" + ], + [ + "numval", + "ival", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 15441160910541481977, + 17073318187218057934, + 18446744073709551615, + 18446744073709551615, + 111, + 113, + 111, + 113, + 17, + 18, + true, + "13", + "13" + ], + [ + "expression", + "word-concatenation", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 12320233288833810715, + 13142345776581091864, + 18446744073709551615, + 18446744073709551615, + 71, + 88, + 71, + 88, + 12, + 13, + true, + "matrix-operations", + "matrix-operations" + ], + [ + "expression", + "word-concatenation", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 8759553427650775934, + 3946005403065402451, + 18446744073709551615, + 18446744073709551615, + 140, + 155, + 140, + 155, + 23, + 24, + true, + "graph-traversal", + "graph-traversal" + ], + [ + "sentence", + "", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 14752907807764014625, + 7112639404154086708, + 18446744073709551615, + 18446744073709551615, + 0, + 110, + 0, + 110, + 0, + 17, + true, + "It is commonly known that most graph operations can be translated into matrix-operations using linear algebra.", + "It is commonly known that most graph operations can be translated into matrix-operations using linear algebra." + ], + [ + "sentence", + "", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 7006429886037621508, + 125144349920919638, + 18446744073709551615, + 18446744073709551615, + 114, + 159, + 114, + 159, + 18, + 26, + true, + "For example, consider the graph-traversal V !", + "For example, consider the graph-traversal V !" + ], + [ + "sentence", + "", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 2226059187471967763, + 8569977295461945765, + 18446744073709551615, + 18446744073709551615, + 160, + 269, + 160, + 269, + 26, + 55, + true, + "A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W.", + "A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W." + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 20806960854514546, + 8439122538985534303, + 18446744073709551615, + 18446744073709551615, + 26, + 47, + 26, + 47, + 5, + 8, + true, + "most graph operations", + "most graph operations" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 11590149467838756247, + 4637729301611600179, + 18446744073709551615, + 18446744073709551615, + 95, + 109, + 95, + 109, + 14, + 16, + true, + "linear algebra", + "linear algebra" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 15112762946309336257, + 573858880113216858, + 18446744073709551615, + 18446744073709551615, + 140, + 157, + 140, + 157, + 23, + 25, + true, + "graph-traversal V", + "graph-traversal V" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 8106352617178756957, + 5831781823529690849, + 18446744073709551615, + 18446744073709551615, + 197, + 204, + 197, + 204, + 37, + 39, + true, + "nodes V", + "nodes V" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 8106342689900874417, + 3060939093264365538, + 18446744073709551615, + 18446744073709551615, + 250, + 257, + 250, + 257, + 49, + 51, + true, + "new set", + "new set" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 8106352617178756958, + 5831781823513588259, + 18446744073709551615, + 18446744073709551615, + 261, + 268, + 261, + 268, + 52, + 54, + true, + "nodes W", + "nodes W" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 11590149467838756247, + 4637729301611654414, + 18446744073709551615, + 18446744073709551615, + 307, + 321, + 307, + 321, + 61, + 63, + true, + "linear algebra", + "linear algebra" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 12320233288833810715, + 13142345776581091864, + 18446744073709551615, + 18446744073709551615, + 71, + 88, + 71, + 88, + 12, + 13, + true, + "matrix-operations", + "matrix-operations" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 8106397496085150773, + 13505373486844891217, + 18446744073709551615, + 18446744073709551615, + 118, + 125, + 118, + 125, + 19, + 20, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 12178341415895638602, + 6908352950519464398, + 18446744073709551615, + 18446744073709551615, + 190, + 193, + 190, + 193, + 35, + 36, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 389609625699630670, + 16553309401039496143, + 18446744073709551615, + 18446744073709551615, + 222, + 226, + 222, + 226, + 42, + 43, + true, + "edge", + "edge" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 329104161571401725, + 2466996076977359002, + 18446744073709551615, + 18446744073709551615, + 232, + 237, + 232, + 237, + 45, + 46, + true, + "order", + "order" + ], + [ + "numval", + "ival", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 17767354399704235161, + 16151623650567223960, + 18446744073709551615, + 18446744073709551615, + 36, + 37, + 36, + 37, + 7, + 8, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 17767354399704235160, + 16151623650470238720, + 18446744073709551615, + 18446744073709551615, + 53, + 54, + 53, + 54, + 13, + 14, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 17767354399704235162, + 16151623650448785184, + 18446744073709551615, + 18446744073709551615, + 67, + 68, + 67, + 68, + 18, + 19, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 15441160910541481788, + 6320979167967070076, + 18446744073709551615, + 18446744073709551615, + 80, + 82, + 80, + 82, + 21, + 21, + false, + "26", + "26" + ], + [ + "numval", + "ival", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 17767354399704235161, + 16151623650567217445, + 18446744073709551615, + 18446744073709551615, + 87, + 88, + 86, + 87, + 23, + 24, + true, + "1", + "1" + ], + [ + "expression", + "wtoken-concatenation", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 329104159258693175, + 15542061184572896869, + 18446744073709551615, + 18446744073709551615, + 2, + 9, + 2, + 9, + 1, + 2, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "expression", + "wtoken-concatenation", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 5948620232447446819, + 6902872370209677045, + 18446744073709551615, + 18446744073709551615, + 22, + 35, + 22, + 35, + 6, + 7, + true, + "^{!}_{i}=", + "$^{!}$$_{i}$=" + ], + [ + "expression", + "wtoken-concatenation", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 7116489890516680880, + 17126918343435525249, + 18446744073709551615, + 18446744073709551615, + 73, + 83, + 73, + 83, + 21, + 22, + true, + "GLYPH", + "GLYPH" + ], + [ + "sentence", + "", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 4763538990111593484, + 7324065661356045772, + 18446744073709551615, + 18446744073709551615, + 2, + 14, + 2, + 14, + 1, + 4, + true, + "$^{!}$= Av !", + "$^{!}$= Av !" + ], + [ + "term", + "single-term", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 15441160910541480528, + 6320979252162665530, + 18446744073709551615, + 18446744073709551615, + 10, + 12, + 10, + 12, + 2, + 3, + true, + "Av", + "Av" + ], + [ + "term", + "single-term", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 5948620232447446819, + 6902872370209677045, + 18446744073709551615, + 18446744073709551615, + 22, + 35, + 22, + 35, + 6, + 7, + true, + "^{!}_{i}=", + "$^{!}$$_{i}$=" + ], + [ + "term", + "single-term", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 7116489890516680880, + 17126918343435525249, + 18446744073709551615, + 18446744073709551615, + 73, + 83, + 73, + 83, + 21, + 22, + true, + "GLYPH", + "GLYPH" + ], + [ + "term", + "single-term", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 17767354399704232782, + 16151733485621225349, + 18446744073709551615, + 18446744073709551615, + 89, + 91, + 88, + 89, + 24, + 25, + true, + "\u00de", + "\u00de" + ], + [ + "numval", + "ival", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 17767354399704235161, + 3519266004279806136, + 18446744073709551615, + 18446744073709551615, + 298, + 299, + 298, + 299, + 49, + 50, + true, + "1", + "1" + ], + [ + "parenthesis", + "reference", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 12178341415896395122, + 17624485535393633926, + 18446744073709551615, + 18446744073709551615, + 297, + 300, + 297, + 300, + 48, + 51, + true, + "(1)", + "(1)" + ], + [ + "expression", + "word-concatenation", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 10308187620027892234, + 14969691802076868346, + 18446744073709551615, + 18446744073709551615, + 87, + 103, + 87, + 103, + 15, + 16, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "expression", + "word-concatenation", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 8106398446669642199, + 15040166940957951725, + 18446744073709551615, + 18446744073709551615, + 223, + 230, + 223, + 230, + 35, + 36, + true, + "k-order", + "k-order" + ], + [ + "sentence", + "", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 3851804627813001865, + 13015440137812302191, + 18446744073709551615, + 18446744073709551615, + 9, + 67, + 9, + 67, + 2, + 13, + true, + "A being the adjacency matrix representation of the edge A.", + "A being the adjacency matrix representation of the edge A." + ], + [ + "sentence", + "", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 2032619489352425818, + 538991010858960620, + 18446744073709551615, + 18446744073709551615, + 68, + 195, + 68, + 195, + 13, + 29, + true, + "Translating single graph-traversals into linear algebra operations significantly simplifies the job of deeper graph traversals.", + "Translating single graph-traversals into linear algebra operations significantly simplifies the job of deeper graph traversals." + ], + [ + "term", + "single-term", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 4873481650009956064, + 15386058022656772733, + 18446744073709551615, + 18446744073709551615, + 21, + 52, + 21, + 52, + 5, + 8, + true, + "adjacency matrix representation", + "adjacency matrix representation" + ], + [ + "term", + "single-term", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 596744655414698488, + 8466256972977815768, + 18446744073709551615, + 18446744073709551615, + 80, + 103, + 80, + 103, + 14, + 16, + true, + "single graph-traversals", + "single graph-traversals" + ], + [ + "term", + "single-term", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 10234516777483182094, + 11037622281680419501, + 18446744073709551615, + 18446744073709551615, + 109, + 134, + 109, + 134, + 17, + 20, + true, + "linear algebra operations", + "linear algebra operations" + ], + [ + "term", + "single-term", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 2942854117731362049, + 8442434054030131933, + 18446744073709551615, + 18446744073709551615, + 178, + 194, + 178, + 194, + 26, + 28, + true, + "graph traversals", + "graph traversals" + ], + [ + "term", + "single-term", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 4946368908371952460, + 5455430373091082826, + 18446744073709551615, + 18446744073709551615, + 223, + 243, + 223, + 243, + 35, + 37, + true, + "k-order neighborhood", + "k-order neighborhood" + ], + [ + "term", + "single-term", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 8106397710734548391, + 7348922376317937116, + 18446744073709551615, + 18446744073709551615, + 301, + 308, + 301, + 308, + 51, + 53, + true, + "k times", + "k times" + ], + [ + "term", + "single-term", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 389609625699630670, + 11520602034870783404, + 18446744073709551615, + 18446744073709551615, + 60, + 64, + 60, + 64, + 10, + 11, + true, + "edge", + "edge" + ], + [ + "term", + "single-term", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 12178341415895642350, + 17624314687733832779, + 18446744073709551615, + 18446744073709551615, + 164, + 167, + 164, + 167, + 23, + 24, + true, + "job", + "job" + ], + [ + "term", + "single-term", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 8106397496085150773, + 2498148866898256562, + 18446744073709551615, + 18446744073709551615, + 200, + 207, + 200, + 207, + 30, + 31, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 389609625621164460, + 11520212110934286375, + 18446744073709551615, + 18446744073709551615, + 247, + 251, + 247, + 251, + 38, + 39, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 14650266729725885817, + 73079070439285428, + 18446744073709551615, + 18446744073709551615, + 288, + 296, + 288, + 296, + 47, + 48, + true, + "Equation", + "Equation" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481860, + 1648917876881521913, + 18446744073709551615, + 18446744073709551615, + 46, + 48, + 44, + 46, + 8, + 8, + false, + "16", + "16" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481861, + 1648917874734449247, + 18446744073709551615, + 18446744073709551615, + 56, + 58, + 54, + 56, + 8, + 8, + false, + "17", + "17" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481860, + 1648917876881497232, + 18446744073709551615, + 18446744073709551615, + 67, + 69, + 65, + 67, + 9, + 9, + false, + "16", + "16" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481861, + 1648917874734455276, + 18446744073709551615, + 18446744073709551615, + 78, + 80, + 76, + 78, + 10, + 10, + false, + "17", + "17" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481860, + 1648917876881494293, + 18446744073709551615, + 18446744073709551615, + 89, + 91, + 87, + 89, + 11, + 11, + false, + "16", + "16" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481861, + 1648917874734450570, + 18446744073709551615, + 18446744073709551615, + 100, + 102, + 98, + 100, + 12, + 12, + false, + "17", + "17" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 17767354399704235162, + 10344599291481597093, + 18446744073709551615, + 18446744073709551615, + 109, + 110, + 106, + 107, + 15, + 16, + true, + "2", + "2" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 329104159258693175, + 2252135346473627204, + 18446744073709551615, + 18446744073709551615, + 2, + 9, + 2, + 9, + 1, + 2, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 16381206533995544860, + 4408889735966271730, + 18446744073709551615, + 18446744073709551615, + 10, + 18, + 10, + 18, + 2, + 3, + true, + "A^{k}v", + "A$^{k}$v" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 329104159258693175, + 2252135346473624072, + 18446744073709551615, + 18446744073709551615, + 19, + 26, + 19, + 26, + 3, + 4, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 3078199901586211847, + 17286928490334208994, + 18446744073709551615, + 18446744073709551615, + 39, + 59, + 37, + 57, + 8, + 9, + true, + "GLYPHGLYPH", + "GLYPHGLYPH" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 7116489890516676705, + 2094556379511614885, + 18446744073709551615, + 18446744073709551615, + 60, + 70, + 58, + 68, + 9, + 10, + true, + "GLYPH", + "GLYPH" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 7116489890516677026, + 2094556361631692442, + 18446744073709551615, + 18446744073709551615, + 71, + 81, + 69, + 79, + 10, + 11, + true, + "GLYPH", + "GLYPH" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 7116489890516676705, + 2094556379511615262, + 18446744073709551615, + 18446744073709551615, + 82, + 92, + 80, + 90, + 11, + 12, + true, + "GLYPH", + "GLYPH" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 7116489890516677026, + 2094556361631691128, + 18446744073709551615, + 18446744073709551615, + 93, + 103, + 91, + 101, + 12, + 13, + true, + "GLYPH", + "GLYPH" + ], + [ + "sentence", + "", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 17097266637859369288, + 13058552981961095366, + 18446744073709551615, + 18446744073709551615, + 2, + 38, + 2, + 36, + 1, + 8, + true, + "$^{!}$= A$^{k}$v $^{!}$= AA \u2026 Av !", + "$^{!}$= A$^{k}$v $^{!}$= AA \u2026 Av !" + ], + [ + "numval", + "fval", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 329104147748297973, + 7115759532919018249, + 18446744073709551615, + 18446744073709551615, + 503, + 508, + 503, + 508, + 93, + 94, + true, + "15,16", + "15,16" + ], + [ + "numval", + "ival", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 17767354399704235161, + 18186955703423630693, + 18446744073709551615, + 18446744073709551615, + 76, + 77, + 76, + 77, + 13, + 14, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 17767354399704235161, + 18186955703423585534, + 18446744073709551615, + 18446744073709551615, + 265, + 266, + 265, + 266, + 45, + 46, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 17767354399704235161, + 18186955703423582023, + 18446744073709551615, + 18446744073709551615, + 372, + 373, + 372, + 373, + 69, + 70, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 17767354399704235161, + 18186955703423684934, + 18446744073709551615, + 18446744073709551615, + 681, + 682, + 681, + 682, + 121, + 122, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 17767354399704235162, + 18186955704176922280, + 18446744073709551615, + 18446744073709551615, + 776, + 777, + 776, + 777, + 142, + 143, + true, + "2", + "2" + ], + [ + "parenthesis", + "reference", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12178341415896395122, + 15809942743487463376, + 18446744073709551615, + 18446744073709551615, + 75, + 78, + 75, + 78, + 12, + 15, + true, + "(1)", + "(1)" + ], + [ + "parenthesis", + "reference", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12178341415896395122, + 15809942743487507881, + 18446744073709551615, + 18446744073709551615, + 264, + 267, + 264, + 267, + 44, + 47, + true, + "(1)", + "(1)" + ], + [ + "parenthesis", + "reference", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12178341415896395122, + 15809942743487478174, + 18446744073709551615, + 18446744073709551615, + 371, + 374, + 371, + 374, + 68, + 71, + true, + "(1)", + "(1)" + ], + [ + "parenthesis", + "reference", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12178341415896395122, + 15809942743487485425, + 18446744073709551615, + 18446744073709551615, + 680, + 683, + 680, + 683, + 120, + 123, + true, + "(1)", + "(1)" + ], + [ + "parenthesis", + "round brackets", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 16380808301996821998, + 15698441970854417697, + 18446744073709551615, + 18446744073709551615, + 431, + 437, + 431, + 437, + 80, + 83, + true, + "(SpMV)", + "(SpMV)" + ], + [ + "expression", + "word-concatenation", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 369269646322723959, + 3452033464673941326, + 18446744073709551615, + 18446744073709551615, + 532, + 548, + 532, + 548, + 98, + 99, + true, + "graph-analytical", + "graph-analytical" + ], + [ + "expression", + "word-concatenation", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 6184977925120246739, + 3173903523454537724, + 18446744073709551615, + 18446744073709551615, + 626, + 635, + 626, + 635, + 112, + 113, + true, + "page-rank", + "page-rank" + ], + [ + "expression", + "word-concatenation", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 369269646322723959, + 3452033464673955722, + 18446744073709551615, + 18446744073709551615, + 821, + 837, + 821, + 837, + 151, + 152, + true, + "graph-analytical", + "graph-analytical" + ], + [ + "expression", + "latex", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 389609625699793568, + 8079745072465029660, + 18446744073709551615, + 18446744073709551615, + 746, + 752, + 746, + 752, + 135, + 136, + true, + "^{!}", + "$^{!}$" + ], + [ + "sentence", + "", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 5775536069845081632, + 13168819924872490931, + 18446744073709551615, + 18446744073709551615, + 0, + 108, + 0, + 108, + 0, + 20, + true, + "Therefore, deep queries can be implemented efficiently as long as Equation (1) can be evaluated efficiently.", + "Therefore, deep queries can be implemented efficiently as long as Equation (1) can be evaluated efficiently." + ], + [ + "sentence", + "", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 18120357783789340975, + 10586026389391004299, + 18446744073709551615, + 18446744073709551615, + 109, + 293, + 109, + 293, + 20, + 53, + true, + "Over the past decades, lots of research has been conducted in the High Performance Computing community on the acceleration and parallelization of Equation (1) in the context of graphs.", + "Over the past decades, lots of research has been conducted in the High Performance Computing community on the acceleration and parallelization of Equation (1) in the context of graphs." + ], + [ + "sentence", + "", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14902507466498554187, + 4746952862969696045, + 18446744073709551615, + 18446744073709551615, + 294, + 502, + 294, + 502, + 53, + 93, + true, + "In this context, the matrix A is sparse and the linear operation of Equation (1) is referred to as a sparse matrix vector multiplication (SpMV), for which highly optimized implementations have been developed.", + "In this context, the matrix A is sparse and the linear operation of Equation (1) is referred to as a sparse matrix vector multiplication (SpMV), for which highly optimized implementations have been developed." + ], + [ + "sentence", + "", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 972796118155588075, + 768003519665868807, + 18446744073709551615, + 18446744073709551615, + 509, + 600, + 509, + 600, + 94, + 107, + true, + "Notably, most advanced graph-analytical operations can be formulated using SpMV operations.", + "Notably, most advanced graph-analytical operations can be formulated using SpMV operations." + ], + [ + "sentence", + "", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 9907555007325257702, + 14048835156430871966, + 18446744073709551615, + 18446744073709551615, + 601, + 731, + 601, + 731, + 107, + 131, + true, + "The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w !", + "The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w !" + ], + [ + "sentence", + "", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 8106340349709889698, + 8414473377762557457, + 18446744073709551615, + 18446744073709551615, + 746, + 753, + 746, + 753, + 135, + 137, + true, + "$^{!}$.", + "$^{!}$." + ], + [ + "sentence", + "", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12894202728737595682, + 16352149120081517995, + 18446744073709551615, + 18446744073709551615, + 754, + 960, + 754, + 960, + 137, + 172, + true, + "In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations.", + "In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations." + ], + [ + "term", + "enum-term-mark-2", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 11536514477790993181, + 1172284701792082182, + 18446744073709551615, + 18446744073709551615, + 219, + 251, + 219, + 251, + 39, + 42, + true, + "acceleration and parallelization", + "acceleration and parallelization" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 7076268937724050913, + 16904614497174539564, + 18446744073709551615, + 18446744073709551615, + 11, + 23, + 11, + 23, + 2, + 4, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 16264934656264211635, + 4511209860311708157, + 18446744073709551615, + 18446744073709551615, + 118, + 130, + 118, + 130, + 22, + 24, + true, + "past decades", + "past decades" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 9489326438360715521, + 16290129180775347077, + 18446744073709551615, + 18446744073709551615, + 175, + 211, + 175, + 211, + 33, + 37, + true, + "High Performance Computing community", + "High Performance Computing community" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 11302648084487719921, + 15524726029551673233, + 18446744073709551615, + 18446744073709551615, + 342, + 358, + 342, + 358, + 64, + 66, + true, + "linear operation", + "linear operation" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 15221837707742504836, + 13831002432772278526, + 18446744073709551615, + 18446744073709551615, + 395, + 430, + 395, + 430, + 76, + 80, + true, + "sparse matrix vector multiplication", + "sparse matrix vector multiplication" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14322965921221919498, + 11473013839699150580, + 18446744073709551615, + 18446744073709551615, + 523, + 559, + 523, + 559, + 97, + 100, + true, + "advanced graph-analytical operations", + "advanced graph-analytical operations" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 4304811534835870538, + 1394572827254744857, + 18446744073709551615, + 18446744073709551615, + 584, + 599, + 584, + 599, + 104, + 106, + true, + "SpMV operations", + "SpMV operations" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 3064129899577325203, + 17940686330604280218, + 18446744073709551615, + 18446744073709551615, + 610, + 622, + 610, + 622, + 109, + 111, + true, + "trivial case", + "trivial case" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12580512760652482076, + 13583953375349869788, + 18446744073709551615, + 18446744073709551615, + 761, + 774, + 761, + 774, + 139, + 141, + true, + "previous work", + "previous work" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 9852550978467154499, + 1277807238982273312, + 18446744073709551615, + 18446744073709551615, + 821, + 848, + 821, + 848, + 151, + 153, + true, + "graph-analytical operations", + "graph-analytical operations" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 4977542118862070209, + 12350177894838464292, + 18446744073709551615, + 18446744073709551615, + 857, + 874, + 857, + 874, + 155, + 157, + true, + "node centralities", + "node centralities" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 9079575722732701095, + 13885675971977617571, + 18446744073709551615, + 18446744073709551615, + 879, + 896, + 879, + 896, + 158, + 160, + true, + "spectral analysis", + "spectral analysis" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 10146385311913577445, + 3881716146591211694, + 18446744073709551615, + 18446744073709551615, + 939, + 959, + 939, + 959, + 168, + 171, + true, + "only SpMV operations", + "only SpMV operations" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14650266729725885817, + 10065410986654176293, + 18446744073709551615, + 18446744073709551615, + 66, + 74, + 66, + 74, + 11, + 12, + true, + "Equation", + "Equation" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 389609625633531007, + 8072331032130786374, + 18446744073709551615, + 18446744073709551615, + 132, + 136, + 132, + 136, + 25, + 26, + true, + "lots", + "lots" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14634109233387695059, + 8230722811753480240, + 18446744073709551615, + 18446744073709551615, + 140, + 148, + 140, + 148, + 27, + 28, + true, + "research", + "research" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 7389039184143186430, + 4020754544621133412, + 18446744073709551615, + 18446744073709551615, + 219, + 231, + 219, + 231, + 39, + 40, + true, + "acceleration", + "acceleration" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 4992630253996742003, + 9350224994567595782, + 18446744073709551615, + 18446744073709551615, + 236, + 251, + 236, + 251, + 41, + 42, + true, + "parallelization", + "parallelization" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14650266729725885817, + 10065410986654222038, + 18446744073709551615, + 18446744073709551615, + 255, + 263, + 255, + 263, + 43, + 44, + true, + "Equation", + "Equation" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 8106398484416909789, + 6445237109781063638, + 18446744073709551615, + 18446744073709551615, + 275, + 282, + 275, + 282, + 49, + 50, + true, + "context", + "context" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 16381206539879417749, + 14212076125635256438, + 18446744073709551615, + 18446744073709551615, + 286, + 292, + 286, + 292, + 51, + 52, + true, + "graphs", + "graphs" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 8106398484416909789, + 6445237109781064988, + 18446744073709551615, + 18446744073709551615, + 302, + 309, + 302, + 309, + 55, + 56, + true, + "context", + "context" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 16381206594266103973, + 11886141757564266520, + 18446744073709551615, + 18446744073709551615, + 315, + 321, + 315, + 321, + 58, + 59, + true, + "matrix", + "matrix" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14650266729725885817, + 10065410986654222955, + 18446744073709551615, + 18446744073709551615, + 362, + 370, + 362, + 370, + 67, + 68, + true, + "Equation", + "Equation" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 389609625540553279, + 8138653950944491239, + 18446744073709551615, + 18446744073709551615, + 432, + 436, + 432, + 436, + 81, + 82, + true, + "SpMV", + "SpMV" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 5211900619579820608, + 5678836820111735314, + 18446744073709551615, + 18446744073709551615, + 466, + 481, + 466, + 481, + 88, + 89, + true, + "implementations", + "implementations" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14650266729725885817, + 10065410986654148478, + 18446744073709551615, + 18446744073709551615, + 671, + 679, + 671, + 679, + 119, + 120, + true, + "Equation", + "Equation" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 2989796650905950968, + 6786698869169380411, + 18446744073709551615, + 18446744073709551615, + 687, + 698, + 687, + 698, + 124, + 125, + true, + "combination", + "combination" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 18007575068397390542, + 14744607835693282246, + 18446744073709551615, + 18446744073709551615, + 706, + 721, + 706, + 721, + 127, + 128, + true, + "renormalization", + "renormalization" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 16381206568246674273, + 11429613659753629348, + 18446744073709551615, + 18446744073709551615, + 800, + 806, + 800, + 806, + 148, + 149, + true, + "detail", + "detail" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 329104159211247965, + 9136957105756070646, + 18446744073709551615, + 18446744073709551615, + 904, + 909, + 904, + 909, + 162, + 163, + true, + "graph", + "graph" + ], + [ + "sentence", + "", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 13884895358995816532, + 10671792459590835108, + 18446744073709551615, + 18446744073709551615, + 0, + 204, + 0, + 204, + 0, + 37, + true, + "Since both deep queries and advanced graph analytics hugely benefit from a fast SpMV kernel, we have opted to design the graph engine in the CPS platform to work entirely with the adjacency matrix format.", + "Since both deep queries and advanced graph analytics hugely benefit from a fast SpMV kernel, we have opted to design the graph engine in the CPS platform to work entirely with the adjacency matrix format." + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 7076268937724050913, + 14136229895878741561, + 18446744073709551615, + 18446744073709551615, + 11, + 23, + 11, + 23, + 2, + 4, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 1325639643510008878, + 3345783597167709430, + 18446744073709551615, + 18446744073709551615, + 28, + 52, + 28, + 52, + 5, + 8, + true, + "advanced graph analytics", + "advanced graph analytics" + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 13973298705492850553, + 16427562040426690599, + 18446744073709551615, + 18446744073709551615, + 75, + 91, + 75, + 91, + 12, + 15, + true, + "fast SpMV kernel", + "fast SpMV kernel" + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 2924972194163802578, + 13928333483376329414, + 18446744073709551615, + 18446744073709551615, + 121, + 133, + 121, + 133, + 22, + 24, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 12779036928191531604, + 14066357852666934661, + 18446744073709551615, + 18446744073709551615, + 141, + 153, + 141, + 153, + 26, + 28, + true, + "CPS platform", + "CPS platform" + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 17729840004664227381, + 11831936904412939564, + 18446744073709551615, + 18446744073709551615, + 180, + 203, + 180, + 203, + 33, + 36, + true, + "adjacency matrix format", + "adjacency matrix format" + ], + [ + "numval", + "fval", + 15385417954505503552, + "TEXT", + "#/texts/86", + 1.0, + 12178341415896435199, + 16109275631913765862, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 1, + true, + "3.2", + "3.2" + ], + [ + "numval", + "ival", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 15441160910541481861, + 93251422791520216, + 18446744073709551615, + 18446744073709551615, + 1214, + 1216, + 1214, + 1216, + 194, + 195, + true, + "17", + "17" + ], + [ + "parenthesis", + "round brackets", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 12519231069705186881, + 3244334725177660806, + 18446744073709551615, + 18446744073709551615, + 488, + 501, + 488, + 501, + 74, + 79, + true, + "(or even all)", + "(or even all)" + ], + [ + "parenthesis", + "round brackets", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 14654336952868801491, + 17810856186567143228, + 18446744073709551615, + 18446744073709551615, + 1024, + 1032, + 1024, + 1032, + 158, + 161, + true, + "(SpMSpV)", + "(SpMSpV)" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4206645798095581420, + 12345799280851160234, + 18446744073709551615, + 18446744073709551615, + 35, + 49, + 35, + 49, + 5, + 6, + true, + "matrices-based", + "matrices-based" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16287604968962194110, + 17739614045833498641, + 18446744073709551615, + 18446744073709551615, + 166, + 178, + 166, + 178, + 23, + 24, + true, + "node-centric", + "node-centric" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8248539907401009667, + 2265519894835621749, + 18446744073709551615, + 18446744073709551615, + 208, + 221, + 208, + 221, + 28, + 29, + true, + "data-locality", + "data-locality" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 10308187620027892234, + 10064995642940078062, + 18446744073709551615, + 18446744073709551615, + 452, + 468, + 452, + 468, + 70, + 71, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 10308187620027892234, + 10064995642940158702, + 18446744073709551615, + 18446744073709551615, + 709, + 725, + 709, + 725, + 114, + 115, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 10308187620027892234, + 10064995642940178589, + 18446744073709551615, + 18446744073709551615, + 915, + 931, + 915, + 931, + 144, + 145, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4747238687892740263, + 10797718497119362743, + 18446744073709551615, + 18446744073709551615, + 981, + 994, + 981, + 994, + 155, + 156, + true, + "sparse-matrix", + "sparse-matrix" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 14404307070545181542, + 7498659441809181561, + 18446744073709551615, + 18446744073709551615, + 1152, + 1164, + 1152, + 1164, + 183, + 184, + true, + "cache-misses", + "cache-misses" + ], + [ + "expression", + "wtoken-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4747241205902184445, + 17195728821214094844, + 18446744073709551615, + 18446744073709551615, + 995, + 1008, + 995, + 1008, + 156, + 157, + true, + "sparse-vector", + "sparse-vector" + ], + [ + "sentence", + "", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 10270848120294375051, + 14807652315789881244, + 18446744073709551615, + 18446744073709551615, + 0, + 115, + 0, + 115, + 0, + 14, + true, + "Both adjacency lists and adjacency matrices-based graph implementations have specific advantages and disadvantages.", + "Both adjacency lists and adjacency matrices-based graph implementations have specific advantages and disadvantages." + ], + [ + "sentence", + "", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16964777468013118327, + 12125593630312832079, + 18446744073709551615, + 18446744073709551615, + 116, + 281, + 116, + 281, + 14, + 40, + true, + "The adjacency list format is very well suited for node-centric operations since it exploits data-locality for local graph operations, such as first order traversals.", + "The adjacency list format is very well suited for node-centric operations since it exploits data-locality for local graph operations, such as first order traversals." + ], + [ + "sentence", + "", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 15450473824681718350, + 11316135833913700142, + 18446744073709551615, + 18446744073709551615, + 282, + 416, + 282, + 416, + 40, + 63, + true, + "However, it proves suboptimal for global scale graph operations, which are required for deep queries and the advanced graph analytics.", + "However, it proves suboptimal for global scale graph operations, which are required for deep queries and the advanced graph analytics." + ], + [ + "sentence", + "", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 7827972506356892881, + 14520623047921494047, + 18446744073709551615, + 18446744073709551615, + 417, + 559, + 417, + 559, + 63, + 89, + true, + "Here, one typically has to perform graph-traversals starting from many (or even all) nodes and accumulating the weight in the resulting nodes.", + "Here, one typically has to perform graph-traversals starting from many (or even all) nodes and accumulating the weight in the resulting nodes." + ], + [ + "sentence", + "", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4011742133887184242, + 11488487201968882536, + 18446744073709551615, + 18446744073709551615, + 560, + 674, + 560, + 674, + 89, + 110, + true, + "In an adjacency list format, this often leads to many cache misses during execution, resulting in low performance.", + "In an adjacency list format, this often leads to many cache misses during execution, resulting in low performance." + ], + [ + "sentence", + "", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 17312495112187822630, + 6156571435774831825, + 18446744073709551615, + 18446744073709551615, + 675, + 842, + 675, + 842, + 110, + 131, + true, + "Furthermore, parallelizing global graph-traversals in the adjacency list format suffers significantly from concurrent write conflicts between threads during execution.", + "Furthermore, parallelizing global graph-traversals in the adjacency list format suffers significantly from concurrent write conflicts between threads during execution." + ], + [ + "sentence", + "", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4883310113983762862, + 957944940518831119, + 18446744073709551615, + 18446744073709551615, + 843, + 910, + 843, + 910, + 131, + 143, + true, + "In the adjacency matrix format, these problems are not encountered.", + "In the adjacency matrix format, these problems are not encountered." + ], + [ + "sentence", + "", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 18351971490013416011, + 4499762574647525101, + 18446744073709551615, + 18446744073709551615, + 911, + 1033, + 911, + 1033, + 143, + 162, + true, + "The graph-traversals can be directly translated into a SpMV or even a sparse-matrix sparse-vector multiplication (SpMSpV).", + "The graph-traversals can be directly translated into a SpMV or even a sparse-matrix sparse-vector multiplication (SpMSpV)." + ], + [ + "sentence", + "", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 3718840373702004760, + 15287027377710879857, + 18446744073709551615, + 18446744073709551615, + 1034, + 1213, + 1034, + 1213, + 162, + 194, + true, + "It has also been well established how to execute the SpMV effectively in a multithreaded fashion, and how to minimize cache-misses by applying a clever sorting of the tuples list.", + "It has also been well established how to execute the SpMV effectively in a multithreaded fashion, and how to minimize cache-misses by applying a clever sorting of the tuples list." + ], + [ + "term", + "enum-term-mark-3", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 13909108517714618598, + 9720415877503432059, + 18446744073709551615, + 18446744073709551615, + 86, + 114, + 86, + 114, + 10, + 13, + true, + "advantages and disadvantages", + "advantages and disadvantages" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 3120046212755594191, + 2344204684828230349, + 18446744073709551615, + 18446744073709551615, + 5, + 20, + 5, + 20, + 1, + 3, + true, + "adjacency lists", + "adjacency lists" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 17793847285598996650, + 12421205174358986052, + 18446744073709551615, + 18446744073709551615, + 25, + 71, + 25, + 71, + 4, + 8, + true, + "adjacency matrices-based graph implementations", + "adjacency matrices-based graph implementations" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 11527770354380822892, + 1525925456019954586, + 18446744073709551615, + 18446744073709551615, + 77, + 96, + 77, + 96, + 9, + 11, + true, + "specific advantages", + "specific advantages" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8120096385401382748, + 7910554425673840453, + 18446744073709551615, + 18446744073709551615, + 120, + 141, + 120, + 141, + 15, + 18, + true, + "adjacency list format", + "adjacency list format" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 5874885026837231955, + 4072122117926610489, + 18446744073709551615, + 18446744073709551615, + 166, + 189, + 166, + 189, + 23, + 25, + true, + "node-centric operations", + "node-centric operations" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4924931942798985710, + 12826410515891443201, + 18446744073709551615, + 18446744073709551615, + 226, + 248, + 226, + 248, + 30, + 33, + true, + "local graph operations", + "local graph operations" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 1107787356815021239, + 15363999495061554235, + 18446744073709551615, + 18446744073709551615, + 258, + 280, + 258, + 280, + 36, + 39, + true, + "first order traversals", + "first order traversals" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 1457142279144648781, + 1447675611503317247, + 18446744073709551615, + 18446744073709551615, + 316, + 345, + 316, + 345, + 46, + 50, + true, + "global scale graph operations", + "global scale graph operations" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 7076268937724050913, + 4243200811936881452, + 18446744073709551615, + 18446744073709551615, + 370, + 382, + 370, + 382, + 55, + 57, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 1325639643510008878, + 2772141157715748489, + 18446744073709551615, + 18446744073709551615, + 391, + 415, + 391, + 415, + 59, + 62, + true, + "advanced graph analytics", + "advanced graph analytics" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8120096385401382748, + 7910554425673711036, + 18446744073709551615, + 18446744073709551615, + 566, + 587, + 566, + 587, + 91, + 94, + true, + "adjacency list format", + "adjacency list format" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 14217562351589216718, + 11354169541202862310, + 18446744073709551615, + 18446744073709551615, + 609, + 626, + 609, + 626, + 99, + 102, + true, + "many cache misses", + "many cache misses" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16554996714335964809, + 1433800992424113617, + 18446744073709551615, + 18446744073709551615, + 658, + 673, + 658, + 673, + 107, + 109, + true, + "low performance", + "low performance" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 10574476198529132526, + 11121077699650702677, + 18446744073709551615, + 18446744073709551615, + 702, + 725, + 702, + 725, + 113, + 115, + true, + "global graph-traversals", + "global graph-traversals" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8120096385401382748, + 7910554425673690012, + 18446744073709551615, + 18446744073709551615, + 733, + 754, + 733, + 754, + 117, + 120, + true, + "adjacency list format", + "adjacency list format" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 2315331045020242414, + 377342086051294718, + 18446744073709551615, + 18446744073709551615, + 782, + 808, + 782, + 808, + 123, + 126, + true, + "concurrent write conflicts", + "concurrent write conflicts" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 17729840004664227381, + 4716030849938756040, + 18446744073709551615, + 18446744073709551615, + 850, + 873, + 850, + 873, + 133, + 136, + true, + "adjacency matrix format", + "adjacency matrix format" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 9199204757600564117, + 7993466816880230708, + 18446744073709551615, + 18446744073709551615, + 981, + 1023, + 981, + 1023, + 155, + 158, + true, + "sparse-matrix sparse-vector multiplication", + "sparse-matrix sparse-vector multiplication" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16579222348944913804, + 16370295638287544198, + 18446744073709551615, + 18446744073709551615, + 1109, + 1130, + 1109, + 1130, + 176, + 178, + true, + "multithreaded fashion", + "multithreaded fashion" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 11359951565081966931, + 2519317996116724135, + 18446744073709551615, + 18446744073709551615, + 1179, + 1193, + 1179, + 1193, + 187, + 189, + true, + "clever sorting", + "clever sorting" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 15699951717655812132, + 7744836953142101576, + 18446744073709551615, + 18446744073709551615, + 1201, + 1212, + 1201, + 1212, + 191, + 193, + true, + "tuples list", + "tuples list" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 13058502189641135024, + 7914974336984974505, + 18446744073709551615, + 18446744073709551615, + 101, + 114, + 101, + 114, + 12, + 13, + true, + "disadvantages", + "disadvantages" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8248539907401009667, + 2265519894835621749, + 18446744073709551615, + 18446744073709551615, + 208, + 221, + 208, + 221, + 28, + 29, + true, + "data-locality", + "data-locality" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 10308187620027892234, + 10064995642940078062, + 18446744073709551615, + 18446744073709551615, + 452, + 468, + 452, + 468, + 70, + 71, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 329104161758737773, + 17985157174507178569, + 18446744073709551615, + 18446744073709551615, + 502, + 507, + 502, + 507, + 79, + 80, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16381206557786164800, + 7873023782251793662, + 18446744073709551615, + 18446744073709551615, + 529, + 535, + 529, + 535, + 83, + 84, + true, + "weight", + "weight" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 329104161758737773, + 17985157174507501037, + 18446744073709551615, + 18446744073709551615, + 553, + 558, + 553, + 558, + 87, + 88, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 6168355606348623882, + 17222187151520418992, + 18446744073709551615, + 18446744073709551615, + 634, + 643, + 634, + 643, + 103, + 104, + true, + "execution", + "execution" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8106478041490969672, + 12943647009928962637, + 18446744073709551615, + 18446744073709551615, + 817, + 824, + 817, + 824, + 127, + 128, + true, + "threads", + "threads" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 6168355606348623882, + 17222187151520399878, + 18446744073709551615, + 18446744073709551615, + 832, + 841, + 832, + 841, + 129, + 130, + true, + "execution", + "execution" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 14814125877433299736, + 17739915814128640735, + 18446744073709551615, + 18446744073709551615, + 881, + 889, + 881, + 889, + 138, + 139, + true, + "problems", + "problems" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 10308187620027892234, + 10064995642940178589, + 18446744073709551615, + 18446744073709551615, + 915, + 931, + 915, + 931, + 144, + 145, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 389609625540553279, + 16811944284857720705, + 18446744073709551615, + 18446744073709551615, + 966, + 970, + 966, + 970, + 151, + 152, + true, + "SpMV", + "SpMV" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16381206514525102695, + 15744992214345462802, + 18446744073709551615, + 18446744073709551615, + 1025, + 1031, + 1025, + 1031, + 159, + 160, + true, + "SpMSpV", + "SpMSpV" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 389609625540553279, + 16811944284857728765, + 18446744073709551615, + 18446744073709551615, + 1087, + 1091, + 1087, + 1091, + 172, + 173, + true, + "SpMV", + "SpMV" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 14404307070545181542, + 7498659441809181561, + 18446744073709551615, + 18446744073709551615, + 1152, + 1164, + 1152, + 1164, + 183, + 184, + true, + "cache-misses", + "cache-misses" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "numval", + "fval", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415896427344, + 10294451467892719516, + 18446744073709551615, + 18446744073709551615, + 302, + 305, + 294, + 297, + 57, + 58, + true, + "1.5", + "1.5" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17767354399704235156, + 4257155890605923351, + 18446744073709551615, + 18446744073709551615, + 165, + 166, + 165, + 166, + 28, + 28, + false, + "4", + "4" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17767354399704235163, + 4257155890625182636, + 18446744073709551615, + 18446744073709551615, + 206, + 207, + 202, + 203, + 36, + 37, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415896310600, + 10294532231444872390, + 18446744073709551615, + 18446744073709551615, + 257, + 260, + 253, + 256, + 46, + 47, + false, + "500", + "500" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 15441160910541481167, + 17130124993064148148, + 18446744073709551615, + 18446744073709551615, + 267, + 269, + 261, + 263, + 49, + 50, + true, + "64", + "64" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17767354399704235156, + 4257155890605968002, + 18446744073709551615, + 18446744073709551615, + 466, + 467, + 458, + 459, + 90, + 90, + false, + "4", + "4" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17767354399704235161, + 4257155890657921360, + 18446744073709551615, + 18446744073709551615, + 526, + 527, + 518, + 519, + 103, + 104, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17767354399704235156, + 4257155890605956247, + 18446744073709551615, + 18446744073709551615, + 677, + 678, + 669, + 670, + 131, + 131, + false, + "4", + "4" + ], + [ + "parenthesis", + "round brackets", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104053346205471, + 10439298819923435512, + 18446744073709551615, + 18446744073709551615, + 104, + 109, + 104, + 109, + 17, + 20, + true, + "(TTS)", + "(TTS)" + ], + [ + "parenthesis", + "round brackets", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8690615943213957258, + 13520407369602191453, + 18446744073709551615, + 18446744073709551615, + 266, + 277, + 260, + 271, + 48, + 53, + true, + "(64M edges)", + "(64M edges)" + ], + [ + "parenthesis", + "round brackets", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 14861489952347503301, + 15388803781782082196, + 18446744073709551615, + 18446744073709551615, + 301, + 313, + 293, + 305, + 56, + 61, + true, + "(1.5B edges)", + "(1.5B edges)" + ], + [ + "parenthesis", + "round brackets", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415896391104, + 10294450863445651194, + 18446744073709551615, + 18446744073709551615, + 725, + 728, + 717, + 720, + 140, + 141, + false, + "(s)", + "(s)" + ], + [ + "expression", + "word-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 6285955549867796622, + 3519954987974151695, + 18446744073709551615, + 18446744073709551615, + 87, + 103, + 87, + 103, + 16, + 17, + true, + "time-to-solution", + "time-to-solution" + ], + [ + "expression", + "word-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104161594420373, + 1121791991465001484, + 18446744073709551615, + 18446744073709551615, + 223, + 228, + 219, + 224, + 41, + 42, + true, + "k-hop", + "k-hop" + ], + [ + "expression", + "word-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 5469579567331425221, + 2987931756247219616, + 18446744073709551615, + 18446744073709551615, + 282, + 295, + 276, + 289, + 54, + 55, + true, + "twitter-graph", + "twitter-graph" + ], + [ + "expression", + "word-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8384182147497629769, + 5245314349915949977, + 18446744073709551615, + 18446744073709551615, + 427, + 439, + 419, + 431, + 85, + 86, + true, + "higher-order", + "higher-order" + ], + [ + "expression", + "word-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106398446669642199, + 13297225597520616176, + 18446744073709551615, + 18446744073709551615, + 619, + 626, + 611, + 618, + 122, + 123, + true, + "k-order", + "k-order" + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104162105779366, + 8851381515603210985, + 18446744073709551615, + 18446744073709551615, + 162, + 167, + 162, + 167, + 28, + 29, + true, + "Neo4J", + "Neo4J" + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 14639714523137288664, + 14458905888448100979, + 18446744073709551615, + 18446744073709551615, + 252, + 260, + 248, + 256, + 46, + 47, + true, + "graph500", + "graph500" + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104162105779366, + 8851381515603099426, + 18446744073709551615, + 18446744073709551615, + 463, + 468, + 455, + 460, + 90, + 91, + true, + "Neo4J", + "Neo4J" + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 10308187620027892234, + 15392908800969629769, + 18446744073709551615, + 18446744073709551615, + 627, + 643, + 619, + 635, + 123, + 124, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104162105779366, + 8851381515603112553, + 18446744073709551615, + 18446744073709551615, + 674, + 679, + 666, + 671, + 131, + 132, + true, + "Neo4J", + "Neo4J" + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106352617177552874, + 13208133057108101651, + 18446744073709551615, + 18446744073709551615, + 721, + 728, + 713, + 720, + 140, + 141, + true, + "node(s)", + "node(s)" + ], + [ + "sentence", + "", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 9900070277501077316, + 14879933636210090686, + 18446744073709551615, + 18446744073709551615, + 0, + 208, + 0, + 204, + 0, + 38, + true, + "To illustrate the advantages of the adjacency matrix format for our needs, we show the time-to-solution (TTS) for queries with increasing order of traversals for Neo4J \u2021\u2021 and our graph engine in Figure 3.", + "To illustrate the advantages of the adjacency matrix format for our needs, we show the time-to-solution (TTS) for queries with increasing order of traversals for Neo4J \u2021\u2021 and our graph engine in Figure 3." + ], + [ + "sentence", + "", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 7523992103366410925, + 3988616322684329041, + 18446744073709551615, + 18446744073709551615, + 209, + 314, + 205, + 306, + 38, + 62, + true, + "We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges).", + "We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges)." + ], + [ + "sentence", + "", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 18326286580678309185, + 9777799881842905097, + 18446744073709551615, + 18446744073709551615, + 315, + 354, + 307, + 346, + 62, + 69, + true, + "Two important observations can be made.", + "Two important observations can be made." + ], + [ + "sentence", + "", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 7520242923557925246, + 16073154361344584091, + 18446744073709551615, + 18446744073709551615, + 355, + 457, + 347, + 449, + 69, + 89, + true, + "Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals.", + "Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals." + ], + [ + "sentence", + "", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 4048875314896090291, + 7787099331214545680, + 18446744073709551615, + 18446744073709551615, + 458, + 533, + 450, + 525, + 89, + 106, + true, + "With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour.", + "With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour." + ], + [ + "sentence", + "", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 7221032262585555497, + 11521686287281845163, + 18446744073709551615, + 18446744073709551615, + 534, + 644, + 526, + 636, + 106, + 125, + true, + "Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals.", + "Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals." + ], + [ + "sentence", + "", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8536372207058713595, + 9010029933076937587, + 18446744073709551615, + 18446744073709551615, + 645, + 745, + 637, + 737, + 125, + 145, + true, + "This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from.", + "This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from." + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17729840004664227381, + 2529224331117607819, + 18446744073709551615, + 18446744073709551615, + 36, + 59, + 36, + 59, + 6, + 9, + true, + "adjacency matrix format", + "adjacency matrix format" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 14650948151731409985, + 3189176551282868037, + 18446744073709551615, + 18446744073709551615, + 162, + 174, + 162, + 170, + 28, + 30, + true, + "Neo4J \u2021\u2021", + "Neo4J \u2021\u2021" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 2924972194163802578, + 1896936451152973716, + 18446744073709551615, + 18446744073709551615, + 183, + 195, + 179, + 191, + 32, + 34, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 2458518862891897854, + 14746702567168992825, + 18446744073709551615, + 18446744073709551615, + 223, + 244, + 219, + 240, + 41, + 44, + true, + "k-hop traversal query", + "k-hop traversal query" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 1053045962906589641, + 13526147670869926808, + 18446744073709551615, + 18446744073709551615, + 252, + 265, + 248, + 259, + 46, + 48, + true, + "graph500 \u00a7\u00a7", + "graph500 \u00a7\u00a7" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106471963032718189, + 12463756649278513661, + 18446744073709551615, + 18446744073709551615, + 269, + 276, + 263, + 270, + 50, + 52, + true, + "M edges", + "M edges" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 15299718545731398218, + 17088910247353771644, + 18446744073709551615, + 18446744073709551615, + 282, + 300, + 276, + 292, + 54, + 56, + true, + "twitter-graph \u00b6\u00b6", + "twitter-graph \u00b6\u00b6" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106350691486682096, + 2513264736124284390, + 18446744073709551615, + 18446744073709551615, + 305, + 312, + 297, + 304, + 58, + 60, + true, + "B edges", + "B edges" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8005677520082126207, + 15295884295027505463, + 18446744073709551615, + 18446744073709551615, + 315, + 341, + 307, + 333, + 62, + 65, + true, + "Two important observations", + "Two important observations" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 2924972194163802578, + 1896936451152637986, + 18446744073709551615, + 18446744073709551615, + 368, + 380, + 360, + 372, + 72, + 74, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 2942854117731362049, + 1617244827717225678, + 18446744073709551615, + 18446744073709551615, + 440, + 456, + 432, + 448, + 86, + 88, + true, + "graph traversals", + "graph traversals" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 2924972194163802578, + 1896936451152650353, + 18446744073709551615, + 18446744073709551615, + 548, + 560, + 540, + 552, + 109, + 111, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 16295992254884720120, + 17528779414945550420, + 18446744073709551615, + 18446744073709551615, + 567, + 583, + 559, + 575, + 112, + 114, + true, + "minimal variance", + "minimal variance" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 11094301811558263715, + 18428740830186439295, + 18446744073709551615, + 18446744073709551615, + 619, + 643, + 611, + 635, + 122, + 124, + true, + "k-order graph-traversals", + "k-order graph-traversals" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 4914830112961611503, + 2589359867907671877, + 18446744073709551615, + 18446744073709551615, + 656, + 670, + 648, + 662, + 128, + 130, + true, + "stark contrast", + "stark contrast" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 15360629769874482523, + 14086244081592280733, + 18446744073709551615, + 18446744073709551615, + 18, + 28, + 18, + 28, + 3, + 4, + true, + "advantages", + "advantages" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104161565915183, + 1151200886323461192, + 18446744073709551615, + 18446744073709551615, + 68, + 73, + 68, + 73, + 11, + 12, + true, + "needs", + "needs" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 6285955549867796622, + 3519954987974151695, + 18446744073709551615, + 18446744073709551615, + 87, + 103, + 87, + 103, + 16, + 17, + true, + "time-to-solution", + "time-to-solution" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415895656509, + 10294424381813189025, + 18446744073709551615, + 18446744073709551615, + 105, + 108, + 105, + 108, + 18, + 19, + true, + "TTS", + "TTS" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106477782290185579, + 3942987813051468226, + 18446744073709551615, + 18446744073709551615, + 114, + 121, + 114, + 121, + 21, + 22, + true, + "queries", + "queries" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104161571401725, + 1123588280959364869, + 18446744073709551615, + 18446744073709551615, + 138, + 143, + 138, + 143, + 24, + 25, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8619280805974492668, + 13535634626495482105, + 18446744073709551615, + 18446744073709551615, + 147, + 157, + 147, + 157, + 26, + 27, + true, + "traversals", + "traversals" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 16381206514091025767, + 877060830667997748, + 18446744073709551615, + 18446744073709551615, + 199, + 205, + 195, + 201, + 35, + 36, + true, + "Figure", + "Figure" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104162105779366, + 8851381515603099426, + 18446744073709551615, + 18446744073709551615, + 463, + 468, + 455, + 460, + 90, + 91, + true, + "Neo4J", + "Neo4J" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415895656509, + 10294424381813163053, + 18446744073709551615, + 18446744073709551615, + 505, + 508, + 497, + 500, + 99, + 100, + true, + "TTS", + "TTS" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106478012012949344, + 6607109766774689060, + 18446744073709551615, + 18446744073709551615, + 515, + 522, + 507, + 514, + 101, + 102, + true, + "upwards", + "upwards" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 389609625695186535, + 7021071120195777033, + 18446744073709551615, + 18446744073709551615, + 528, + 532, + 520, + 524, + 104, + 105, + true, + "hour", + "hour" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415895656509, + 10294424381813288835, + 18446744073709551615, + 18446744073709551615, + 591, + 594, + 583, + 586, + 116, + 117, + true, + "TTS", + "TTS" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 389609625633007953, + 7019735724866522291, + 18446744073709551615, + 18446744073709551615, + 607, + 611, + 599, + 603, + 119, + 120, + true, + "runs", + "runs" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104162105779366, + 8851381515603112553, + 18446744073709551615, + 18446744073709551615, + 674, + 679, + 666, + 671, + 131, + 132, + true, + "Neo4J", + "Neo4J" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415895656509, + 10294424381813291998, + 18446744073709551615, + 18446744073709551615, + 691, + 694, + 683, + 686, + 135, + 136, + true, + "TTS", + "TTS" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481862, + 6611832599487460343, + 18446744073709551615, + 18446744073709551615, + 111, + 113, + 111, + 113, + 17, + 18, + true, + "18", + "18" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235162, + 16086706123952683919, + 18446744073709551615, + 18446744073709551615, + 435, + 436, + 435, + 436, + 82, + 83, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481860, + 6611832599456912896, + 18446744073709551615, + 18446744073709551615, + 437, + 439, + 437, + 439, + 83, + 84, + true, + "16", + "16" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481166, + 6611754875794384515, + 18446744073709551615, + 18446744073709551615, + 442, + 444, + 442, + 444, + 85, + 86, + true, + "65", + "65" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 12178341415896310785, + 14422419606559923738, + 18446744073709551615, + 18446744073709551615, + 445, + 448, + 445, + 448, + 86, + 87, + true, + "536", + "536" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235156, + 16086706123708070212, + 18446744073709551615, + 18446744073709551615, + 590, + 591, + 590, + 591, + 113, + 114, + true, + "4", + "4" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481849, + 6611832587831823848, + 18446744073709551615, + 18446744073709551615, + 622, + 624, + 622, + 624, + 120, + 120, + false, + "32", + "32" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235152, + 16086706131746349816, + 18446744073709551615, + 18446744073709551615, + 676, + 677, + 676, + 677, + 131, + 132, + true, + "8", + "8" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235156, + 16086706123708539554, + 18446744073709551615, + 18446744073709551615, + 756, + 757, + 756, + 757, + 145, + 145, + false, + "4", + "4" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481854, + 6611832586181155412, + 18446744073709551615, + 18446744073709551615, + 786, + 788, + 786, + 788, + 151, + 152, + true, + "33", + "33" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235152, + 16086706131746369369, + 18446744073709551615, + 18446744073709551615, + 878, + 879, + 872, + 873, + 168, + 169, + true, + "8", + "8" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481849, + 6611832587831816065, + 18446744073709551615, + 18446744073709551615, + 920, + 922, + 914, + 916, + 176, + 177, + true, + "32", + "32" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235153, + 16086706131730015547, + 18446744073709551615, + 18446744073709551615, + 1000, + 1001, + 994, + 995, + 192, + 193, + false, + "9", + "9" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235156, + 16086706123708163426, + 18446744073709551615, + 18446744073709551615, + 1012, + 1013, + 1006, + 1007, + 195, + 196, + true, + "4", + "4" + ], + [ + "parenthesis", + "round brackets", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 6137469846456037455, + 5202143414337932883, + 18446744073709551615, + 18446744073709551615, + 598, + 637, + 598, + 637, + 115, + 123, + true, + "(equivalent to a single 32-bit integer)", + "(equivalent to a single 32-bit integer)" + ], + [ + "expression", + "word-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 6187817560337829240, + 141181149316580822, + 18446744073709551615, + 18446744073709551615, + 250, + 259, + 250, + 259, + 42, + 43, + true, + "in-memory", + "in-memory" + ], + [ + "expression", + "word-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17096868097919627199, + 13945693794675913524, + 18446744073709551615, + 18446744073709551615, + 416, + 428, + 416, + 428, + 79, + 80, + true, + "block-matrix", + "block-matrix" + ], + [ + "expression", + "word-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 7166534900846969312, + 708063167962396611, + 18446744073709551615, + 18446744073709551615, + 984, + 994, + 978, + 988, + 191, + 192, + true, + "bare-metal", + "bare-metal" + ], + [ + "expression", + "latex", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16381206549576689335, + 15188561199320662041, + 18446744073709551615, + 18446744073709551615, + 816, + 830, + 816, + 824, + 156, + 157, + true, + "^{\u2020\u2020\u2020}", + "$^{\u2020\u2020\u2020}$" + ], + [ + "expression", + "wtoken-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16380810014374034475, + 7208847194404732485, + 18446744073709551615, + 18446744073709551615, + 622, + 628, + 622, + 628, + 120, + 121, + true, + "32-bit", + "32-bit" + ], + [ + "expression", + "wtoken-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104162105779366, + 6648037853909519105, + 18446744073709551615, + 18446744073709551615, + 753, + 758, + 753, + 758, + 145, + 146, + true, + "Neo4J", + "Neo4J" + ], + [ + "expression", + "wtoken-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16381206511586975208, + 18226864088168425059, + 18446744073709551615, + 18446744073709551615, + 995, + 1001, + 989, + 995, + 192, + 193, + true, + "POWER9", + "POWER9" + ], + [ + "sentence", + "", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 11108464234987066551, + 6109858459339794472, + 18446744073709551615, + 18446744073709551615, + 0, + 141, + 0, + 141, + 0, + 25, + true, + "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO.", + "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO." + ], + [ + "sentence", + "", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 10246563988291592306, + 2276066290660368261, + 18446744073709551615, + 18446744073709551615, + 142, + 260, + 142, + 260, + 25, + 44, + true, + "This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory.", + "This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory." + ], + [ + "sentence", + "", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 3415607433018794013, + 4687853286732689567, + 18446744073709551615, + 18446744073709551615, + 261, + 390, + 261, + 390, + 44, + 73, + true, + "In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO.", + "In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO." + ], + [ + "sentence", + "", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 10474976233539682374, + 18026011700976907235, + 18446744073709551615, + 18446744073709551615, + 391, + 536, + 391, + 536, + 73, + 103, + true, + "We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers.", + "We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers." + ], + [ + "sentence", + "", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 3685477724550052401, + 9873589769454795234, + 18446744073709551615, + 18446744073709551615, + 537, + 684, + 537, + 684, + 103, + 134, + true, + "Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes.", + "Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes." + ], + [ + "sentence", + "", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 9302359278229801872, + 12812648110667786289, + 18446744073709551615, + 18446744073709551615, + 689, + 832, + 689, + 826, + 135, + 159, + true, + "This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$).", + "This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$)." + ], + [ + "sentence", + "", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 8925645990612399548, + 16713121652458311820, + 18446744073709551615, + 18446744073709551615, + 833, + 1027, + 827, + 1021, + 159, + 200, + true, + "Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory.", + "Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory." + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 10068551836820132, + 11381505126689469627, + 18446744073709551615, + 18446744073709551615, + 8, + 21, + 8, + 21, + 1, + 3, + true, + "big advantage", + "big advantage" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17729840004664227381, + 5880070554995887882, + 18446744073709551615, + 18446744073709551615, + 35, + 58, + 35, + 58, + 6, + 9, + true, + "adjacency matrix format", + "adjacency matrix format" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 14731630785121984489, + 7830997585744793399, + 18446744073709551615, + 18446744073709551615, + 82, + 110, + 82, + 110, + 14, + 17, + true, + "advanced compression methods", + "advanced compression methods" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 13543987209185531185, + 9337570148046077703, + 18446744073709551615, + 18446744073709551615, + 173, + 189, + 173, + 189, + 29, + 31, + true, + "memory footprint", + "memory footprint" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15115837464859551979, + 355789543703900736, + 18446744073709551615, + 18446744073709551615, + 314, + 330, + 314, + 330, + 56, + 58, + true, + "blocked matrices", + "blocked matrices" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 5385534283636121811, + 11562346121021298365, + 18446744073709551615, + 18446744073709551615, + 336, + 346, + 336, + 346, + 60, + 62, + true, + "fixed size", + "fixed size" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17096888249457072514, + 8451717139878198506, + 18446744073709551615, + 18446744073709551615, + 362, + 374, + 362, + 374, + 66, + 68, + true, + "block matrix", + "block matrix" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 14635113557370658205, + 5397031793806654273, + 18446744073709551615, + 18446744073709551615, + 381, + 389, + 381, + 389, + 70, + 72, + true, + "type COO", + "type COO" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 3115430403987697525, + 12871961618510872276, + 18446744073709551615, + 18446744073709551615, + 512, + 535, + 512, + 535, + 99, + 102, + true, + "unsigned short integers", + "unsigned short integers" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 13543987209185531185, + 9337570148045967789, + 18446744073709551615, + 18446744073709551615, + 565, + 581, + 565, + 581, + 109, + 111, + true, + "memory footprint", + "memory footprint" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15138418052525916838, + 2992602504471532064, + 18446744073709551615, + 18446744073709551615, + 615, + 636, + 615, + 636, + 119, + 122, + true, + "single 32-bit integer", + "single 32-bit integer" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 2663450017388020648, + 12004026720926102257, + 18446744073709551615, + 18446744073709551615, + 647, + 660, + 647, + 660, + 126, + 128, + true, + "weighted edge", + "weighted edge" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 10871439885151345979, + 15151057053119522021, + 18446744073709551615, + 18446744073709551615, + 699, + 720, + 699, + 720, + 138, + 140, + true, + "significant reduction", + "significant reduction" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 13543987209185531185, + 9337570148045966672, + 18446744073709551615, + 18446744073709551615, + 724, + 740, + 724, + 740, + 141, + 143, + true, + "memory footprint", + "memory footprint" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 18215283283750336502, + 17448408987700874112, + 18446744073709551615, + 18446744073709551615, + 753, + 774, + 753, + 774, + 145, + 148, + true, + "Neo4J graph databases", + "Neo4J graph databases" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 1126026715282292268, + 3148784051061061026, + 18446744073709551615, + 18446744073709551615, + 799, + 815, + 799, + 815, + 154, + 156, + true, + "unweighted edges", + "unweighted edges" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 14857172535504849859, + 5773864008533755225, + 18446744073709551615, + 18446744073709551615, + 880, + 893, + 874, + 887, + 169, + 171, + true, + "billion edges", + "billion edges" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 14387950977550393964, + 8007787524976843735, + 18446744073709551615, + 18446744073709551615, + 899, + 914, + 893, + 908, + 173, + 175, + true, + "virtual machine", + "virtual machine" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 3124577709661373204, + 18272255581661630916, + 18446744073709551615, + 18446744073709551615, + 929, + 940, + 923, + 934, + 179, + 181, + true, + "free memory", + "free memory" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 2187881130485149075, + 6718806557176407140, + 18446744073709551615, + 18446744073709551615, + 964, + 978, + 958, + 972, + 187, + 189, + true, + "trillion edges", + "trillion edges" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 8455337934804664784, + 11785499481816920454, + 18446744073709551615, + 18446744073709551615, + 984, + 1006, + 978, + 1000, + 191, + 194, + true, + "bare-metal POWER9 node", + "bare-metal POWER9 node" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 12178341415896222617, + 14422417698616585242, + 18446744073709551615, + 18446744073709551615, + 122, + 125, + 122, + 125, + 20, + 21, + true, + "CSR", + "CSR" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 12178341415896222361, + 14422417712036675521, + 18446744073709551615, + 18446744073709551615, + 137, + 140, + 137, + 140, + 23, + 24, + true, + "COO", + "COO" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104159211247965, + 10155196688984100956, + 18446744073709551615, + 18446744073709551615, + 197, + 202, + 197, + 202, + 33, + 34, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16381206539879417749, + 17210280397844822730, + 18446744073709551615, + 18446744073709551615, + 221, + 227, + 221, + 227, + 37, + 38, + true, + "graphs", + "graphs" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 389609625695123443, + 16925837287041287106, + 18446744073709551615, + 18446744073709551615, + 268, + 272, + 268, + 272, + 46, + 47, + true, + "case", + "case" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104162186494203, + 6595611082205689321, + 18446744073709551615, + 18446744073709551615, + 305, + 310, + 305, + 310, + 54, + 55, + true, + "edges", + "edges" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 389609625741058932, + 16925015935541254706, + 18446744073709551615, + 18446744073709551615, + 404, + 408, + 404, + 408, + 76, + 77, + true, + "size", + "size" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17096868097919627199, + 13945693794675913524, + 18446744073709551615, + 18446744073709551615, + 416, + 428, + 416, + 428, + 79, + 80, + true, + "block-matrix", + "block-matrix" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 389609625632301288, + 16928603534806395316, + 18446744073709551615, + 18446744073709551615, + 461, + 465, + 461, + 465, + 90, + 91, + true, + "pair", + "pair" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 8106398345633211267, + 6358279098714856308, + 18446744073709551615, + 18446744073709551615, + 469, + 476, + 469, + 476, + 92, + 93, + true, + "indices", + "indices" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 389609625699630670, + 16925667780777279203, + 18446744073709551615, + 18446744073709551615, + 554, + 558, + 554, + 558, + 106, + 107, + true, + "edge", + "edge" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104159327206248, + 10350794591348378566, + 18446744073709551615, + 18446744073709551615, + 592, + 597, + 592, + 597, + 114, + 115, + true, + "bytes", + "bytes" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 6187534604692512350, + 9764773695057964576, + 18446744073709551615, + 18446744073709551615, + 663, + 672, + 663, + 672, + 129, + 130, + true, + "footprint", + "footprint" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104159327206248, + 10350794591348386969, + 18446744073709551615, + 18446744073709551615, + 678, + 683, + 678, + 683, + 132, + 133, + true, + "bytes", + "bytes" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104159327206248, + 10350794591348326495, + 18446744073709551615, + 18446744073709551615, + 789, + 794, + 789, + 794, + 152, + 153, + true, + "bytes", + "bytes" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16381206539879417749, + 17210280397844777021, + 18446744073709551615, + 18446744073709551615, + 859, + 865, + 853, + 859, + 164, + 165, + true, + "graphs", + "graphs" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104161517017582, + 10314291369512679023, + 18446744073709551615, + 18446744073709551615, + 869, + 874, + 863, + 868, + 166, + 167, + true, + "close", + "close" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541479948, + 6611754888016241485, + 18446744073709551615, + 18446744073709551615, + 923, + 925, + 917, + 919, + 177, + 178, + true, + "GB", + "GB" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541487906, + 6611830889990664661, + 18446744073709551615, + 18446744073709551615, + 1014, + 1016, + 1008, + 1010, + 196, + 197, + true, + "TB", + "TB" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16381206567042997791, + 4033589895737852410, + 18446744073709551615, + 18446744073709551615, + 1020, + 1026, + 1014, + 1020, + 198, + 199, + true, + "memory", + "memory" + ], + [ + "numval", + "fval", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 12178341415896435196, + 16211286906118314940, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 1, + true, + "3.3", + "3.3" + ], + [ + "sentence", + "", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 11091581991954269716, + 10456364627355927036, + 18446744073709551615, + 18446744073709551615, + 0, + 57, + 0, + 57, + 0, + 12, + true, + "The goal of querying a KG is to answer complex questions.", + "The goal of querying a KG is to answer complex questions." + ], + [ + "sentence", + "", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 14577311106096638975, + 10656479587756904158, + 18446744073709551615, + 18446744073709551615, + 58, + 179, + 58, + 179, + 12, + 35, + true, + "As such, users need to be provided with a functionality to formulate complex queries on the KG and quickly evaluate them.", + "As such, users need to be provided with a functionality to formulate complex queries on the KG and quickly evaluate them." + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 14314461436358843828, + 14492210953206209285, + 18446744073709551615, + 18446744073709551615, + 39, + 56, + 39, + 56, + 9, + 11, + true, + "complex questions", + "complex questions" + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 3916373036270397758, + 5882827815055053772, + 18446744073709551615, + 18446744073709551615, + 127, + 142, + 127, + 142, + 25, + 27, + true, + "complex queries", + "complex queries" + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 389609625699055241, + 9332893958662962709, + 18446744073709551615, + 18446744073709551615, + 4, + 8, + 4, + 8, + 1, + 2, + true, + "goal", + "goal" + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 15441160910541480204, + 16382477296675596695, + 18446744073709551615, + 18446744073709551615, + 23, + 25, + 23, + 25, + 5, + 6, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 329104159157820437, + 17616820081691235592, + 18446744073709551615, + 18446744073709551615, + 67, + 72, + 67, + 72, + 15, + 16, + true, + "users", + "users" + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 4083292969395203883, + 5654192674738865140, + 18446744073709551615, + 18446744073709551615, + 100, + 113, + 100, + 113, + 22, + 23, + true, + "functionality", + "functionality" + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 15441160910541480204, + 16382477296675604919, + 18446744073709551615, + 18446744073709551615, + 150, + 152, + 150, + 152, + 29, + 30, + true, + "KG", + "KG" + ], + [ + "numval", + "ival", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 17767354399704235156, + 18395627803235450761, + 18446744073709551615, + 18446744073709551615, + 640, + 641, + 640, + 641, + 116, + 117, + true, + "4", + "4" + ], + [ + "parenthesis", + "round brackets", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 4361897097975010664, + 13376241768661335660, + 18446744073709551615, + 18446744073709551615, + 397, + 414, + 397, + 414, + 71, + 75, + true, + "(or intermediate)", + "(or intermediate)" + ], + [ + "parenthesis", + "round brackets", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 389609625545054248, + 12214200532744887129, + 18446744073709551615, + 18446744073709551615, + 552, + 556, + 552, + 556, + 99, + 102, + true, + "(UI)", + "(UI)" + ], + [ + "parenthesis", + "round brackets", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 7105842701545013905, + 2265574179061884287, + 18446744073709551615, + 18446744073709551615, + 628, + 642, + 628, + 642, + 113, + 118, + true, + "(see Figure 4)", + "(see Figure 4)" + ], + [ + "expression", + "word-concatenation", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 18047739014778172965, + 15647829115234352063, + 18446744073709551615, + 18446744073709551615, + 318, + 337, + 318, + 337, + 60, + 61, + true, + "data-transformation", + "data-transformation" + ], + [ + "sentence", + "", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 9175725069191114564, + 6016235278680059217, + 18446744073709551615, + 18446744073709551615, + 0, + 168, + 0, + 168, + 0, + 33, + true, + "In order to avoid imposing a complex query language onto users, we have devised a way to define complex graph queries in a declarative format, which we call a workflow.", + "In order to avoid imposing a complex query language onto users, we have devised a way to define complex graph queries in a declarative format, which we call a workflow." + ], + [ + "sentence", + "", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 57107468990616569, + 6772300740708680643, + 18446744073709551615, + 18446744073709551615, + 169, + 254, + 169, + 254, + 33, + 48, + true, + "Workflows are represented as a DAG of operations and are conceptually related to DFs.", + "Workflows are represented as a DAG of operations and are conceptually related to DFs." + ], + [ + "sentence", + "", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 2103206676431633354, + 7765713919478438730, + 18446744073709551615, + 18446744073709551615, + 255, + 445, + 255, + 445, + 48, + 82, + true, + "Unlike the former, the nodes of workflow DAGs do not represent data-transformation tasks, but specific graph operations which mutate an input (or intermediate) set of nodes into another set.", + "Unlike the former, the nodes of workflow DAGs do not represent data-transformation tasks, but specific graph operations which mutate an input (or intermediate) set of nodes into another set." + ], + [ + "sentence", + "", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 14814020289477828640, + 7062038975480317251, + 18446744073709551615, + 18446744073709551615, + 446, + 481, + 446, + 481, + 82, + 88, + true, + "We call these operations worktasks.", + "We call these operations worktasks." + ], + [ + "sentence", + "", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 17133158107037793945, + 13159924364498959182, + 18446744073709551615, + 18446744073709551615, + 482, + 643, + 482, + 643, + 88, + 119, + true, + "For further convenience, we have developed a graphical user interface (UI) which allows to define such workflows in a visual programming approach (see Figure 4).", + "For further convenience, we have developed a graphical user interface (UI) which allows to define such workflows in a visual programming approach (see Figure 4)." + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 9548493583803247969, + 10298167230238939895, + 18446744073709551615, + 18446744073709551615, + 29, + 51, + 29, + 51, + 6, + 9, + true, + "complex query language", + "complex query language" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 15274395271512051612, + 7615673165598889472, + 18446744073709551615, + 18446744073709551615, + 96, + 117, + 96, + 117, + 19, + 22, + true, + "complex graph queries", + "complex graph queries" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 15791836353173876541, + 15444549276320690836, + 18446744073709551615, + 18446744073709551615, + 123, + 141, + 123, + 141, + 24, + 26, + true, + "declarative format", + "declarative format" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 2221511436793850179, + 15451583893193860762, + 18446744073709551615, + 18446744073709551615, + 287, + 300, + 287, + 300, + 55, + 57, + true, + "workflow DAGs", + "workflow DAGs" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 5733862415078796863, + 5449520185842609371, + 18446744073709551615, + 18446744073709551615, + 318, + 343, + 318, + 343, + 60, + 62, + true, + "data-transformation tasks", + "data-transformation tasks" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 16077761921532073702, + 15751654443455067910, + 18446744073709551615, + 18446744073709551615, + 349, + 374, + 349, + 374, + 64, + 67, + true, + "specific graph operations", + "specific graph operations" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 3406005564439493096, + 962936403426020052, + 18446744073709551615, + 18446744073709551615, + 460, + 480, + 460, + 480, + 85, + 87, + true, + "operations worktasks", + "operations worktasks" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 9340106368352020484, + 9901483225457527231, + 18446744073709551615, + 18446744073709551615, + 486, + 505, + 486, + 505, + 89, + 91, + true, + "further convenience", + "further convenience" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 7582281372004134232, + 1510365298380696859, + 18446744073709551615, + 18446744073709551615, + 527, + 551, + 527, + 551, + 96, + 99, + true, + "graphical user interface", + "graphical user interface" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 5681538719543297340, + 7633237073525727434, + 18446744073709551615, + 18446744073709551615, + 580, + 594, + 580, + 594, + 106, + 108, + true, + "such workflows", + "such workflows" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 17267900621492324657, + 3642376820636860698, + 18446744073709551615, + 18446744073709551615, + 600, + 627, + 600, + 627, + 110, + 113, + true, + "visual programming approach", + "visual programming approach" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 329104161571401725, + 9611792532481010924, + 18446744073709551615, + 18446744073709551615, + 3, + 8, + 3, + 8, + 1, + 2, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 329104159157820437, + 13570310750889527762, + 18446744073709551615, + 18446744073709551615, + 57, + 62, + 57, + 62, + 10, + 11, + true, + "users", + "users" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 12178341415895525628, + 8266666853824012019, + 18446744073709551615, + 18446744073709551615, + 82, + 85, + 82, + 85, + 16, + 17, + true, + "way", + "way" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 14638857990842534974, + 605379263320532680, + 18446744073709551615, + 18446744073709551615, + 159, + 167, + 159, + 167, + 31, + 32, + true, + "workflow", + "workflow" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 6183387189920121296, + 1410025149081126995, + 18446744073709551615, + 18446744073709551615, + 169, + 178, + 169, + 178, + 33, + 34, + true, + "Workflows", + "Workflows" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 12178341415896112046, + 8266776682312646277, + 18446744073709551615, + 18446744073709551615, + 200, + 203, + 200, + 203, + 38, + 39, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 13985988710970420061, + 12003721642956804645, + 18446744073709551615, + 18446744073709551615, + 207, + 217, + 207, + 217, + 40, + 41, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 12178341415896110548, + 8266776675949707180, + 18446744073709551615, + 18446744073709551615, + 250, + 253, + 250, + 253, + 46, + 47, + true, + "DFs", + "DFs" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 329104161758737773, + 9650253826023077819, + 18446744073709551615, + 18446744073709551615, + 278, + 283, + 278, + 283, + 53, + 54, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 329104161828910287, + 9583702698217235724, + 18446744073709551615, + 18446744073709551615, + 391, + 396, + 391, + 396, + 70, + 71, + true, + "input", + "input" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 12178341415895638602, + 8266662293356183469, + 18446744073709551615, + 18446744073709551615, + 415, + 418, + 415, + 418, + 75, + 76, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 329104161758737773, + 9650253826023172647, + 18446744073709551615, + 18446744073709551615, + 422, + 427, + 422, + 427, + 77, + 78, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 12178341415895638602, + 8266662293356182036, + 18446744073709551615, + 18446744073709551615, + 441, + 444, + 441, + 444, + 80, + 81, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 15441160910541484266, + 13744330050619277317, + 18446744073709551615, + 18446744073709551615, + 553, + 555, + 553, + 555, + 100, + 101, + true, + "UI", + "UI" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 16381206514091025767, + 10238121304656839602, + 18446744073709551615, + 18446744073709551615, + 633, + 639, + 633, + 639, + 115, + 116, + true, + "Figure", + "Figure" + ], + [ + "expression", + "word-concatenation", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 15221896740599576202, + 7666904121768591309, + 18446744073709551615, + 18446744073709551615, + 59, + 73, + 59, + 73, + 10, + 11, + true, + "node-retrieval", + "node-retrieval" + ], + [ + "sentence", + "", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 1859492819924485121, + 10838117205519727135, + 18446744073709551615, + 18446744073709551615, + 0, + 128, + 0, + 128, + 0, + 20, + true, + "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions.", + "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions." + ], + [ + "sentence", + "", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 4963035477772371835, + 4020325737246968829, + 18446744073709551615, + 18446744073709551615, + 129, + 262, + 129, + 262, + 20, + 44, + true, + "In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design.", + "In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design." + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 17889054130498802051, + 13611413549729115921, + 18446744073709551615, + 18446744073709551615, + 27, + 44, + 27, + 44, + 5, + 7, + true, + "fundamental types", + "fundamental types" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 16654294478124171317, + 10151652501900860692, + 18446744073709551615, + 18446744073709551615, + 86, + 103, + 86, + 103, + 14, + 16, + true, + "logical operators", + "logical operators" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 11555096374369856312, + 7157942907653228754, + 18446744073709551615, + 18446744073709551615, + 108, + 127, + 108, + 127, + 17, + 19, + true, + "transform functions", + "transform functions" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 17030057430150962643, + 11687865223449973507, + 18446744073709551615, + 18446744073709551615, + 136, + 154, + 136, + 154, + 22, + 24, + true, + "following sections", + "following sections" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 17730388821334829224, + 6503636413294871875, + 18446744073709551615, + 18446744073709551615, + 238, + 261, + 238, + 261, + 40, + 43, + true, + "adjacency matrix design", + "adjacency matrix design" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 3534171294115941544, + 8731026536612016164, + 18446744073709551615, + 18446744073709551615, + 48, + 57, + 48, + 57, + 8, + 9, + true, + "worktasks", + "worktasks" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 3503811091434006699, + 4368860458480451668, + 18446744073709551615, + 18446744073709551615, + 75, + 84, + 75, + 84, + 12, + 13, + true, + "traversal", + "traversal" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 16381206568246674273, + 3558057784302965696, + 18446744073709551615, + 18446744073709551615, + 175, + 181, + 175, + 181, + 29, + 30, + true, + "detail", + "detail" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 3534171294115941544, + 8731026536612028033, + 18446744073709551615, + 18446744073709551615, + 190, + 199, + 190, + 199, + 32, + 33, + true, + "worktasks", + "worktasks" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 8106398484416909789, + 4307351017350543686, + 18446744073709551615, + 18446744073709551615, + 223, + 230, + 223, + 230, + 37, + 38, + true, + "context", + "context" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "numval", + "ival", + 4361549266593946746, + "TEXT", + "#/texts/96", + 1.0, + 17767354399704235153, + 1792635071361844496, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "9", + "9" + ], + [ + "numval", + "ival", + 4361549266593946746, + "TEXT", + "#/texts/96", + 1.0, + 15441160910541481979, + 7911155768595088752, + 18446744073709551615, + 18446744073709551615, + 3, + 5, + 3, + 5, + 2, + 3, + true, + "15", + "15" + ], + [ + "numval", + "fval", + 9802652237802670052, + "TEXT", + "#/texts/97", + 1.0, + 12178341415896435196, + 198388536621247129, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 0, + false, + "3.3", + "3.3" + ], + [ + "numval", + "ival", + 9802652237802670052, + "TEXT", + "#/texts/97", + 1.0, + 17767354399704235161, + 3052200858272860943, + 18446744073709551615, + 18446744073709551615, + 4, + 5, + 4, + 5, + 0, + 1, + false, + "1", + "1" + ], + [ + "expression", + "wtoken-concatenation", + 9802652237802670052, + "TEXT", + "#/texts/97", + 1.0, + 329104147725285867, + 13023020285713349824, + 18446744073709551615, + 18446744073709551615, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "3.3.1", + "3.3.1" + ], + [ + "parenthesis", + "round brackets", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 6343195480109663451, + 11165462414382695465, + 18446744073709551615, + 18446744073709551615, + 119, + 132, + 119, + 132, + 23, + 26, + true, + "(approximate)", + "(approximate)" + ], + [ + "expression", + "latex", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 389609625699793568, + 1705012483593147870, + 18446744073709551615, + 18446744073709551615, + 253, + 259, + 253, + 259, + 48, + 49, + true, + "^{!}", + "$^{!}$" + ], + [ + "sentence", + "", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 13639548757740861010, + 11696805249441926913, + 18446744073709551615, + 18446744073709551615, + 0, + 69, + 0, + 69, + 0, + 13, + true, + "This task finds a set of nodes which satisfy certain search criteria.", + "This task finds a set of nodes which satisfy certain search criteria." + ], + [ + "sentence", + "", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 9504985242355517435, + 18023630049865929203, + 18446744073709551615, + 18446744073709551615, + 70, + 216, + 70, + 216, + 13, + 41, + true, + "This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property.", + "This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property." + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 1139782918783911343, + 10980002430644435601, + 18446744073709551615, + 18446744073709551615, + 45, + 68, + 45, + 68, + 9, + 12, + true, + "certain search criteria", + "certain search criteria" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 1353284443403185756, + 13247714493573934499, + 18446744073709551615, + 18446744073709551615, + 100, + 111, + 100, + 111, + 19, + 21, + true, + "single node", + "single node" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 6764280510749928008, + 2538978002994667418, + 18446744073709551615, + 18446744073709551615, + 141, + 162, + 141, + 162, + 28, + 31, + true, + "exact node identifier", + "exact node identifier" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 6423270415561497308, + 8377404395557394670, + 18446744073709551615, + 18446744073709551615, + 196, + 215, + 196, + 215, + 38, + 40, + true, + "particular property", + "particular property" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 18403572735135737032, + 14924261502817611542, + 18446744073709551615, + 18446744073709551615, + 239, + 259, + 239, + 259, + 45, + 49, + true, + "node vector v ^{!}", + "node vector v $^{!}$" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 389609625631210899, + 1695322703373668221, + 18446744073709551615, + 18446744073709551615, + 5, + 9, + 5, + 9, + 1, + 2, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 12178341415895638602, + 16401925845918103767, + 18446744073709551615, + 18446744073709551615, + 18, + 21, + 18, + 21, + 4, + 5, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 329104161758737773, + 9063467011231067037, + 18446744073709551615, + 18446744073709551615, + 25, + 30, + 25, + 30, + 6, + 7, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 389609625621548280, + 1694766356608744958, + 18446744073709551615, + 18446744073709551615, + 133, + 137, + 133, + 137, + 26, + 27, + true, + "name", + "name" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 329104161758737773, + 9063467011231090358, + 18446744073709551615, + 18446744073709551615, + 175, + 180, + 175, + 180, + 34, + 35, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 389609625631210899, + 1695322703373656343, + 18446744073709551615, + 18446744073709551615, + 221, + 225, + 221, + 225, + 42, + 43, + true, + "task", + "task" + ], + [ + "numval", + "ival", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 17767354399704235161, + 3863023118325513235, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 2, + 3, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 17767354399704235160, + 3863023118293440507, + 18446744073709551615, + 18446744073709551615, + 33, + 34, + 33, + 34, + 8, + 9, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 17767354399704235162, + 3863023118274566919, + 18446744073709551615, + 18446744073709551615, + 47, + 48, + 47, + 48, + 13, + 14, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 15441160910541481788, + 1525860005576289474, + 18446744073709551615, + 18446744073709551615, + 60, + 62, + 60, + 62, + 16, + 16, + false, + "26", + "26" + ], + [ + "numval", + "ival", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 17767354399704235163, + 3863023118291550190, + 18446744073709551615, + 18446744073709551615, + 67, + 68, + 66, + 67, + 18, + 19, + true, + "3", + "3" + ], + [ + "expression", + "wtoken-concatenation", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 5948620232447446819, + 3619933651552123134, + 18446744073709551615, + 18446744073709551615, + 2, + 15, + 2, + 15, + 1, + 2, + true, + "^{!}_{i}=", + "$^{!}$$_{i}$=" + ], + [ + "expression", + "wtoken-concatenation", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 7116489890516680880, + 11145030960935339860, + 18446744073709551615, + 18446744073709551615, + 53, + 63, + 53, + 63, + 16, + 17, + true, + "GLYPH", + "GLYPH" + ], + [ + "sentence", + "", + 11778884428660217326, + "TEXT", + "#/texts/100", + 1.0, + 11753315931385641908, + 2734980420462844181, + 18446744073709551615, + 18446744073709551615, + 6, + 69, + 6, + 69, + 1, + 13, + true, + "S represents the set of nodes that satisfy the search criteria.", + "S represents the set of nodes that satisfy the search criteria." + ], + [ + "term", + "single-term", + 11778884428660217326, + "TEXT", + "#/texts/100", + 1.0, + 6565208683621509436, + 15059693667290050564, + 18446744073709551615, + 18446744073709551615, + 53, + 68, + 53, + 68, + 10, + 12, + true, + "search criteria", + "search criteria" + ], + [ + "term", + "single-term", + 11778884428660217326, + "TEXT", + "#/texts/100", + 1.0, + 12178341415895638602, + 1959738706672078328, + 18446744073709551615, + 18446744073709551615, + 23, + 26, + 23, + 26, + 4, + 5, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 11778884428660217326, + "TEXT", + "#/texts/100", + 1.0, + 329104161758737773, + 9790437187668217640, + 18446744073709551615, + 18446744073709551615, + 30, + 35, + 30, + 35, + 6, + 7, + true, + "nodes", + "nodes" + ], + [ + "numval", + "fval", + 12875050310340408203, + "TEXT", + "#/texts/101", + 1.0, + 12178341415896435196, + 17738549797942293450, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 0, + false, + "3.3", + "3.3" + ], + [ + "numval", + "ival", + 12875050310340408203, + "TEXT", + "#/texts/101", + 1.0, + 17767354399704235162, + 16045717610508207921, + 18446744073709551615, + 18446744073709551615, + 4, + 5, + 4, + 5, + 0, + 1, + false, + "2", + "2" + ], + [ + "expression", + "wtoken-concatenation", + 12875050310340408203, + "TEXT", + "#/texts/101", + 1.0, + 329104147725285866, + 5872895868719124566, + 18446744073709551615, + 18446744073709551615, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "3.3.2", + "3.3.2" + ], + [ + "numval", + "fval", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 12178341415896435198, + 9356552374251491539, + 18446744073709551615, + 18446744073709551615, + 102, + 105, + 102, + 105, + 16, + 17, + true, + "3.1", + "3.1" + ], + [ + "expression", + "word-concatenation", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 8759553427650775934, + 8729620688739724694, + 18446744073709551615, + 18446744073709551615, + 21, + 36, + 21, + 36, + 4, + 5, + true, + "graph-traversal", + "graph-traversal" + ], + [ + "expression", + "word-concatenation", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 8759553427650775934, + 8729620688739718345, + 18446744073709551615, + 18446744073709551615, + 51, + 66, + 51, + 66, + 8, + 9, + true, + "graph-traversal", + "graph-traversal" + ], + [ + "expression", + "word-concatenation", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 10308187620027892234, + 17660718396182283452, + 18446744073709551615, + 18446744073709551615, + 215, + 231, + 215, + 231, + 37, + 38, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "expression", + "word-concatenation", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 18223709631064383906, + 5290334765848251647, + 18446744073709551615, + 18446744073709551615, + 474, + 485, + 474, + 485, + 79, + 80, + true, + "path-length", + "path-length" + ], + [ + "expression", + "latex", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 389609625699793568, + 15897512725555629958, + 18446744073709551615, + 18446744073709551615, + 181, + 187, + 181, + 187, + 30, + 31, + true, + "^{!}", + "$^{!}$" + ], + [ + "expression", + "wtoken-concatenation", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 329104159258693175, + 3850990531940834900, + 18446744073709551615, + 18446744073709551615, + 170, + 177, + 170, + 177, + 28, + 29, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "sentence", + "", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 6704314193472549131, + 12203121571784219650, + 18446744073709551615, + 18446744073709551615, + 0, + 67, + 0, + 67, + 0, + 10, + true, + "The simplest type of graph-traversal is the direct graph-traversal.", + "The simplest type of graph-traversal is the direct graph-traversal." + ], + [ + "sentence", + "", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 5996076621080095124, + 3025184828607773624, + 18446744073709551615, + 18446744073709551615, + 68, + 188, + 68, + 188, + 10, + 32, + true, + "As explained in detail in section 3.1, these can be implemented as a straightforward SpMV operation w $^{!}$= Av $^{!}$.", + "As explained in detail in section 3.1, these can be implemented as a straightforward SpMV operation w $^{!}$= Av $^{!}$." + ], + [ + "sentence", + "", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 4548432471191064907, + 12405546864064907829, + 18446744073709551615, + 18446744073709551615, + 189, + 274, + 189, + 274, + 32, + 47, + true, + "In more advanced types of graph-traversals, we evaluate all paths of different depth.", + "In more advanced types of graph-traversals, we evaluate all paths of different depth." + ], + [ + "sentence", + "", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 13078819215206981832, + 1971017990059557300, + 18446744073709551615, + 18446744073709551615, + 275, + 486, + 275, + 486, + 47, + 81, + true, + "Since the number of paths connecting two nodes might increase exponentially with the pathlength, one typically reduces the contribution of each path by weighting it with the inverse factorial of the path-length.", + "Since the number of paths connecting two nodes might increase exponentially with the pathlength, one typically reduces the contribution of each path by weighting it with the inverse factorial of the path-length." + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 11151330421356998770, + 885013344391439016, + 18446744073709551615, + 18446744073709551615, + 4, + 17, + 4, + 17, + 1, + 3, + true, + "simplest type", + "simplest type" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 3993395428054562947, + 9116231836733178409, + 18446744073709551615, + 18446744073709551615, + 44, + 66, + 44, + 66, + 7, + 9, + true, + "direct graph-traversal", + "direct graph-traversal" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 13752318599956892986, + 3243882999611675116, + 18446744073709551615, + 18446744073709551615, + 137, + 167, + 137, + 167, + 24, + 27, + true, + "straightforward SpMV operation", + "straightforward SpMV operation" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 13549848866814318649, + 12951100156455938138, + 18446744073709551615, + 18446744073709551615, + 197, + 211, + 197, + 211, + 34, + 36, + true, + "advanced types", + "advanced types" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 13127417780813530133, + 2668820547192862622, + 18446744073709551615, + 18446744073709551615, + 258, + 273, + 258, + 273, + 44, + 46, + true, + "different depth", + "different depth" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 2920317602823103143, + 14435126486004632161, + 18446744073709551615, + 18446744073709551615, + 449, + 466, + 449, + 466, + 75, + 77, + true, + "inverse factorial", + "inverse factorial" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 12062282599681290620, + 16899929540755782838, + 18446744073709551615, + 18446744073709551615, + 554, + 568, + 554, + 568, + 95, + 97, + true, + "indirect paths", + "indirect paths" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 16381206568246674273, + 6949217655613054178, + 18446744073709551615, + 18446744073709551615, + 84, + 90, + 84, + 90, + 13, + 14, + true, + "detail", + "detail" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 8106478708629288965, + 2634479629892977717, + 18446744073709551615, + 18446744073709551615, + 94, + 101, + 94, + 101, + 15, + 16, + true, + "section", + "section" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 15441160910541480528, + 6349876376603311031, + 18446744073709551615, + 18446744073709551615, + 178, + 180, + 178, + 180, + 29, + 30, + true, + "Av", + "Av" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 10308187620027892234, + 17660718396182283452, + 18446744073709551615, + 18446744073709551615, + 215, + 231, + 215, + 231, + 37, + 38, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 329104161667979410, + 16219622563067219904, + 18446744073709551615, + 18446744073709551615, + 249, + 254, + 249, + 254, + 42, + 43, + true, + "paths", + "paths" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 16381206574973295053, + 8613944306628715549, + 18446744073709551615, + 18446744073709551615, + 285, + 291, + 285, + 291, + 49, + 50, + true, + "number", + "number" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 329104161667979410, + 16219622563067217673, + 18446744073709551615, + 18446744073709551615, + 295, + 300, + 295, + 300, + 51, + 52, + true, + "paths", + "paths" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 329104161758737773, + 16234752642064308276, + 18446744073709551615, + 18446744073709551615, + 316, + 321, + 316, + 321, + 54, + 55, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 13972526853646866432, + 4963666646089781896, + 18446744073709551615, + 18446744073709551615, + 360, + 370, + 360, + 370, + 60, + 61, + true, + "pathlength", + "pathlength" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 4603153860084293890, + 10724656896814481236, + 18446744073709551615, + 18446744073709551615, + 398, + 410, + 398, + 410, + 66, + 67, + true, + "contribution", + "contribution" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 389609625632305102, + 15909948299731138978, + 18446744073709551615, + 18446744073709551615, + 419, + 423, + 419, + 423, + 69, + 70, + true, + "path", + "path" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 18223709631064383906, + 5290334765848251647, + 18446744073709551615, + 18446744073709551615, + 474, + 485, + 474, + 485, + 79, + 80, + true, + "path-length", + "path-length" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 8106397496085150773, + 11486711446788774948, + 18446744073709551615, + 18446744073709551615, + 491, + 498, + 491, + 498, + 82, + 83, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 389609625695123443, + 15908991454686209555, + 18446744073709551615, + 18446744073709551615, + 513, + 517, + 513, + 517, + 86, + 87, + true, + "case", + "case" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 8106397733466170068, + 4491208394922089960, + 18446744073709551615, + 18446744073709551615, + 572, + 579, + 572, + 579, + 98, + 99, + true, + "follows", + "follows" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235162, + 9989301225055039953, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 5, + 6, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235162, + 9989301225055040085, + 18446744073709551615, + 18446744073709551615, + 18, + 19, + 18, + 19, + 6, + 7, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235163, + 9989301225441111510, + 18446744073709551615, + 18446744073709551615, + 26, + 27, + 26, + 27, + 10, + 11, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235163, + 9989301225441111382, + 18446744073709551615, + 18446744073709551615, + 28, + 29, + 28, + 29, + 11, + 12, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235161, + 9989301227833998387, + 18446744073709551615, + 18446744073709551615, + 41, + 42, + 41, + 42, + 14, + 14, + false, + "1", + "1" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235161, + 9989301227833995448, + 18446744073709551615, + 18446744073709551615, + 51, + 52, + 51, + 52, + 15, + 15, + false, + "1", + "1" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235161, + 9989301227833985318, + 18446744073709551615, + 18446744073709551615, + 61, + 62, + 61, + 62, + 16, + 16, + false, + "1", + "1" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 15441160910541481862, + 7426216222773784579, + 18446744073709551615, + 18446744073709551615, + 71, + 73, + 71, + 73, + 17, + 17, + false, + "18", + "18" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 15441160910541481863, + 7426216222719391073, + 18446744073709551615, + 18446744073709551615, + 82, + 84, + 82, + 84, + 18, + 18, + false, + "19", + "19" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 12178341415896413249, + 17313632338592011779, + 18446744073709551615, + 18446744073709551615, + 103, + 106, + 103, + 106, + 21, + 22, + false, + "- 1", + "- 1" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235160, + 9989301226364846173, + 18446744073709551615, + 18446744073709551615, + 114, + 115, + 114, + 115, + 22, + 22, + false, + "0", + "0" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235161, + 9989301227833984724, + 18446744073709551615, + 18446744073709551615, + 124, + 125, + 124, + 125, + 23, + 23, + false, + "1", + "1" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235156, + 9989301228016144908, + 18446744073709551615, + 18446744073709551615, + 140, + 141, + 139, + 140, + 28, + 29, + true, + "4", + "4" + ], + [ + "expression", + "latex", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 389609625699793568, + 263634606544655319, + 18446744073709551615, + 18446744073709551615, + 129, + 135, + 129, + 135, + 25, + 26, + true, + "^{!}", + "$^{!}$" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 329104159258693175, + 3768331475560236011, + 18446744073709551615, + 18446744073709551615, + 2, + 9, + 2, + 9, + 1, + 2, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 2902585676651763710, + 13661890687540821317, + 18446744073709551615, + 18446744073709551615, + 34, + 43, + 34, + 43, + 14, + 15, + true, + "GLYPH", + "GLYPH" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 2902585676651763710, + 13661890687540821195, + 18446744073709551615, + 18446744073709551615, + 44, + 53, + 44, + 53, + 15, + 16, + true, + "GLYPH", + "GLYPH" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 2902585676651763710, + 13661890687540889198, + 18446744073709551615, + 18446744073709551615, + 54, + 63, + 54, + 63, + 16, + 17, + true, + "GLYPH", + "GLYPH" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 7116489890516677091, + 1239928655295932073, + 18446744073709551615, + 18446744073709551615, + 64, + 74, + 64, + 74, + 17, + 18, + true, + "GLYPH", + "GLYPH" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 7116489890516677417, + 1239930097162833493, + 18446744073709551615, + 18446744073709551615, + 75, + 85, + 75, + 85, + 18, + 19, + true, + "GLYPH", + "GLYPH" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 329104159258693175, + 3768331475560262129, + 18446744073709551615, + 18446744073709551615, + 88, + 95, + 88, + 95, + 20, + 21, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 14652258881047556399, + 6257375905829990882, + 18446744073709551615, + 18446744073709551615, + 96, + 106, + 96, + 106, + 21, + 22, + true, + "e^{A}- 1", + "e$^{A}$- 1" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 2902585676651763647, + 13661890647980804945, + 18446744073709551615, + 18446744073709551615, + 107, + 116, + 107, + 116, + 22, + 23, + true, + "GLYPH", + "GLYPH" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 2902585676651763710, + 13661890687540890061, + 18446744073709551615, + 18446744073709551615, + 117, + 126, + 117, + 126, + 23, + 24, + true, + "GLYPH", + "GLYPH" + ], + [ + "sentence", + "", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 9916410401532570841, + 4842910269063890136, + 18446744073709551615, + 18446744073709551615, + 2, + 21, + 2, + 21, + 1, + 8, + true, + "$^{!}$= A + A 2 2 !", + "$^{!}$= A + A 2 2 !" + ], + [ + "sentence", + "", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 8106351318766820018, + 16049833510509929686, + 18446744073709551615, + 18446744073709551615, + 24, + 31, + 24, + 31, + 9, + 13, + true, + "A 3 3 !", + "A 3 3 !" + ], + [ + "numval", + "ival", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 17767354399704235162, + 13895472510679550781, + 18446744073709551615, + 18446744073709551615, + 194, + 195, + 192, + 193, + 40, + 41, + true, + "2", + "2" + ], + [ + "expression", + "word-concatenation", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 8759553427650775934, + 13977638772783275859, + 18446744073709551615, + 18446744073709551615, + 28, + 43, + 28, + 43, + 7, + 8, + true, + "graph-traversal", + "graph-traversal" + ], + [ + "expression", + "word-concatenation", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 4667405858993953327, + 7138937013884052228, + 18446744073709551615, + 18446744073709551615, + 79, + 94, + 79, + 94, + 15, + 16, + true, + "matrix-function", + "matrix-function" + ], + [ + "expression", + "latex", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 389609625699793568, + 11988523914287915245, + 18446744073709551615, + 18446744073709551615, + 145, + 151, + 143, + 149, + 30, + 31, + true, + "^{!}", + "$^{!}$" + ], + [ + "expression", + "wtoken-concatenation", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 329104159258693175, + 1907154720488094232, + 18446744073709551615, + 18446744073709551615, + 126, + 133, + 126, + 133, + 25, + 26, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "sentence", + "", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 4946211207219256029, + 13284838307535775434, + 18446744073709551615, + 18446744073709551615, + 0, + 152, + 0, + 150, + 0, + 32, + true, + "In its most generic case, a graph-traversal can therefore be written down as a matrix-function applied on an edge, that is, w $^{!}$= fA \u00f0 \u00de v $^{!}$.", + "In its most generic case, a graph-traversal can therefore be written down as a matrix-function applied on an edge, that is, w $^{!}$= fA \u00f0 \u00de v $^{!}$." + ], + [ + "sentence", + "", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 3911818568884640949, + 14572114789426270089, + 18446744073709551615, + 18446744073709551615, + 153, + 307, + 151, + 305, + 32, + 57, + true, + "As discussed in detail in previous work, 2 this type of operation can be evaluated extremely efficiently using a recursive Chebyshev polynomial expansion.", + "As discussed in detail in previous work, 2 this type of operation can be evaluated extremely efficiently using a recursive Chebyshev polynomial expansion." + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 17844194112438609771, + 15245159028300247470, + 18446744073709551615, + 18446744073709551615, + 12, + 24, + 12, + 24, + 3, + 5, + true, + "generic case", + "generic case" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 16381206566428137843, + 6998923394139746856, + 18446744073709551615, + 18446744073709551615, + 143, + 151, + 141, + 149, + 29, + 31, + true, + "v ^{!}", + "v $^{!}$" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 12580512760652482076, + 9613461018338631967, + 18446744073709551615, + 18446744073709551615, + 179, + 192, + 177, + 190, + 37, + 39, + true, + "previous work", + "previous work" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 17218927816364445558, + 9299012655407568424, + 18446744073709551615, + 18446744073709551615, + 266, + 306, + 264, + 304, + 52, + 56, + true, + "recursive Chebyshev polynomial expansion", + "recursive Chebyshev polynomial expansion" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 8759553427650775934, + 13977638772783275859, + 18446744073709551615, + 18446744073709551615, + 28, + 43, + 28, + 43, + 7, + 8, + true, + "graph-traversal", + "graph-traversal" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 4667405858993953327, + 7138937013884052228, + 18446744073709551615, + 18446744073709551615, + 79, + 94, + 79, + 94, + 15, + 16, + true, + "matrix-function", + "matrix-function" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 389609625699630670, + 11988322654688297783, + 18446744073709551615, + 18446744073709551615, + 109, + 113, + 109, + 113, + 19, + 20, + true, + "edge", + "edge" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 15441160910541486262, + 16040958438799012769, + 18446744073709551615, + 18446744073709551615, + 134, + 136, + 134, + 136, + 26, + 27, + true, + "fA", + "fA" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 16381206568246674273, + 7876224071511699486, + 18446744073709551615, + 18446744073709551615, + 169, + 175, + 167, + 173, + 35, + 36, + true, + "detail", + "detail" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 389609625631434316, + 11975495498267754916, + 18446744073709551615, + 18446744073709551615, + 201, + 205, + 199, + 203, + 42, + 43, + true, + "type", + "type" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 6167836358624304835, + 5859531764025592016, + 18446744073709551615, + 18446744073709551615, + 209, + 218, + 207, + 216, + 44, + 45, + true, + "operation", + "operation" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "numval", + "fval", + 10252446451495472512, + "TEXT", + "#/texts/106", + 1.0, + 12178341415896435196, + 4867750156681578759, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 0, + false, + "3.3", + "3.3" + ], + [ + "numval", + "ival", + 10252446451495472512, + "TEXT", + "#/texts/106", + 1.0, + 17767354399704235163, + 11397855393475351535, + 18446744073709551615, + 18446744073709551615, + 4, + 5, + 4, + 5, + 0, + 1, + false, + "3", + "3" + ], + [ + "expression", + "wtoken-concatenation", + 10252446451495472512, + "TEXT", + "#/texts/106", + 1.0, + 329104147725285869, + 6000044661942170615, + 18446744073709551615, + 18446744073709551615, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "3.3.3", + "3.3.3" + ], + [ + "sentence", + "", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 13700534978823339273, + 12250932271253598025, + 18446744073709551615, + 18446744073709551615, + 0, + 115, + 0, + 115, + 0, + 22, + true, + "In logical operations, two sets of nodes are merged into one resulting set, each represented through a node vector.", + "In logical operations, two sets of nodes are merged into one resulting set, each represented through a node vector." + ], + [ + "sentence", + "", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 7959509938832284547, + 7438636696455487498, + 18446744073709551615, + 18446744073709551615, + 116, + 176, + 116, + 176, + 22, + 36, + true, + "There are three common logical operations, AND, OR, and NOT.", + "There are three common logical operations, AND, OR, and NOT." + ], + [ + "sentence", + "", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 2530535679015163989, + 6499975118547970703, + 18446744073709551615, + 18446744073709551615, + 177, + 310, + 177, + 310, + 36, + 60, + true, + "In the AND and OR operations, we compute the geometric or the arithmetic mean respectively for each pairwise elements in the vectors.", + "In the AND and OR operations, we compute the geometric or the arithmetic mean respectively for each pairwise elements in the vectors." + ], + [ + "sentence", + "", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 11716731942292146940, + 10918583807814825475, + 18446744073709551615, + 18446744073709551615, + 311, + 390, + 311, + 390, + 60, + 77, + true, + "In the NOT operation, we inverse the sign for each element of the input vector.", + "In the NOT operation, we inverse the sign for each element of the input vector." + ], + [ + "term", + "enum-term-mark-4", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 11299968147221621659, + 15638039005966470642, + 18446744073709551615, + 18446744073709551615, + 164, + 175, + 164, + 175, + 31, + 35, + true, + "OR, and NOT", + "OR, and NOT" + ], + [ + "term", + "enum-term-mark-4", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 5900106061036628893, + 4204146521886515958, + 18446744073709551615, + 18446744073709551615, + 184, + 194, + 184, + 194, + 38, + 41, + true, + "AND and OR", + "AND and OR" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 12603609256967955544, + 14772344826157711306, + 18446744073709551615, + 18446744073709551615, + 3, + 21, + 3, + 21, + 1, + 3, + true, + "logical operations", + "logical operations" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 7596240835069815859, + 12179457947178679624, + 18446744073709551615, + 18446744073709551615, + 103, + 114, + 103, + 114, + 19, + 21, + true, + "node vector", + "node vector" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 14800086467850479009, + 666210018065836720, + 18446744073709551615, + 18446744073709551615, + 132, + 157, + 132, + 157, + 25, + 28, + true, + "common logical operations", + "common logical operations" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 12821814845233359770, + 1370605997523919099, + 18446744073709551615, + 18446744073709551615, + 192, + 205, + 192, + 205, + 40, + 42, + true, + "OR operations", + "OR operations" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 14773391768607445380, + 7907749020990852481, + 18446744073709551615, + 18446744073709551615, + 239, + 254, + 239, + 254, + 49, + 51, + true, + "arithmetic mean", + "arithmetic mean" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 15262760339251519687, + 2135796427551055674, + 18446744073709551615, + 18446744073709551615, + 277, + 294, + 277, + 294, + 54, + 56, + true, + "pairwise elements", + "pairwise elements" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 17380026057076513286, + 8554846137731019782, + 18446744073709551615, + 18446744073709551615, + 318, + 331, + 318, + 331, + 62, + 64, + true, + "NOT operation", + "NOT operation" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 785590888379155985, + 2070684625654949058, + 18446744073709551615, + 18446744073709551615, + 377, + 389, + 377, + 389, + 74, + 76, + true, + "input vector", + "input vector" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 389609625741077841, + 8558423680807701295, + 18446744073709551615, + 18446744073709551615, + 27, + 31, + 27, + 31, + 5, + 6, + true, + "sets", + "sets" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 329104161758737773, + 11460579442964916464, + 18446744073709551615, + 18446744073709551615, + 35, + 40, + 35, + 40, + 7, + 8, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 12178341415895638602, + 6980157083956599502, + 18446744073709551615, + 18446744073709551615, + 71, + 74, + 71, + 74, + 13, + 14, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 15441160910541487730, + 17782216520369344466, + 18446744073709551615, + 18446744073709551615, + 164, + 166, + 164, + 166, + 31, + 32, + true, + "OR", + "OR" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 12178341415896300384, + 6980219868635577240, + 18446744073709551615, + 18446744073709551615, + 172, + 175, + 172, + 175, + 34, + 35, + true, + "NOT", + "NOT" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 12178341415896229184, + 6980242431802160591, + 18446744073709551615, + 18446744073709551615, + 184, + 187, + 184, + 187, + 38, + 39, + true, + "AND", + "AND" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 8106477900816818323, + 7610138008398569534, + 18446744073709551615, + 18446744073709551615, + 302, + 309, + 302, + 309, + 58, + 59, + true, + "vectors", + "vectors" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 389609625741040683, + 8558425378506358436, + 18446744073709551615, + 18446744073709551615, + 348, + 352, + 348, + 352, + 68, + 69, + true, + "sign", + "sign" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 8106397492274286821, + 4903176674517740239, + 18446744073709551615, + 18446744073709551615, + 362, + 369, + 362, + 369, + 71, + 72, + true, + "element", + "element" + ], + [ + "numval", + "fval", + 16289627123982758705, + "TEXT", + "#/texts/108", + 1.0, + 12178341415896435196, + 4375676351556568035, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 0, + false, + "3.3", + "3.3" + ], + [ + "numval", + "ival", + 16289627123982758705, + "TEXT", + "#/texts/108", + 1.0, + 17767354399704235156, + 14141377842797647357, + 18446744073709551615, + 18446744073709551615, + 4, + 5, + 4, + 5, + 0, + 1, + false, + "4", + "4" + ], + [ + "expression", + "wtoken-concatenation", + 16289627123982758705, + "TEXT", + "#/texts/108", + 1.0, + 329104147725285868, + 17145181082057860493, + 18446744073709551615, + 18446744073709551615, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "3.3.4", + "3.3.4" + ], + [ + "sentence", + "", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 15673589737287090621, + 17018937476478673316, + 18446744073709551615, + 18446744073709551615, + 0, + 82, + 0, + 82, + 0, + 13, + true, + "Lastly, we implement operations which transform the weights associated with nodes.", + "Lastly, we implement operations which transform the weights associated with nodes." + ], + [ + "sentence", + "", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 415786532727651604, + 2431945995463542775, + 18446744073709551615, + 18446744073709551615, + 83, + 172, + 83, + 172, + 13, + 27, + true, + "One such operation renormalizes and ultimately ranks the nodes according to their weight.", + "One such operation renormalizes and ultimately ranks the nodes according to their weight." + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 13828980233091888506, + 10657123071909144340, + 18446744073709551615, + 18446744073709551615, + 83, + 114, + 83, + 114, + 13, + 17, + true, + "One such operation renormalizes", + "One such operation renormalizes" + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 13985988710970420061, + 13844832953337165230, + 18446744073709551615, + 18446744073709551615, + 21, + 31, + 21, + 31, + 4, + 5, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 8106477822555716423, + 4302380438101543009, + 18446744073709551615, + 18446744073709551615, + 52, + 59, + 52, + 59, + 8, + 9, + true, + "weights", + "weights" + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 329104161758737773, + 15575547173408857515, + 18446744073709551615, + 18446744073709551615, + 76, + 81, + 76, + 81, + 11, + 12, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 329104161758737773, + 15575547173408845563, + 18446744073709551615, + 18446744073709551615, + 140, + 145, + 140, + 145, + 21, + 22, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 16381206557786164800, + 15950847297401313251, + 18446744073709551615, + 18446744073709551615, + 165, + 171, + 165, + 171, + 25, + 26, + true, + "weight", + "weight" + ], + [ + "numval", + "ival", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 17767354399704235160, + 3668124634718140630, + 18446744073709551615, + 18446744073709551615, + 351, + 352, + 351, + 352, + 64, + 65, + true, + "0", + "0" + ], + [ + "parenthesis", + "round brackets", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 5941512262948363295, + 9796117134063746060, + 18446744073709551615, + 18446744073709551615, + 334, + 353, + 334, + 353, + 61, + 66, + true, + "(v $^{!}$$_{i}$= 0)", + "(v $^{!}$$_{i}$= 0)" + ], + [ + "expression", + "word-concatenation", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 1526275179175870585, + 7011924518277257184, + 18446744073709551615, + 18446744073709551615, + 510, + 521, + 510, + 521, + 96, + 97, + true, + "depth-first", + "depth-first" + ], + [ + "expression", + "wtoken-concatenation", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 5948620232447446819, + 12075094026550463871, + 18446744073709551615, + 18446744073709551615, + 337, + 350, + 337, + 350, + 63, + 64, + true, + "^{!}_{i}=", + "$^{!}$$_{i}$=" + ], + [ + "sentence", + "", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 17111569020893923564, + 7584218824091880092, + 18446744073709551615, + 18446744073709551615, + 0, + 137, + 0, + 137, + 0, + 25, + true, + "With these four types of operations, we can express rich queries to answer complex questions, which can have multiple inputs and outputs.", + "With these four types of operations, we can express rich queries to answer complex questions, which can have multiple inputs and outputs." + ], + [ + "sentence", + "", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 1177385013172704664, + 14024608104153249268, + 18446744073709551615, + 18446744073709551615, + 138, + 209, + 138, + 209, + 25, + 39, + true, + "Let us now discuss how a workflow is evaluated within the graph engine.", + "Let us now discuss how a workflow is evaluated within the graph engine." + ], + [ + "sentence", + "", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 8042820535480076507, + 16114465682263414482, + 18446744073709551615, + 18446744073709551615, + 210, + 291, + 210, + 291, + 39, + 54, + true, + "Once a workflow has been submitted, each worktask is initially assigned a vector.", + "Once a workflow has been submitted, each worktask is initially assigned a vector." + ], + [ + "sentence", + "", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 8483744196766237297, + 6529174387420215088, + 18446744073709551615, + 18446744073709551615, + 292, + 354, + 292, + 354, + 54, + 67, + true, + "These vectors are all initialized to zero (v $^{!}$$_{i}$= 0).", + "These vectors are all initialized to zero (v $^{!}$$_{i}$= 0)." + ], + [ + "sentence", + "", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 13788972938660668664, + 2865802558503613028, + 18446744073709551615, + 18446744073709551615, + 355, + 453, + 355, + 453, + 67, + 87, + true, + "Next, the graph will analyze the DAG of worktasks and identify which tasks can be run in parallel.", + "Next, the graph will analyze the DAG of worktasks and identify which tasks can be run in parallel." + ], + [ + "sentence", + "", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 5892389074642583888, + 1449681128272710361, + 18446744073709551615, + 18446744073709551615, + 454, + 623, + 454, + 623, + 87, + 119, + true, + "This is achieved by performing a topological sort using depth-first traversal, which yields a list in which each item is a set of tasks that can be executed in parallel.", + "This is achieved by performing a topological sort using depth-first traversal, which yields a list in which each item is a set of tasks that can be executed in parallel." + ], + [ + "sentence", + "", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 856669285578903305, + 9015668049869296425, + 18446744073709551615, + 18446744073709551615, + 624, + 691, + 624, + 691, + 119, + 130, + true, + "The graph engine then proceeds with the parallel task computations.", + "The graph engine then proceeds with the parallel task computations." + ], + [ + "term", + "enum-term-mark-3", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 4974816129218667479, + 1616253342736872326, + 18446744073709551615, + 18446744073709551615, + 118, + 136, + 118, + 136, + 21, + 24, + true, + "inputs and outputs", + "inputs and outputs" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 18146432382445665275, + 8129649123858642143, + 18446744073709551615, + 18446744073709551615, + 52, + 64, + 52, + 64, + 10, + 12, + true, + "rich queries", + "rich queries" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14314461436358843828, + 3961703421592680698, + 18446744073709551615, + 18446744073709551615, + 75, + 92, + 75, + 92, + 14, + 16, + true, + "complex questions", + "complex questions" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 16086744441680563418, + 6576900302010150786, + 18446744073709551615, + 18446744073709551615, + 109, + 124, + 109, + 124, + 20, + 22, + true, + "multiple inputs", + "multiple inputs" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 2924972194163802578, + 2750704591142470583, + 18446744073709551615, + 18446744073709551615, + 196, + 208, + 196, + 208, + 36, + 38, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 124916246655760082, + 14997524246937848977, + 18446744073709551615, + 18446744073709551615, + 487, + 503, + 487, + 503, + 93, + 95, + true, + "topological sort", + "topological sort" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 2664167013663794215, + 14240742823709584702, + 18446744073709551615, + 18446744073709551615, + 510, + 531, + 510, + 531, + 96, + 98, + true, + "depth-first traversal", + "depth-first traversal" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 2924972194163802578, + 2750704591142518027, + 18446744073709551615, + 18446744073709551615, + 628, + 640, + 628, + 640, + 120, + 122, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 16741233145656393762, + 10469151420604399424, + 18446744073709551615, + 18446744073709551615, + 664, + 690, + 664, + 690, + 126, + 129, + true, + "parallel task computations", + "parallel task computations" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 329104159243796903, + 10102864649349257834, + 18446744073709551615, + 18446744073709551615, + 16, + 21, + 16, + 21, + 3, + 4, + true, + "types", + "types" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 13985988710970420061, + 5670507251515585408, + 18446744073709551615, + 18446744073709551615, + 25, + 35, + 25, + 35, + 5, + 6, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 8106342536052271615, + 12336710488765885535, + 18446744073709551615, + 18446744073709551615, + 129, + 136, + 129, + 136, + 23, + 24, + true, + "outputs", + "outputs" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14638857990842534974, + 12699876069210148656, + 18446744073709551615, + 18446744073709551615, + 163, + 171, + 163, + 171, + 31, + 32, + true, + "workflow", + "workflow" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14638857990842534974, + 12699876069210239434, + 18446744073709551615, + 18446744073709551615, + 217, + 225, + 217, + 225, + 41, + 42, + true, + "workflow", + "workflow" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14638857990856728723, + 12699698953372660440, + 18446744073709551615, + 18446744073709551615, + 251, + 259, + 251, + 259, + 47, + 48, + true, + "worktask", + "worktask" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 16381206519458118578, + 17808566753953437795, + 18446744073709551615, + 18446744073709551615, + 284, + 290, + 284, + 290, + 52, + 53, + true, + "vector", + "vector" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 8106477900816818323, + 3863078491683433197, + 18446744073709551615, + 18446744073709551615, + 298, + 305, + 298, + 305, + 55, + 56, + true, + "vectors", + "vectors" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 389609625695751254, + 2338768517388150094, + 18446744073709551615, + 18446744073709551615, + 355, + 359, + 355, + 359, + 67, + 68, + true, + "Next", + "Next" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 329104159211247965, + 10104926125817128652, + 18446744073709551615, + 18446744073709551615, + 365, + 370, + 365, + 370, + 70, + 71, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 12178341415896112046, + 6992158052106598564, + 18446744073709551615, + 18446744073709551615, + 388, + 391, + 388, + 391, + 74, + 75, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 3534171294115941544, + 16274811017124843027, + 18446744073709551615, + 18446744073709551615, + 395, + 404, + 395, + 404, + 76, + 77, + true, + "worktasks", + "worktasks" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 329104159214088329, + 10104410304708834324, + 18446744073709551615, + 18446744073709551615, + 424, + 429, + 424, + 429, + 80, + 81, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14814034872218884114, + 4906660160194336109, + 18446744073709551615, + 18446744073709551615, + 444, + 452, + 444, + 452, + 85, + 86, + true, + "parallel", + "parallel" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 389609625633315922, + 2353303435662040762, + 18446744073709551615, + 18446744073709551615, + 548, + 552, + 548, + 552, + 102, + 103, + true, + "list", + "list" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 389609625698616944, + 2338675321444319215, + 18446744073709551615, + 18446744073709551615, + 567, + 571, + 567, + 571, + 106, + 107, + true, + "item", + "item" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 12178341415895638602, + 6992187631556280274, + 18446744073709551615, + 18446744073709551615, + 577, + 580, + 577, + 580, + 109, + 110, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 329104159214088329, + 10104410304708836508, + 18446744073709551615, + 18446744073709551615, + 584, + 589, + 584, + 589, + 111, + 112, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14814034872218884114, + 4906660160194342930, + 18446744073709551615, + 18446744073709551615, + 614, + 622, + 614, + 622, + 117, + 118, + true, + "parallel", + "parallel" + ], + [ + "sentence", + "", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 7660575871008244103, + 2316439382736820559, + 18446744073709551615, + 18446744073709551615, + 0, + 133, + 0, + 133, + 0, + 24, + true, + "For each task, we obtain a set of nodes with corresponding weights by identifying the nonzero elements in the associated node vector.", + "For each task, we obtain a set of nodes with corresponding weights by identifying the nonzero elements in the associated node vector." + ], + [ + "sentence", + "", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 6713072116150697613, + 11925397580613056643, + 18446744073709551615, + 18446744073709551615, + 134, + 266, + 134, + 266, + 24, + 49, + true, + "After executing the full workflow, we therefore obtain for each task a list of nodes which can be sorted according to their weights.", + "After executing the full workflow, we therefore obtain for each task a list of nodes which can be sorted according to their weights." + ], + [ + "sentence", + "", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 8216591865615385703, + 5126502945309751658, + 18446744073709551615, + 18446744073709551615, + 267, + 333, + 267, + 333, + 49, + 64, + true, + "The higher the weight of the node, the more relevant this node is.", + "The higher the weight of the node, the more relevant this node is." + ], + [ + "sentence", + "", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 12108525545318378453, + 12728621031671223359, + 18446744073709551615, + 18446744073709551615, + 334, + 420, + 334, + 420, + 64, + 82, + true, + "As such, we can also retrace which nodes were important in each stage of the workflow.", + "As such, we can also retrace which nodes were important in each stage of the workflow." + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 127759864276493913, + 8364393908486708964, + 18446744073709551615, + 18446744073709551615, + 45, + 66, + 45, + 66, + 11, + 13, + true, + "corresponding weights", + "corresponding weights" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 42253711713484855, + 11311968747640274305, + 18446744073709551615, + 18446744073709551615, + 86, + 102, + 86, + 102, + 16, + 18, + true, + "nonzero elements", + "nonzero elements" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 7596240835069815859, + 6506102061445914504, + 18446744073709551615, + 18446744073709551615, + 121, + 132, + 121, + 132, + 21, + 23, + true, + "node vector", + "node vector" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 6060214652839025266, + 11966075192922756464, + 18446744073709551615, + 18446744073709551615, + 154, + 167, + 154, + 167, + 27, + 29, + true, + "full workflow", + "full workflow" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 389609625631210899, + 12733607242456046210, + 18446744073709551615, + 18446744073709551615, + 9, + 13, + 9, + 13, + 2, + 3, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 12178341415895638602, + 12747294058064521499, + 18446744073709551615, + 18446744073709551615, + 27, + 30, + 27, + 30, + 7, + 8, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 329104161758737773, + 2433416455752408490, + 18446744073709551615, + 18446744073709551615, + 34, + 39, + 34, + 39, + 9, + 10, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 389609625631210899, + 12733607242456045036, + 18446744073709551615, + 18446744073709551615, + 198, + 202, + 198, + 202, + 35, + 36, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 389609625633315922, + 12733621748595533263, + 18446744073709551615, + 18446744073709551615, + 205, + 209, + 205, + 209, + 37, + 38, + true, + "list", + "list" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 329104161758737773, + 2433416455752447655, + 18446744073709551615, + 18446744073709551615, + 213, + 218, + 213, + 218, + 39, + 40, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 8106477822555716423, + 7780647823283838272, + 18446744073709551615, + 18446744073709551615, + 258, + 265, + 258, + 265, + 47, + 48, + true, + "weights", + "weights" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 16381206557786164800, + 733620420485756914, + 18446744073709551615, + 18446744073709551615, + 282, + 288, + 282, + 288, + 52, + 53, + true, + "weight", + "weight" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 389609625621164460, + 12733685650958048817, + 18446744073709551615, + 18446744073709551615, + 296, + 300, + 296, + 300, + 55, + 56, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 389609625621164460, + 12733685650958002482, + 18446744073709551615, + 18446744073709551615, + 325, + 329, + 325, + 329, + 61, + 62, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 329104161758737773, + 2433416455752429456, + 18446744073709551615, + 18446744073709551615, + 369, + 374, + 369, + 374, + 72, + 73, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 329104161640029084, + 2392075965011744506, + 18446744073709551615, + 18446744073709551615, + 398, + 403, + 398, + 403, + 77, + 78, + true, + "stage", + "stage" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 14638857990842534974, + 9783826650048006204, + 18446744073709551615, + 18446744073709551615, + 411, + 419, + 411, + 419, + 80, + 81, + true, + "workflow", + "workflow" + ], + [ + "numval", + "ival", + 16505790528099785698, + "TEXT", + "#/texts/112", + 1.0, + 17767354399704235156, + 6951916224121472658, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "4", + "4" + ], + [ + "numval", + "ival", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 15441160910541481039, + 13036410263911933256, + 18446744073709551615, + 18446744073709551615, + 309, + 311, + 309, + 311, + 55, + 55, + false, + "86", + "86" + ], + [ + "expression", + "word-concatenation", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 14042857724397157868, + 7723400572482154889, + 18446744073709551615, + 18446744073709551615, + 255, + 265, + 255, + 265, + 45, + 46, + true, + "on-premise", + "on-premise" + ], + [ + "expression", + "word-concatenation", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 13984993501352220634, + 15736780337580684731, + 18446744073709551615, + 18446744073709551615, + 316, + 327, + 316, + 327, + 56, + 57, + true, + "POWER-based", + "POWER-based" + ], + [ + "expression", + "wtoken-concatenation", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 8106351670279655367, + 7496390199284522039, + 18446744073709551615, + 18446744073709551615, + 308, + 315, + 308, + 315, + 55, + 56, + true, + "x86-and", + "x86-and" + ], + [ + "sentence", + "", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 1636228667275136893, + 6829252488313690685, + 18446744073709551615, + 18446744073709551615, + 0, + 93, + 0, + 93, + 0, + 15, + true, + "The primary deployment target for the CPS is a cloud environment orchestrated via Kubernetes.", + "The primary deployment target for the CPS is a cloud environment orchestrated via Kubernetes." + ], + [ + "sentence", + "", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 1358397889873033065, + 11303318607487088116, + 18446744073709551615, + 18446744073709551615, + 94, + 188, + 94, + 188, + 15, + 32, + true, + "We package the full platform assets with a Helm chart for quick deployment on multiple setups.", + "We package the full platform assets with a Helm chart for quick deployment on multiple setups." + ], + [ + "sentence", + "", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 1429830794837597235, + 13476385461814770755, + 18446744073709551615, + 18446744073709551615, + 189, + 334, + 189, + 334, + 32, + 59, + true, + "For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes.", + "For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes." + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 5618072291515280850, + 3895024354767273975, + 18446744073709551615, + 18446744073709551615, + 4, + 29, + 4, + 29, + 1, + 4, + true, + "primary deployment target", + "primary deployment target" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 8924667775203066445, + 876400847254491566, + 18446744073709551615, + 18446744073709551615, + 47, + 64, + 47, + 64, + 9, + 11, + true, + "cloud environment", + "cloud environment" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 1819480924935159279, + 6577470514585879465, + 18446744073709551615, + 18446744073709551615, + 109, + 129, + 109, + 129, + 18, + 21, + true, + "full platform assets", + "full platform assets" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 4638979376537582648, + 17377998509573799350, + 18446744073709551615, + 18446744073709551615, + 137, + 147, + 137, + 147, + 23, + 25, + true, + "Helm chart", + "Helm chart" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 11418810401555064622, + 17261720540066472098, + 18446744073709551615, + 18446744073709551615, + 152, + 168, + 152, + 168, + 26, + 28, + true, + "quick deployment", + "quick deployment" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 16086744429721457666, + 17482235260769052298, + 18446744073709551615, + 18446744073709551615, + 172, + 187, + 172, + 187, + 29, + 31, + true, + "multiple setups", + "multiple setups" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 6560670568286016569, + 16077107774923354521, + 18446744073709551615, + 18446744073709551615, + 242, + 251, + 242, + 251, + 42, + 44, + true, + "IBM Cloud", + "IBM Cloud" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 11202420113292414044, + 3143243574265492705, + 18446744073709551615, + 18446744073709551615, + 272, + 298, + 272, + 298, + 48, + 52, + true, + "IBM Cloud Private instance", + "IBM Cloud Private instance" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 12226798531077997180, + 16194854884688781015, + 18446744073709551615, + 18446744073709551615, + 308, + 333, + 308, + 333, + 55, + 58, + true, + "x86-and POWER-based nodes", + "x86-and POWER-based nodes" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 12178341415896222428, + 885100098113248695, + 18446744073709551615, + 18446744073709551615, + 38, + 41, + 38, + 41, + 6, + 7, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 7094347613010931393, + 12496963414215338837, + 18446744073709551615, + 18446744073709551615, + 82, + 92, + 82, + 92, + 13, + 14, + true, + "Kubernetes", + "Kubernetes" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 8106397496085150773, + 17340139994990751517, + 18446744073709551615, + 18446744073709551615, + 193, + 200, + 193, + 200, + 33, + 34, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 14814125365076808131, + 7721049804809485492, + 18446744073709551615, + 18446744073709551615, + 226, + 234, + 226, + 234, + 39, + 40, + true, + "platform", + "platform" + ], + [ + "numval", + "ival", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 17767354399704235157, + 8852681642826623127, + 18446744073709551615, + 18446744073709551615, + 10, + 11, + 10, + 11, + 2, + 3, + true, + "5", + "5" + ], + [ + "parenthesis", + "round brackets", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 329104053572287295, + 8245453909560823187, + 18446744073709551615, + 18446744073709551615, + 296, + 301, + 296, + 301, + 53, + 56, + true, + "(KGS)", + "(KGS)" + ], + [ + "parenthesis", + "round brackets", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 329104053210133820, + 9015135107673804315, + 18446744073709551615, + 18446744073709551615, + 429, + 434, + 429, + 434, + 80, + 83, + true, + "(COS)", + "(COS)" + ], + [ + "expression", + "word-concatenation", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 15927123524139923400, + 1285026772075360147, + 18446744073709551615, + 18446744073709551615, + 25, + 35, + 25, + 35, + 7, + 8, + true, + "high-level", + "high-level" + ], + [ + "sentence", + "", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 9940486682659996162, + 7460915234096730767, + 18446744073709551615, + 18446744073709551615, + 0, + 60, + 0, + 60, + 0, + 14, + true, + "In Figure 5, we show the high-level cloud design of the CPS.", + "In Figure 5, we show the high-level cloud design of the CPS." + ], + [ + "sentence", + "", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12549929982155038286, + 8880624972218295958, + 18446744073709551615, + 18446744073709551615, + 61, + 253, + 61, + 253, + 14, + 45, + true, + "The platform allows to manage and instrument the corpus processing in a multitenant fashion, that is, it handles multiple knowledge ingestion pipelines and it serves multiple knowledge graphs.", + "The platform allows to manage and instrument the corpus processing in a multitenant fashion, that is, it handles multiple knowledge ingestion pipelines and it serves multiple knowledge graphs." + ], + [ + "sentence", + "", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12471785161413140906, + 13074093811019311665, + 18446744073709551615, + 18446744073709551615, + 254, + 435, + 254, + 435, + 45, + 84, + true, + "We call each unit a Knowledge Graph Space (KGS), which consists of a dedicated instance of the graph engine, a dedicated MongoDB database and a bucket on a cloud object store (COS).", + "We call each unit a Knowledge Graph Space (KGS), which consists of a dedicated instance of the graph engine, a dedicated MongoDB database and a bucket on a cloud object store (COS)." + ], + [ + "sentence", + "", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 9535520267303438664, + 1803249748326675225, + 18446744073709551615, + 18446744073709551615, + 436, + 522, + 436, + 522, + 84, + 100, + true, + "A dashboard allows each project owner to manage the access and the usage of resources.", + "A dashboard allows each project owner to manage the access and the usage of resources." + ], + [ + "sentence", + "", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 2728497743282239280, + 9042362260686479835, + 18446744073709551615, + 18446744073709551615, + 523, + 621, + 523, + 621, + 100, + 117, + true, + "The KGS can be launched into multiple flavors to optimally balance the utilization of the cluster.", + "The KGS can be launched into multiple flavors to optimally balance the utilization of the cluster." + ], + [ + "sentence", + "", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 18079656800177842635, + 3681146273233398143, + 18446744073709551615, + 18446744073709551615, + 622, + 760, + 622, + 760, + 117, + 140, + true, + "These flavors range from a virtual machine with small amount of memory to a full dedicated node including hardware acceleration with GPUs.", + "These flavors range from a virtual machine with small amount of memory to a full dedicated node including hardware acceleration with GPUs." + ], + [ + "sentence", + "", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 16727938760460489694, + 682464437352803246, + 18446744073709551615, + 18446744073709551615, + 761, + 847, + 761, + 847, + 140, + 159, + true, + "Once a KGS is created, it can be paused and rescaled without loss of data or downtime.", + "Once a KGS is created, it can be paused and rescaled without loss of data or downtime." + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 1655940901214851189, + 4037423276395878952, + 18446744073709551615, + 18446744073709551615, + 25, + 48, + 25, + 48, + 7, + 10, + true, + "high-level cloud design", + "high-level cloud design" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 15626222303133683856, + 15883470027027509782, + 18446744073709551615, + 18446744073709551615, + 110, + 127, + 110, + 127, + 22, + 24, + true, + "corpus processing", + "corpus processing" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 11015260948057552074, + 17580405417276948030, + 18446744073709551615, + 18446744073709551615, + 133, + 152, + 133, + 152, + 26, + 28, + true, + "multitenant fashion", + "multitenant fashion" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 5768949209294808779, + 6442116341236141760, + 18446744073709551615, + 18446744073709551615, + 174, + 212, + 174, + 212, + 34, + 38, + true, + "multiple knowledge ingestion pipelines", + "multiple knowledge ingestion pipelines" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 1523072489578460107, + 18058886189458919981, + 18446744073709551615, + 18446744073709551615, + 227, + 252, + 227, + 252, + 41, + 44, + true, + "multiple knowledge graphs", + "multiple knowledge graphs" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 8279239668964789243, + 8460390903338403472, + 18446744073709551615, + 18446744073709551615, + 274, + 295, + 274, + 295, + 50, + 53, + true, + "Knowledge Graph Space", + "Knowledge Graph Space" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 9231360134118267820, + 4162022022779188999, + 18446744073709551615, + 18446744073709551615, + 323, + 341, + 323, + 341, + 61, + 63, + true, + "dedicated instance", + "dedicated instance" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 2924972194163802578, + 17588639801993416525, + 18446744073709551615, + 18446744073709551615, + 349, + 361, + 349, + 361, + 65, + 67, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 5728204332742385536, + 4123494469593944677, + 18446744073709551615, + 18446744073709551615, + 365, + 391, + 365, + 391, + 69, + 72, + true, + "dedicated MongoDB database", + "dedicated MongoDB database" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 8770614496471502937, + 13041000514974023818, + 18446744073709551615, + 18446744073709551615, + 410, + 428, + 410, + 428, + 77, + 80, + true, + "cloud object store", + "cloud object store" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 14090783558906999057, + 12854087600048197361, + 18446744073709551615, + 18446744073709551615, + 460, + 473, + 460, + 473, + 88, + 90, + true, + "project owner", + "project owner" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 2183265494021555923, + 18896001468121266, + 18446744073709551615, + 18446744073709551615, + 552, + 568, + 552, + 568, + 106, + 108, + true, + "multiple flavors", + "multiple flavors" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 14387950977550393964, + 17001907116386229501, + 18446744073709551615, + 18446744073709551615, + 649, + 664, + 649, + 664, + 122, + 124, + true, + "virtual machine", + "virtual machine" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 7252014402665196659, + 11294021744305298020, + 18446744073709551615, + 18446744073709551615, + 670, + 682, + 670, + 682, + 125, + 127, + true, + "small amount", + "small amount" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 4316842714742551971, + 6737419106451654162, + 18446744073709551615, + 18446744073709551615, + 698, + 717, + 698, + 717, + 131, + 134, + true, + "full dedicated node", + "full dedicated node" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 6527251669177900630, + 6862896336097821307, + 18446744073709551615, + 18446744073709551615, + 728, + 749, + 728, + 749, + 135, + 137, + true, + "hardware acceleration", + "hardware acceleration" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 16381206514091025767, + 16142590003889368025, + 18446744073709551615, + 18446744073709551615, + 3, + 9, + 3, + 9, + 1, + 2, + true, + "Figure", + "Figure" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12178341415896222428, + 11806956420576415143, + 18446744073709551615, + 18446744073709551615, + 56, + 59, + 56, + 59, + 12, + 13, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 14814125365076808131, + 10951131595958552084, + 18446744073709551615, + 18446744073709551615, + 65, + 73, + 65, + 73, + 15, + 16, + true, + "platform", + "platform" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 389609625632190829, + 14110524954774214321, + 18446744073709551615, + 18446744073709551615, + 267, + 271, + 267, + 271, + 48, + 49, + true, + "unit", + "unit" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12178341415896254082, + 11806952638868048232, + 18446744073709551615, + 18446744073709551615, + 297, + 300, + 297, + 300, + 54, + 55, + true, + "KGS", + "KGS" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 16381206569515593601, + 14517360063753258776, + 18446744073709551615, + 18446744073709551615, + 398, + 404, + 398, + 404, + 74, + 75, + true, + "bucket", + "bucket" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12178341415896222365, + 11806957828061927556, + 18446744073709551615, + 18446744073709551615, + 430, + 433, + 430, + 433, + 81, + 82, + true, + "COS", + "COS" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 6165495539515404595, + 4111005573482539739, + 18446744073709551615, + 18446744073709551615, + 438, + 447, + 438, + 447, + 85, + 86, + true, + "dashboard", + "dashboard" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 16381206568743641958, + 14433515699806996137, + 18446744073709551615, + 18446744073709551615, + 488, + 494, + 488, + 494, + 93, + 94, + true, + "access", + "access" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 329104159157898666, + 13068121140499570811, + 18446744073709551615, + 18446744073709551615, + 503, + 508, + 503, + 508, + 96, + 97, + true, + "usage", + "usage" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 6168338487309432467, + 10223983486064089367, + 18446744073709551615, + 18446744073709551615, + 512, + 521, + 512, + 521, + 98, + 99, + true, + "resources", + "resources" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12178341415896254082, + 11806952638868069314, + 18446744073709551615, + 18446744073709551615, + 527, + 530, + 527, + 530, + 101, + 102, + true, + "KGS", + "KGS" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 13498654401954296707, + 13294723622435080231, + 18446744073709551615, + 18446744073709551615, + 594, + 605, + 594, + 605, + 112, + 113, + true, + "utilization", + "utilization" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 8106398485449787361, + 1185060867801324202, + 18446744073709551615, + 18446744073709551615, + 613, + 620, + 613, + 620, + 115, + 116, + true, + "cluster", + "cluster" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 8106397453898023678, + 14703729743636443765, + 18446744073709551615, + 18446744073709551615, + 628, + 635, + 628, + 635, + 118, + 119, + true, + "flavors", + "flavors" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 16381206567042997791, + 14500738963527654425, + 18446744073709551615, + 18446744073709551615, + 686, + 692, + 686, + 692, + 128, + 129, + true, + "memory", + "memory" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 389609625538377862, + 14063335216330195650, + 18446744073709551615, + 18446744073709551615, + 755, + 759, + 755, + 759, + 138, + 139, + true, + "GPUs", + "GPUs" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12178341415896254082, + 11806952638868086348, + 18446744073709551615, + 18446744073709551615, + 768, + 771, + 768, + 771, + 142, + 143, + true, + "KGS", + "KGS" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 389609625633531326, + 14110520053719926279, + 18446744073709551615, + 18446744073709551615, + 822, + 826, + 822, + 826, + 153, + 154, + true, + "loss", + "loss" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 389609625696431489, + 14108321371888967520, + 18446744073709551615, + 18446744073709551615, + 830, + 834, + 830, + 834, + 155, + 156, + true, + "data", + "data" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 14650400971381441271, + 15853770385546829868, + 18446744073709551615, + 18446744073709551615, + 838, + 846, + 838, + 846, + 157, + 158, + true, + "downtime", + "downtime" + ], + [ + "numval", + "ival", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 17767354399704235161, + 9537684729007623, + 18446744073709551615, + 18446744073709551615, + 112, + 113, + 112, + 113, + 20, + 21, + true, + "1", + "1" + ], + [ + "sentence", + "", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 3277226034499692930, + 8257764068713046586, + 18446744073709551615, + 18446744073709551615, + 0, + 111, + 0, + 111, + 0, + 20, + true, + "For the KG creation pipeline, we implemented an asynchronous compute scheme we already use in our CCS solution.", + "For the KG creation pipeline, we implemented an asynchronous compute scheme we already use in our CCS solution." + ], + [ + "sentence", + "", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 17127552860842301864, + 8770197111614698857, + 18446744073709551615, + 18446744073709551615, + 114, + 256, + 114, + 256, + 21, + 47, + true, + "The system is exposed to the user via an API frontend which communicates to the compute workers through a message broker and a result backend.", + "The system is exposed to the user via an API frontend which communicates to the compute workers through a message broker and a result backend." + ], + [ + "sentence", + "", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 8169839874033251818, + 4419439272810104045, + 18446744073709551615, + 18446744073709551615, + 257, + 366, + 257, + 366, + 47, + 70, + true, + "The workers operate on the data, which is hosted on a NoSQL database and a cloud object store for data blobs.", + "The workers operate on the data, which is hosted on a NoSQL database and a cloud object store for data blobs." + ], + [ + "sentence", + "", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 5715822533324454462, + 3737780764433609993, + 18446744073709551615, + 18446744073709551615, + 367, + 477, + 367, + 477, + 70, + 89, + true, + "These workers are dynamically scaled by the cloud orchestrator to best match the current load of the platform.", + "These workers are dynamically scaled by the cloud orchestrator to best match the current load of the platform." + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 14857819661511796263, + 2424012575699788348, + 18446744073709551615, + 18446744073709551615, + 8, + 28, + 8, + 28, + 2, + 5, + true, + "KG creation pipeline", + "KG creation pipeline" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 10007149380535166539, + 1692146958599972346, + 18446744073709551615, + 18446744073709551615, + 48, + 75, + 48, + 75, + 9, + 12, + true, + "asynchronous compute scheme", + "asynchronous compute scheme" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 10465352779691143250, + 3951289883803806710, + 18446744073709551615, + 18446744073709551615, + 98, + 110, + 98, + 110, + 17, + 19, + true, + "CCS solution", + "CCS solution" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 11572792171430282447, + 1021301182691757038, + 18446744073709551615, + 18446744073709551615, + 155, + 167, + 155, + 167, + 30, + 32, + true, + "API frontend", + "API frontend" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 14878173643529361829, + 14456811929852907504, + 18446744073709551615, + 18446744073709551615, + 194, + 209, + 194, + 209, + 36, + 38, + true, + "compute workers", + "compute workers" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 15654770817534947005, + 10287959561185320802, + 18446744073709551615, + 18446744073709551615, + 220, + 234, + 220, + 234, + 40, + 42, + true, + "message broker", + "message broker" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 6406733444931989980, + 2171075967878720181, + 18446744073709551615, + 18446744073709551615, + 241, + 255, + 241, + 255, + 44, + 46, + true, + "result backend", + "result backend" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 16772942504422841315, + 12869590969867328268, + 18446744073709551615, + 18446744073709551615, + 311, + 325, + 311, + 325, + 59, + 61, + true, + "NoSQL database", + "NoSQL database" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 8770614496471502937, + 4425801382089339238, + 18446744073709551615, + 18446744073709551615, + 332, + 350, + 332, + 350, + 63, + 66, + true, + "cloud object store", + "cloud object store" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 5326252220577355949, + 11291971315896721582, + 18446744073709551615, + 18446744073709551615, + 355, + 365, + 355, + 365, + 67, + 69, + true, + "data blobs", + "data blobs" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 4086536176752834180, + 4868222512876603371, + 18446744073709551615, + 18446744073709551615, + 411, + 429, + 411, + 429, + 77, + 79, + true, + "cloud orchestrator", + "cloud orchestrator" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 5679217233562387039, + 12739262502871868839, + 18446744073709551615, + 18446744073709551615, + 448, + 460, + 448, + 460, + 83, + 85, + true, + "current load", + "current load" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 16381206550376895780, + 14690993036419098898, + 18446744073709551615, + 18446744073709551615, + 118, + 124, + 118, + 124, + 22, + 23, + true, + "system", + "system" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 389609625632179162, + 1006423631488985152, + 18446744073709551615, + 18446744073709551615, + 143, + 147, + 143, + 147, + 27, + 28, + true, + "user", + "user" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 8106478059506484182, + 11556117617919976622, + 18446744073709551615, + 18446744073709551615, + 261, + 268, + 261, + 268, + 48, + 49, + true, + "workers", + "workers" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 389609625696431489, + 1125351843508795170, + 18446744073709551615, + 18446744073709551615, + 284, + 288, + 284, + 288, + 52, + 53, + true, + "data", + "data" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 8106478059506484182, + 11556117617919928746, + 18446744073709551615, + 18446744073709551615, + 373, + 380, + 373, + 380, + 71, + 72, + true, + "workers", + "workers" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 14814125365076808131, + 16428429469880175089, + 18446744073709551615, + 18446744073709551615, + 468, + 476, + 468, + 476, + 87, + 88, + true, + "platform", + "platform" + ], + [ + "expression", + "word-concatenation", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 12091012031128966489, + 4422884735184098490, + 18446744073709551615, + 18446744073709551615, + 187, + 199, + 187, + 199, + 33, + 34, + true, + "fine-grained", + "fine-grained" + ], + [ + "sentence", + "", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 3168972455566929645, + 14299943161362031928, + 18446744073709551615, + 18446744073709551615, + 0, + 103, + 0, + 103, + 0, + 19, + true, + "The processing of the KG creation typically starts with the user submitting the DF to the frontend API.", + "The processing of the KG creation typically starts with the user submitting the DF to the frontend API." + ], + [ + "sentence", + "", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 13906228526952191149, + 3075418305413472422, + 18446744073709551615, + 18446744073709551615, + 104, + 301, + 104, + 301, + 19, + 54, + true, + "The DAG of operations is then interpreted as described in the previous section and fine-grained tasks are submitted to the broker, for example, the whole corpus is split in many independent chunks.", + "The DAG of operations is then interpreted as described in the previous section and fine-grained tasks are submitted to the broker, for example, the whole corpus is split in many independent chunks." + ], + [ + "sentence", + "", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 3333414693963617816, + 12948913819424524581, + 18446744073709551615, + 18446744073709551615, + 302, + 404, + 302, + 404, + 54, + 73, + true, + "The user receives an overall status from the API and is notified when the DF processing has completed.", + "The user receives an overall status from the API and is notified when the DF processing has completed." + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 2196851006559809291, + 9830489088948642112, + 18446744073709551615, + 18446744073709551615, + 22, + 33, + 22, + 33, + 4, + 6, + true, + "KG creation", + "KG creation" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 18368248485730797851, + 14211587989421610259, + 18446744073709551615, + 18446744073709551615, + 90, + 102, + 90, + 102, + 16, + 18, + true, + "frontend API", + "frontend API" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 1686262582468728546, + 18069301562578694550, + 18446744073709551615, + 18446744073709551615, + 166, + 182, + 166, + 182, + 30, + 32, + true, + "previous section", + "previous section" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 871590318385078896, + 9964556551293837787, + 18446744073709551615, + 18446744073709551615, + 187, + 205, + 187, + 205, + 33, + 35, + true, + "fine-grained tasks", + "fine-grained tasks" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 7803735128811820247, + 8028207541414251541, + 18446744073709551615, + 18446744073709551615, + 252, + 264, + 252, + 264, + 45, + 47, + true, + "whole corpus", + "whole corpus" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 5635562936121152164, + 16688760703459948603, + 18446744073709551615, + 18446744073709551615, + 277, + 300, + 277, + 300, + 50, + 53, + true, + "many independent chunks", + "many independent chunks" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 8842823732426861704, + 17629510401470609093, + 18446744073709551615, + 18446744073709551615, + 323, + 337, + 323, + 337, + 58, + 60, + true, + "overall status", + "overall status" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 16299080740739724047, + 11080747030069412617, + 18446744073709551615, + 18446744073709551615, + 376, + 389, + 376, + 389, + 68, + 70, + true, + "DF processing", + "DF processing" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 14088627147213114570, + 13854182623796006327, + 18446744073709551615, + 18446744073709551615, + 4, + 14, + 4, + 14, + 1, + 2, + true, + "processing", + "processing" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 389609625632179162, + 8646106573348311993, + 18446744073709551615, + 18446744073709551615, + 60, + 64, + 60, + 64, + 10, + 11, + true, + "user", + "user" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 15441160910541480770, + 13534996573266716272, + 18446744073709551615, + 18446744073709551615, + 80, + 82, + 80, + 82, + 13, + 14, + true, + "DF", + "DF" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 12178341415896112046, + 4574774009824171048, + 18446744073709551615, + 18446744073709551615, + 108, + 111, + 108, + 111, + 20, + 21, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 13985988710970420061, + 12935943121770737018, + 18446744073709551615, + 18446744073709551615, + 115, + 125, + 115, + 125, + 22, + 23, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 16381206570348587859, + 1833011693751406972, + 18446744073709551615, + 18446744073709551615, + 227, + 233, + 227, + 233, + 39, + 40, + true, + "broker", + "broker" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 8106397496085150773, + 11380064558136438485, + 18446744073709551615, + 18446744073709551615, + 239, + 246, + 239, + 246, + 42, + 43, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 389609625632179162, + 8646106573348360699, + 18446744073709551615, + 18446744073709551615, + 306, + 310, + 306, + 310, + 55, + 56, + true, + "user", + "user" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 12178341415896230896, + 4574893999665149592, + 18446744073709551615, + 18446744073709551615, + 347, + 350, + 347, + 350, + 62, + 63, + true, + "API", + "API" + ], + [ + "parenthesis", + "round brackets", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 329104053210133820, + 10787425083780243479, + 18446744073709551615, + 18446744073709551615, + 102, + 107, + 102, + 107, + 18, + 21, + true, + "(COS)", + "(COS)" + ], + [ + "sentence", + "", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 14300472229212663005, + 14392697989286948225, + 18446744073709551615, + 18446744073709551615, + 0, + 120, + 0, + 120, + 0, + 25, + true, + "The KG data are distributed between three storage solutions: a NoSQL database, a cloud object storage (COS) and the KGS.", + "The KG data are distributed between three storage solutions: a NoSQL database, a cloud object storage (COS) and the KGS." + ], + [ + "sentence", + "", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12082365070187585802, + 16167088524264065499, + 18446744073709551615, + 18446744073709551615, + 121, + 273, + 121, + 273, + 25, + 55, + true, + "Each node is represented as a document in a NoSQL database which contains all the properties attached to the node, for example, the text of a paragraph.", + "Each node is represented as a document in a NoSQL database which contains all the properties attached to the node, for example, the text of a paragraph." + ], + [ + "sentence", + "", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 610319572008007953, + 6746694152812115550, + 18446744073709551615, + 18446744073709551615, + 274, + 393, + 274, + 393, + 55, + 83, + true, + "If there is a binary object attached to the node, for example, the PDF document or an image, this is stored on the COS.", + "If there is a binary object attached to the node, for example, the PDF document or an image, this is stored on the COS." + ], + [ + "sentence", + "", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 4344494126371972185, + 2350781277598128576, + 18446744073709551615, + 18446744073709551615, + 394, + 572, + 394, + 572, + 83, + 115, + true, + "The KGS contains only the minimal information needed to execute the queries, that is, the connectivity of the graph and the properties which are indexed for filtering and search.", + "The KGS contains only the minimal information needed to execute the queries, that is, the connectivity of the graph and the properties which are indexed for filtering and search." + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 8106395850436073177, + 11012666495072522005, + 18446744073709551615, + 18446744073709551615, + 4, + 11, + 4, + 11, + 1, + 3, + true, + "KG data", + "KG data" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12787341663997074868, + 2266405206495988392, + 18446744073709551615, + 18446744073709551615, + 42, + 59, + 42, + 59, + 7, + 9, + true, + "storage solutions", + "storage solutions" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 16772942504422841315, + 2277994577118317964, + 18446744073709551615, + 18446744073709551615, + 63, + 77, + 63, + 77, + 11, + 13, + true, + "NoSQL database", + "NoSQL database" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 16918978243275188323, + 1110270592458797827, + 18446744073709551615, + 18446744073709551615, + 81, + 101, + 81, + 101, + 15, + 18, + true, + "cloud object storage", + "cloud object storage" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 16772942504422841315, + 2277994577118313177, + 18446744073709551615, + 18446744073709551615, + 165, + 179, + 165, + 179, + 34, + 36, + true, + "NoSQL database", + "NoSQL database" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 11152620526374970124, + 14811456031713141955, + 18446744073709551615, + 18446744073709551615, + 288, + 301, + 288, + 301, + 59, + 61, + true, + "binary object", + "binary object" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12366808243217836777, + 5730782230103722615, + 18446744073709551615, + 18446744073709551615, + 341, + 353, + 341, + 353, + 70, + 72, + true, + "PDF document", + "PDF document" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 7129359759299976443, + 13989298049334872036, + 18446744073709551615, + 18446744073709551615, + 420, + 439, + 420, + 439, + 88, + 90, + true, + "minimal information", + "minimal information" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12178341415896222365, + 3228078444842065286, + 18446744073709551615, + 18446744073709551615, + 103, + 106, + 103, + 106, + 19, + 20, + true, + "COS", + "COS" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12178341415896254082, + 3228102135893419567, + 18446744073709551615, + 18446744073709551615, + 116, + 119, + 116, + 119, + 23, + 24, + true, + "KGS", + "KGS" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 389609625621164460, + 11938016511442844888, + 18446744073709551615, + 18446744073709551615, + 126, + 130, + 126, + 130, + 26, + 27, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 14650401089286948001, + 9314496952887964373, + 18446744073709551615, + 18446744073709551615, + 151, + 159, + 151, + 159, + 31, + 32, + true, + "document", + "document" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 14088628410271132453, + 4341732016105186977, + 18446744073709551615, + 18446744073709551615, + 203, + 213, + 203, + 213, + 40, + 41, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 389609625621164460, + 11938016511442851462, + 18446744073709551615, + 18446744073709551615, + 230, + 234, + 230, + 234, + 44, + 45, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 8106397496085150773, + 2009680286188387580, + 18446744073709551615, + 18446744073709551615, + 240, + 247, + 240, + 247, + 47, + 48, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 389609625631325904, + 11937730708172995984, + 18446744073709551615, + 18446744073709551615, + 253, + 257, + 253, + 257, + 50, + 51, + true, + "text", + "text" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 6169141668427353082, + 14431783315368920148, + 18446744073709551615, + 18446744073709551615, + 263, + 272, + 263, + 272, + 53, + 54, + true, + "paragraph", + "paragraph" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 389609625621164460, + 11938016511442849064, + 18446744073709551615, + 18446744073709551615, + 318, + 322, + 318, + 322, + 64, + 65, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 8106397496085150773, + 2009680286188378662, + 18446744073709551615, + 18446744073709551615, + 328, + 335, + 328, + 335, + 67, + 68, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 329104161828335551, + 12330380347516247933, + 18446744073709551615, + 18446744073709551615, + 360, + 365, + 360, + 365, + 74, + 75, + true, + "image", + "image" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12178341415896222365, + 3228078444843066386, + 18446744073709551615, + 18446744073709551615, + 389, + 392, + 389, + 392, + 81, + 82, + true, + "COS", + "COS" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12178341415896254082, + 3228102135893405214, + 18446744073709551615, + 18446744073709551615, + 398, + 401, + 398, + 401, + 84, + 85, + true, + "KGS", + "KGS" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 8106477782290185579, + 19841994463607863, + 18446744073709551615, + 18446744073709551615, + 462, + 469, + 462, + 469, + 94, + 95, + true, + "queries", + "queries" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 11015713444890684392, + 5016937512725344760, + 18446744073709551615, + 18446744073709551615, + 484, + 496, + 484, + 496, + 100, + 101, + true, + "connectivity", + "connectivity" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 329104159211247965, + 4925265353295993783, + 18446744073709551615, + 18446744073709551615, + 504, + 509, + 504, + 509, + 103, + 104, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 14088628410271132453, + 4341732016105174192, + 18446744073709551615, + 18446744073709551615, + 518, + 528, + 518, + 528, + 106, + 107, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 16381206577802837709, + 3639100746832045022, + 18446744073709551615, + 18446744073709551615, + 565, + 571, + 565, + 571, + 113, + 114, + true, + "search", + "search" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "numval", + "ival", + 2144926686518491811, + "TEXT", + "#/texts/119", + 1.0, + 15441160910541481983, + 7629680595941988994, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "11", + "11" + ], + [ + "numval", + "ival", + 2144926686518491811, + "TEXT", + "#/texts/119", + 1.0, + 15441160910541481979, + 7629680596056147236, + 18446744073709551615, + 18446744073709551615, + 4, + 6, + 4, + 6, + 2, + 3, + true, + "15", + "15" + ], + [ + "sentence", + "", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 16074792491060269581, + 16392047884217890893, + 18446744073709551615, + 18446744073709551615, + 0, + 126, + 0, + 126, + 0, + 24, + true, + "The KGS is exposed to the user via a REST API which is able to aggregate results collected from the different storage sources.", + "The KGS is exposed to the user via a REST API which is able to aggregate results collected from the different storage sources." + ], + [ + "sentence", + "", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 13174820656604838662, + 16260063485294524084, + 18446744073709551615, + 18446744073709551615, + 127, + 252, + 127, + 252, + 24, + 44, + true, + "To ensure decent performance when serving queries of multiple users, the graph engine can be dynamically scaled horizontally.", + "To ensure decent performance when serving queries of multiple users, the graph engine can be dynamically scaled horizontally." + ], + [ + "sentence", + "", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 15983440293168997448, + 7963308543571437549, + 18446744073709551615, + 18446744073709551615, + 253, + 354, + 253, + 354, + 44, + 61, + true, + "Most workflow queries execute fast enough such that they can be responded from a synchronous request.", + "Most workflow queries execute fast enough such that they can be responded from a synchronous request." + ], + [ + "sentence", + "", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 2443264249188056591, + 8310367412232015326, + 18446744073709551615, + 18446744073709551615, + 355, + 460, + 355, + 460, + 61, + 79, + true, + "Others, especially the graph analytics computations, are more expensive and return large amounts of data.", + "Others, especially the graph analytics computations, are more expensive and return large amounts of data." + ], + [ + "sentence", + "", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 17420955785017825293, + 4530821838279240147, + 18446744073709551615, + 18446744073709551615, + 461, + 596, + 461, + 596, + 79, + 103, + true, + "Thus, these queries are executed through an asynchronous API and the results are paginated and streamed back to the user on completion.", + "Thus, these queries are executed through an asynchronous API and the results are paginated and streamed back to the user on completion." + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 14652188385274391907, + 2694673140760145603, + 18446744073709551615, + 18446744073709551615, + 37, + 45, + 37, + 45, + 9, + 11, + true, + "REST API", + "REST API" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 9625448109592502994, + 9647746636491739728, + 18446744073709551615, + 18446744073709551615, + 100, + 125, + 100, + 125, + 20, + 23, + true, + "different storage sources", + "different storage sources" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 6709594336853371761, + 11687052192579585167, + 18446744073709551615, + 18446744073709551615, + 137, + 155, + 137, + 155, + 26, + 28, + true, + "decent performance", + "decent performance" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 17200993861033027072, + 1744565019688702260, + 18446744073709551615, + 18446744073709551615, + 180, + 194, + 180, + 194, + 32, + 34, + true, + "multiple users", + "multiple users" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 2924972194163802578, + 10534198242190806077, + 18446744073709551615, + 18446744073709551615, + 200, + 212, + 200, + 212, + 36, + 38, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 16360526185581748651, + 15299875967681492548, + 18446744073709551615, + 18446744073709551615, + 253, + 274, + 253, + 274, + 44, + 47, + true, + "Most workflow queries", + "Most workflow queries" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 7499883450733855176, + 12045430344624045691, + 18446744073709551615, + 18446744073709551615, + 334, + 353, + 334, + 353, + 58, + 60, + true, + "synchronous request", + "synchronous request" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 9728303362208378504, + 11302837117618091413, + 18446744073709551615, + 18446744073709551615, + 378, + 406, + 378, + 406, + 65, + 68, + true, + "graph analytics computations", + "graph analytics computations" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 11805639520798919476, + 7644867692249185725, + 18446744073709551615, + 18446744073709551615, + 438, + 451, + 438, + 451, + 74, + 76, + true, + "large amounts", + "large amounts" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 18427810349028856651, + 2258223692032184753, + 18446744073709551615, + 18446744073709551615, + 505, + 521, + 505, + 521, + 87, + 89, + true, + "asynchronous API", + "asynchronous API" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 12178341415896254082, + 3344072087811986116, + 18446744073709551615, + 18446744073709551615, + 4, + 7, + 4, + 7, + 1, + 2, + true, + "KGS", + "KGS" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 389609625632179162, + 8614714760573598927, + 18446744073709551615, + 18446744073709551615, + 26, + 30, + 26, + 30, + 6, + 7, + true, + "user", + "user" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 8106478445190161533, + 4934944743075669717, + 18446744073709551615, + 18446744073709551615, + 73, + 80, + 73, + 80, + 16, + 17, + true, + "results", + "results" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 8106477782290185579, + 12680231174549344170, + 18446744073709551615, + 18446744073709551615, + 169, + 176, + 169, + 176, + 30, + 31, + true, + "queries", + "queries" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 16381206477857958587, + 4024277227926320818, + 18446744073709551615, + 18446744073709551615, + 355, + 361, + 355, + 361, + 61, + 62, + true, + "Others", + "Others" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 389609625696431489, + 8612777845572934547, + 18446744073709551615, + 18446744073709551615, + 455, + 459, + 455, + 459, + 77, + 78, + true, + "data", + "data" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 8106477782290185579, + 12680231174549324694, + 18446744073709551615, + 18446744073709551615, + 473, + 480, + 473, + 480, + 82, + 83, + true, + "queries", + "queries" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 8106478445190161533, + 4934944743075576931, + 18446744073709551615, + 18446744073709551615, + 530, + 537, + 530, + 537, + 91, + 92, + true, + "results", + "results" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 389609625632179162, + 8614714760573567106, + 18446744073709551615, + 18446744073709551615, + 577, + 581, + 577, + 581, + 99, + 100, + true, + "user", + "user" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 2703018890303469599, + 9579262064613450677, + 18446744073709551615, + 18446744073709551615, + 585, + 595, + 585, + 595, + 101, + 102, + true, + "completion", + "completion" + ], + [ + "numval", + "ival", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 17767354399704235157, + 11518089933568466075, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "5", + "5" + ], + [ + "parenthesis", + "round brackets", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 7373849855264861449, + 365116357501177202, + 18446744073709551615, + 18446744073709551615, + 164, + 188, + 164, + 188, + 27, + 32, + true, + "(public and proprietary)", + "(public and proprietary)" + ], + [ + "parenthesis", + "round brackets", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 11638883092501129531, + 10880710974821402837, + 18446744073709551615, + 18446744073709551615, + 520, + 580, + 514, + 574, + 89, + 102, + true, + "(eg, formations with their age, rock-composition, and depth)", + "(eg, formations with their age, rock-composition, and depth)" + ], + [ + "parenthesis", + "round brackets", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 2706078645353543038, + 3854987420067579305, + 18446744073709551615, + 18446744073709551615, + 606, + 636, + 600, + 630, + 106, + 111, + true, + "(governmental and proprietary)", + "(governmental and proprietary)" + ], + [ + "expression", + "word-concatenation", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 10544831253991637042, + 6283171410540301725, + 18446744073709551615, + 18446744073709551615, + 552, + 568, + 546, + 562, + 97, + 98, + true, + "rock-composition", + "rock-composition" + ], + [ + "sentence", + "", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8216705235205389735, + 240810668415429324, + 18446744073709551615, + 18446744073709551615, + 0, + 67, + 0, + 67, + 0, + 13, + true, + "Oil and gas exploration is a complex, technical field of expertise.", + "Oil and gas exploration is a complex, technical field of expertise." + ], + [ + "sentence", + "", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 11961004859214314258, + 4927401767963333110, + 18446744073709551615, + 18446744073709551615, + 68, + 278, + 68, + 278, + 13, + 51, + true, + "Unfortunately, the data of many geological processes and entities is scattered across databases (public and proprietary) and corpora of documents, where it is often deeply embedded in text, tables, and figures.", + "Unfortunately, the data of many geological processes and entities is scattered across databases (public and proprietary) and corpora of documents, where it is often deeply embedded in text, tables, and figures." + ], + [ + "sentence", + "", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 12899401129752394980, + 7318808495444355734, + 18446744073709551615, + 18446744073709551615, + 279, + 367, + 279, + 367, + 51, + 66, + true, + "This is a serious impediment for efficient exploration of new oil and gas opportunities.", + "This is a serious impediment for efficient exploration of new oil and gas opportunities." + ], + [ + "sentence", + "", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 18349903503178560352, + 8780732856130854312, + 18446744073709551615, + 18446744073709551615, + 368, + 661, + 368, + 655, + 66, + 115, + true, + "For example, geographic information of geological structures can be found in NaturalEarthData, \u2021\u2021\u2021 while their history, evolution, and components (eg, formations with their age, rock-composition, and depth) are discussed in reports (governmental and proprietary) and scientific articles.", + "For example, geographic information of geological structures can be found in NaturalEarthData, \u2021\u2021\u2021 while their history, evolution, and components (eg, formations with their age, rock-composition, and depth) are discussed in reports (governmental and proprietary) and scientific articles." + ], + [ + "sentence", + "", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 17752875551499627315, + 2358405120490317342, + 18446744073709551615, + 18446744073709551615, + 662, + 861, + 656, + 855, + 115, + 152, + true, + "As such, experts in oil and gas exploration often need to read many documents in order to find all the information of a certain geographic area and get a good understanding of its underlying geology.", + "As such, experts in oil and gas exploration often need to read many documents in order to find all the information of a certain geographic area and get a good understanding of its underlying geology." + ], + [ + "term", + "enum-term-mark-2", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6389497618120621824, + 3882287786623873346, + 18446744073709551615, + 18446744073709551615, + 0, + 23, + 0, + 23, + 0, + 4, + true, + "Oil and gas exploration", + "Oil and gas exploration" + ], + [ + "term", + "enum-term-mark-2", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 9418848057117014737, + 5402506232376820687, + 18446744073709551615, + 18446744073709551615, + 341, + 352, + 341, + 352, + 61, + 64, + true, + "oil and gas", + "oil and gas" + ], + [ + "term", + "enum-term-mark-2", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 10982395905665253579, + 13752394750334802599, + 18446744073709551615, + 18446744073709551615, + 547, + 579, + 541, + 573, + 95, + 101, + true, + "age, rock-composition, and depth", + "age, rock-composition, and depth" + ], + [ + "term", + "enum-term-mark-2", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 5515747999597331548, + 985317114331566672, + 18446744073709551615, + 18446744073709551615, + 682, + 705, + 676, + 699, + 120, + 124, + true, + "oil and gas exploration", + "oil and gas exploration" + ], + [ + "term", + "enum-term-mark-3", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 4820933926787397751, + 317282169431161814, + 18446744073709551615, + 18446744073709551615, + 111, + 133, + 111, + 133, + 20, + 23, + true, + "processes and entities", + "processes and entities" + ], + [ + "term", + "enum-term-mark-3", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 9207207424694358454, + 3444507114153246338, + 18446744073709551615, + 18446744073709551615, + 258, + 277, + 258, + 277, + 46, + 50, + true, + "tables, and figures", + "tables, and figures" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 10692163443301812358, + 14369438466538696698, + 18446744073709551615, + 18446744073709551615, + 8, + 23, + 8, + 23, + 2, + 4, + true, + "gas exploration", + "gas exploration" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6630151693041027733, + 13667311119554727837, + 18446744073709551615, + 18446744073709551615, + 38, + 53, + 38, + 53, + 8, + 10, + true, + "technical field", + "technical field" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 4574553070187117214, + 5477991605964996477, + 18446744073709551615, + 18446744073709551615, + 95, + 120, + 95, + 120, + 18, + 21, + true, + "many geological processes", + "many geological processes" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 16857074076267844634, + 10288321581388192914, + 18446744073709551615, + 18446744073709551615, + 289, + 307, + 289, + 307, + 54, + 56, + true, + "serious impediment", + "serious impediment" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 7888665710418232344, + 13630946120734502191, + 18446744073709551615, + 18446744073709551615, + 312, + 333, + 312, + 333, + 57, + 59, + true, + "efficient exploration", + "efficient exploration" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106342689900901717, + 18192642792638057808, + 18446744073709551615, + 18446744073709551615, + 337, + 344, + 337, + 344, + 60, + 62, + true, + "new oil", + "new oil" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 17842890634558813266, + 10975865632036036027, + 18446744073709551615, + 18446744073709551615, + 349, + 366, + 349, + 366, + 63, + 65, + true, + "gas opportunities", + "gas opportunities" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 7139567255410303030, + 8446684552119017715, + 18446744073709551615, + 18446744073709551615, + 381, + 403, + 381, + 403, + 69, + 71, + true, + "geographic information", + "geographic information" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 15928367849318151150, + 6690657827796758810, + 18446744073709551615, + 18446744073709551615, + 407, + 428, + 407, + 428, + 72, + 74, + true, + "geological structures", + "geological structures" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 7863808487922385366, + 7668215042863067481, + 18446744073709551615, + 18446744073709551615, + 641, + 660, + 635, + 654, + 112, + 114, + true, + "scientific articles", + "scientific articles" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 10692163443301812358, + 14369438466538523960, + 18446744073709551615, + 18446744073709551615, + 690, + 705, + 684, + 699, + 122, + 124, + true, + "gas exploration", + "gas exploration" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 12462088721494412558, + 9590850160516238357, + 18446744073709551615, + 18446744073709551615, + 725, + 739, + 719, + 733, + 128, + 130, + true, + "many documents", + "many documents" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 3808918567046385591, + 12838318632197561077, + 18446744073709551615, + 18446744073709551615, + 782, + 805, + 776, + 799, + 139, + 142, + true, + "certain geographic area", + "certain geographic area" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 5432038927924624855, + 3081417618916509100, + 18446744073709551615, + 18446744073709551615, + 816, + 834, + 810, + 828, + 145, + 147, + true, + "good understanding", + "good understanding" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 15394751475952612729, + 8785520169217661944, + 18446744073709551615, + 18446744073709551615, + 842, + 860, + 836, + 854, + 149, + 151, + true, + "underlying geology", + "underlying geology" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 12178341415896272573, + 479144706274466459, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 1, + true, + "Oil", + "Oil" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6168316375272172674, + 7063048620659196911, + 18446744073709551615, + 18446744073709551615, + 57, + 66, + 57, + 66, + 11, + 12, + true, + "expertise", + "expertise" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 389609625696431489, + 7981605993880509721, + 18446744073709551615, + 18446744073709551615, + 87, + 91, + 87, + 91, + 16, + 17, + true, + "data", + "data" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 14652256560445338257, + 6970476607163866107, + 18446744073709551615, + 18446744073709551615, + 125, + 133, + 125, + 133, + 22, + 23, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6165495739559760741, + 2196775454065369079, + 18446744073709551615, + 18446744073709551615, + 154, + 163, + 154, + 163, + 26, + 27, + true, + "databases", + "databases" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106398483106473371, + 17672855256048813131, + 18446744073709551615, + 18446744073709551615, + 193, + 200, + 193, + 200, + 33, + 34, + true, + "corpora", + "corpora" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6167933651658664291, + 15693796346909339457, + 18446744073709551615, + 18446744073709551615, + 204, + 213, + 204, + 213, + 35, + 36, + true, + "documents", + "documents" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 389609625631325904, + 7977943539407590204, + 18446744073709551615, + 18446744073709551615, + 252, + 256, + 252, + 256, + 44, + 45, + true, + "text", + "text" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 16381206513098478539, + 16420719447640918384, + 18446744073709551615, + 18446744073709551615, + 258, + 264, + 258, + 264, + 46, + 47, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106397480533647371, + 10093626472338736862, + 18446744073709551615, + 18446744073709551615, + 270, + 277, + 270, + 277, + 49, + 50, + true, + "figures", + "figures" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106397496085150773, + 15308866410956677851, + 18446744073709551615, + 18446744073709551615, + 372, + 379, + 372, + 379, + 67, + 68, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 9567036524588108536, + 1501013677311244740, + 18446744073709551615, + 18446744073709551615, + 445, + 461, + 445, + 461, + 78, + 79, + true, + "NaturalEarthData", + "NaturalEarthData" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106398477819293336, + 17658354524441111071, + 18446744073709551615, + 18446744073709551615, + 485, + 492, + 479, + 486, + 83, + 84, + true, + "history", + "history" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6172089554353143931, + 17548988686693732639, + 18446744073709551615, + 18446744073709551615, + 494, + 503, + 488, + 497, + 85, + 86, + true, + "evolution", + "evolution" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 2703018952916355661, + 8527312128484531618, + 18446744073709551615, + 18446744073709551615, + 509, + 519, + 503, + 513, + 88, + 89, + true, + "components", + "components" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 15441160910541487324, + 12466872848432344092, + 18446744073709551615, + 18446744073709551615, + 521, + 523, + 515, + 517, + 90, + 91, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 16064217528453934834, + 13674817412400112483, + 18446744073709551615, + 18446744073709551615, + 525, + 535, + 519, + 529, + 92, + 93, + true, + "formations", + "formations" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 12178341415895571749, + 479221805921022809, + 18446744073709551615, + 18446744073709551615, + 547, + 550, + 541, + 544, + 95, + 96, + true, + "age", + "age" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 10544831253991637042, + 6283171410540301725, + 18446744073709551615, + 18446744073709551615, + 552, + 568, + 546, + 562, + 97, + 98, + true, + "rock-composition", + "rock-composition" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 329104162100250438, + 5041034355187623111, + 18446744073709551615, + 18446744073709551615, + 574, + 579, + 568, + 573, + 100, + 101, + true, + "depth", + "depth" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106478449073306569, + 15059182269430793968, + 18446744073709551615, + 18446744073709551615, + 598, + 605, + 592, + 599, + 105, + 106, + true, + "reports", + "reports" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106397495764760483, + 9633777631454873600, + 18446744073709551615, + 18446744073709551615, + 671, + 678, + 665, + 672, + 118, + 119, + true, + "experts", + "experts" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 12178341415895623363, + 479231402707811038, + 18446744073709551615, + 18446744073709551615, + 682, + 685, + 676, + 679, + 120, + 121, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 329104161571401725, + 5024703429178195243, + 18446744073709551615, + 18446744073709551615, + 743, + 748, + 737, + 742, + 131, + 132, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 14388065630035882329, + 13638788162857695945, + 18446744073709551615, + 18446744073709551615, + 765, + 776, + 759, + 770, + 136, + 137, + true, + "information", + "information" + ], + [ + "numval", + "ival", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 15441160910541481863, + 7242502688177594361, + 18446744073709551615, + 18446744073709551615, + 378, + 380, + 378, + 380, + 65, + 66, + true, + "19", + "19" + ], + [ + "parenthesis", + "round brackets", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 17270253253682717487, + 3576935745000233213, + 18446744073709551615, + 18446744073709551615, + 1034, + 1067, + 1034, + 1067, + 195, + 205, + true, + "(ie, reservoir, seal, and source)", + "(ie, reservoir, seal, and source)" + ], + [ + "expression", + "word-concatenation", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 3123226645854154111, + 6696524889970483253, + 18446744073709551615, + 18446744073709551615, + 173, + 184, + 173, + 184, + 32, + 33, + true, + "sub-regions", + "sub-regions" + ], + [ + "expression", + "word-concatenation", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16142298665920251723, + 15780159324829191742, + 18446744073709551615, + 18446744073709551615, + 1133, + 1149, + 1133, + 1149, + 215, + 216, + true, + "well-established", + "well-established" + ], + [ + "sentence", + "", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16452340106229368551, + 11552113153785899906, + 18446744073709551615, + 18446744073709551615, + 0, + 113, + 0, + 113, + 0, + 20, + true, + "The main tasks of the experts working in oil and gas exploration are to identify potential new exploration sites.", + "The main tasks of the experts working in oil and gas exploration are to identify potential new exploration sites." + ], + [ + "sentence", + "", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 11936152223334879196, + 11546009086188025692, + 18446744073709551615, + 18446744073709551615, + 114, + 185, + 114, + 185, + 20, + 34, + true, + "This is typically done by describing a basin or one of its sub-regions.", + "This is typically done by describing a basin or one of its sub-regions." + ], + [ + "sentence", + "", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6588420048013411488, + 17199279919993921695, + 18446744073709551615, + 18446744073709551615, + 186, + 377, + 186, + 377, + 34, + 65, + true, + "In practice, ' describing a basin ' boils down to identifying all geological formations with their properties in the basin and investigating if these formations constitute a petroleum system.", + "In practice, ' describing a basin ' boils down to identifying all geological formations with their properties in the basin and investigating if these formations constitute a petroleum system." + ], + [ + "sentence", + "", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 3041813118097378718, + 5477849685609028942, + 18446744073709551615, + 18446744073709551615, + 381, + 491, + 381, + 491, + 66, + 88, + true, + "In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal.", + "In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal." + ], + [ + "sentence", + "", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 10695328289155597291, + 4431862688730634730, + 18446744073709551615, + 18446744073709551615, + 492, + 561, + 492, + 561, + 88, + 103, + true, + "The source is the rock formation in which the oil or gas was created.", + "The source is the rock formation in which the oil or gas was created." + ], + [ + "sentence", + "", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 11895421850198242632, + 5509077001248181418, + 18446744073709551615, + 18446744073709551615, + 562, + 666, + 562, + 666, + 103, + 125, + true, + "Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas.", + "Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas." + ], + [ + "sentence", + "", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 5929789915327270268, + 3037242968728546017, + 18446744073709551615, + 18446744073709551615, + 667, + 803, + 667, + 803, + 125, + 153, + true, + "In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal.", + "In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal." + ], + [ + "sentence", + "", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12828622475591605017, + 5782454436492965846, + 18446744073709551615, + 18446744073709551615, + 804, + 913, + 804, + 913, + 153, + 174, + true, + "Each one of these components is comprised of one or more formations, with a certain age and rock composition.", + "Each one of these components is comprised of one or more formations, with a certain age and rock composition." + ], + [ + "sentence", + "", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 15154527065859701304, + 10511609858079856946, + 18446744073709551615, + 18446744073709551615, + 914, + 1162, + 914, + 1162, + 174, + 218, + true, + "To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints.", + "To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints." + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 5515747999597331548, + 2684891308460414979, + 18446744073709551615, + 18446744073709551615, + 41, + 64, + 41, + 64, + 8, + 12, + true, + "oil and gas exploration", + "oil and gas exploration" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 10890937563763904307, + 10205829752163449428, + 18446744073709551615, + 18446744073709551615, + 463, + 490, + 463, + 490, + 81, + 87, + true, + "source, reservoir, and seal", + "source, reservoir, and seal" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 13992299006832689086, + 10586492155645346827, + 18446744073709551615, + 18446744073709551615, + 538, + 548, + 538, + 548, + 97, + 100, + true, + "oil or gas", + "oil or gas" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 13992299006832689086, + 10586492155645246498, + 18446744073709551615, + 18446744073709551615, + 580, + 590, + 580, + 590, + 107, + 110, + true, + "oil or gas", + "oil or gas" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 9418848057117014737, + 1685050495865720384, + 18446744073709551615, + 18446744073709551615, + 654, + 665, + 654, + 665, + 121, + 124, + true, + "oil and gas", + "oil and gas" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 9418848057117014737, + 1685050495865971311, + 18446744073709551615, + 18446744073709551615, + 684, + 695, + 684, + 695, + 129, + 132, + true, + "oil and gas", + "oil and gas" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 3561668659819452356, + 7599493799741319648, + 18446744073709551615, + 18446744073709551615, + 888, + 912, + 888, + 912, + 169, + 173, + true, + "age and rock composition", + "age and rock composition" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 14224878169535431391, + 4999493091788788351, + 18446744073709551615, + 18446744073709551615, + 1035, + 1066, + 1035, + 1066, + 196, + 204, + true, + "ie, reservoir, seal, and source", + "ie, reservoir, seal, and source" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 8380287248939955654, + 16738377710036347675, + 18446744073709551615, + 18446744073709551615, + 4, + 14, + 4, + 14, + 1, + 3, + true, + "main tasks", + "main tasks" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 10692163443301812358, + 13785073584184276748, + 18446744073709551615, + 18446744073709551615, + 49, + 64, + 49, + 64, + 10, + 12, + true, + "gas exploration", + "gas exploration" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 10658730123397856737, + 8700089933554137116, + 18446744073709551615, + 18446744073709551615, + 81, + 112, + 81, + 112, + 15, + 19, + true, + "potential new exploration sites", + "potential new exploration sites" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 9648537698556423826, + 6911808250044295400, + 18446744073709551615, + 18446744073709551615, + 252, + 273, + 252, + 273, + 47, + 49, + true, + "geological formations", + "geological formations" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2509987211733796739, + 16811136751141125392, + 18446744073709551615, + 18446744073709551615, + 360, + 376, + 360, + 376, + 62, + 64, + true, + "petroleum system", + "petroleum system" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 7395650178474964697, + 4439868107237037266, + 18446744073709551615, + 18446744073709551615, + 393, + 410, + 393, + 410, + 69, + 71, + true, + "minimalistic form", + "minimalistic form" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2509987211733796739, + 16811136751141124595, + 18446744073709551615, + 18446744073709551615, + 414, + 430, + 414, + 430, + 73, + 75, + true, + "petroleum system", + "petroleum system" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 80968125733518558, + 7018196435505582631, + 18446744073709551615, + 18446744073709551615, + 510, + 524, + 510, + 524, + 92, + 94, + true, + "rock formation", + "rock formation" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16193957251468249616, + 10512046748046668259, + 18446744073709551615, + 18446744073709551615, + 615, + 636, + 615, + 636, + 114, + 117, + true, + "porous reservoir rock", + "porous reservoir rock" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 9723882825591180683, + 9574840395838806322, + 18446744073709551615, + 18446744073709551615, + 751, + 777, + 751, + 777, + 144, + 147, + true, + "impermeable rock formation", + "impermeable rock formation" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 18043249325066169556, + 6240050066438421142, + 18446744073709551615, + 18446744073709551615, + 880, + 891, + 880, + 891, + 168, + 170, + true, + "certain age", + "certain age" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 18031444749457032388, + 1223000296858575465, + 18446744073709551615, + 18446744073709551615, + 896, + 912, + 896, + 912, + 171, + 173, + true, + "rock composition", + "rock composition" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2509987211733796739, + 16811136751140628834, + 18446744073709551615, + 18446744073709551615, + 928, + 944, + 928, + 944, + 177, + 179, + true, + "petroleum system", + "petroleum system" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 15928864167064606327, + 6933788371051918523, + 18446744073709551615, + 18446744073709551615, + 950, + 975, + 950, + 975, + 181, + 184, + true, + "certain geographical area", + "certain geographical area" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6743492865849365205, + 1774365845575004327, + 18446744073709551615, + 18446744073709551615, + 995, + 1014, + 995, + 1014, + 190, + 192, + true, + "candidate formation", + "candidate formation" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 3449270776692652335, + 8149689351956165055, + 18446744073709551615, + 18446744073709551615, + 1133, + 1161, + 1133, + 1161, + 215, + 217, + true, + "well-established constraints", + "well-established constraints" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 8106397495764760483, + 8300505182841605252, + 18446744073709551615, + 18446744073709551615, + 22, + 29, + 22, + 29, + 5, + 6, + true, + "experts", + "experts" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895623363, + 674497968123871980, + 18446744073709551615, + 18446744073709551615, + 41, + 44, + 41, + 44, + 8, + 9, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 329104159219516222, + 14550289508206518778, + 18446744073709551615, + 18446744073709551615, + 153, + 158, + 153, + 158, + 27, + 28, + true, + "basin", + "basin" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 3123226645854154111, + 6696524889970483253, + 18446744073709551615, + 18446744073709551615, + 173, + 184, + 173, + 184, + 32, + 33, + true, + "sub-regions", + "sub-regions" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 14814125472896938138, + 12597080905343650882, + 18446744073709551615, + 18446744073709551615, + 189, + 197, + 189, + 197, + 35, + 36, + true, + "practice", + "practice" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 329104159219516222, + 14550289508206522668, + 18446744073709551615, + 18446744073709551615, + 214, + 219, + 214, + 219, + 40, + 41, + true, + "basin", + "basin" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 329104159326063388, + 14165173228340727668, + 18446744073709551615, + 18446744073709551615, + 222, + 227, + 222, + 227, + 42, + 43, + true, + "boils", + "boils" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 14088628410271132453, + 4251286936900238374, + 18446744073709551615, + 18446744073709551615, + 285, + 295, + 285, + 295, + 51, + 52, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 329104159219516222, + 14550289508206516996, + 18446744073709551615, + 18446744073709551615, + 303, + 308, + 303, + 308, + 54, + 55, + true, + "basin", + "basin" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16064217528453934834, + 15050166161452124834, + 18446744073709551615, + 18446744073709551615, + 336, + 346, + 336, + 346, + 59, + 60, + true, + "formations", + "formations" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2703018952916355661, + 8840992498594724587, + 18446744073709551615, + 18446744073709551615, + 451, + 461, + 451, + 461, + 79, + 80, + true, + "components", + "components" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16381206579112188113, + 14297335677695974861, + 18446744073709551615, + 18446744073709551615, + 463, + 469, + 463, + 469, + 81, + 82, + true, + "source", + "source" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6168331670275357579, + 8243421751734568227, + 18446744073709551615, + 18446744073709551615, + 471, + 480, + 471, + 480, + 83, + 84, + true, + "reservoir", + "reservoir" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 389609625741054314, + 410603660043560873, + 18446744073709551615, + 18446744073709551615, + 486, + 490, + 486, + 490, + 86, + 87, + true, + "seal", + "seal" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16381206579112188113, + 14297335677695972757, + 18446744073709551615, + 18446744073709551615, + 496, + 502, + 496, + 502, + 89, + 90, + true, + "source", + "source" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895623363, + 674497968100857275, + 18446744073709551615, + 18446744073709551615, + 538, + 541, + 538, + 541, + 97, + 98, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895464135, + 674505039568697250, + 18446744073709551615, + 18446744073709551615, + 545, + 548, + 545, + 548, + 99, + 100, + true, + "gas", + "gas" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895623363, + 674497968100848533, + 18446744073709551615, + 18446744073709551615, + 580, + 583, + 580, + 583, + 107, + 108, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895464135, + 674505039568698065, + 18446744073709551615, + 18446744073709551615, + 587, + 590, + 587, + 590, + 109, + 110, + true, + "gas", + "gas" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895623363, + 674497968100852102, + 18446744073709551615, + 18446744073709551615, + 654, + 657, + 654, + 657, + 121, + 122, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895464135, + 674505039568694425, + 18446744073709551615, + 18446744073709551615, + 662, + 665, + 662, + 665, + 123, + 124, + true, + "gas", + "gas" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 329104161571401725, + 9497325505729384728, + 18446744073709551615, + 18446744073709551615, + 670, + 675, + 670, + 675, + 126, + 127, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895623363, + 674497968100849695, + 18446744073709551615, + 18446744073709551615, + 684, + 687, + 684, + 687, + 129, + 130, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895464135, + 674505039568442669, + 18446744073709551615, + 18446744073709551615, + 692, + 695, + 692, + 695, + 131, + 132, + true, + "gas", + "gas" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6168331670275357579, + 8243421751734518002, + 18446744073709551615, + 18446744073709551615, + 715, + 724, + 715, + 724, + 137, + 138, + true, + "reservoir", + "reservoir" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 389609625741054314, + 410603660043570043, + 18446744073709551615, + 18446744073709551615, + 798, + 802, + 798, + 802, + 151, + 152, + true, + "seal", + "seal" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2703018952916355661, + 8840992498595058324, + 18446744073709551615, + 18446744073709551615, + 822, + 832, + 822, + 832, + 157, + 158, + true, + "components", + "components" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16064217528453934834, + 15050166161452165429, + 18446744073709551615, + 18446744073709551615, + 861, + 871, + 861, + 871, + 164, + 165, + true, + "formations", + "formations" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 5947879501615734370, + 7820482070703241023, + 18446744073709551615, + 18446744073709551615, + 1024, + 1033, + 1024, + 1033, + 194, + 195, + true, + "component", + "component" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 15441160910541486545, + 7242502590681773013, + 18446744073709551615, + 18446744073709551615, + 1035, + 1037, + 1035, + 1037, + 196, + 197, + true, + "ie", + "ie" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6168331670275357579, + 8243421751734612797, + 18446744073709551615, + 18446744073709551615, + 1039, + 1048, + 1039, + 1048, + 198, + 199, + true, + "reservoir", + "reservoir" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 389609625741054314, + 410603660043461418, + 18446744073709551615, + 18446744073709551615, + 1050, + 1054, + 1050, + 1054, + 200, + 201, + true, + "seal", + "seal" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16381206579112188113, + 14297335677695869230, + 18446744073709551615, + 18446744073709551615, + 1060, + 1066, + 1060, + 1066, + 203, + 204, + true, + "source", + "source" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 14088628410271132453, + 4251286936900251711, + 18446744073709551615, + 18446744073709551615, + 1089, + 1099, + 1089, + 1099, + 209, + 210, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2703018952916355661, + 8840992498594647547, + 18446744073709551615, + 18446744073709551615, + 1109, + 1119, + 1109, + 1119, + 212, + 213, + true, + "components", + "components" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 8106397496085150773, + 8292722284435493161, + 18446744073709551615, + 18446744073709551615, + 1167, + 1174, + 1167, + 1174, + 219, + 220, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6168331670275357579, + 8243421751734523008, + 18446744073709551615, + 18446744073709551615, + 1180, + 1189, + 1180, + 1189, + 222, + 223, + true, + "reservoir", + "reservoir" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "numval", + "ival", + 1080447728722590413, + "TEXT", + "#/texts/125", + 1.0, + 15441160910541481976, + 12490743152134877753, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "12", + "12" + ], + [ + "numval", + "ival", + 4361549257087816853, + "TEXT", + "#/texts/126", + 1.0, + 15441160910541481979, + 9983816787922721487, + 18446744073709551615, + 18446744073709551615, + 3, + 5, + 3, + 5, + 1, + 2, + true, + "15", + "15" + ], + [ + "sentence", + "", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 15802078051467200825, + 4593559971105037353, + 18446744073709551615, + 18446744073709551615, + 61, + 171, + 61, + 171, + 12, + 34, + true, + "Another example of such constraints is that the age of the seal and reservoir has to be older than the source.", + "Another example of such constraints is that the age of the seal and reservoir has to be older than the source." + ], + [ + "term", + "enum-term-mark-2", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 16756161176734575985, + 10638546327685693256, + 18446744073709551615, + 18446744073709551615, + 120, + 138, + 120, + 138, + 23, + 26, + true, + "seal and reservoir", + "seal and reservoir" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 8780157828309296089, + 14417774072283982472, + 18446744073709551615, + 18446744073709551615, + 45, + 59, + 45, + 59, + 9, + 11, + true, + "seal formation", + "seal formation" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 13876553311005799387, + 6354882104637684429, + 18446744073709551615, + 18446744073709551615, + 80, + 96, + 80, + 96, + 15, + 17, + true, + "such constraints", + "such constraints" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 6187533480885532545, + 7871889926557155024, + 18446744073709551615, + 18446744073709551615, + 0, + 9, + 0, + 9, + 0, + 1, + true, + "formation", + "formation" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 329104162100250438, + 16247133124509719242, + 18446744073709551615, + 18446744073709551615, + 30, + 35, + 30, + 35, + 6, + 7, + true, + "depth", + "depth" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 8106397496085150773, + 10113590852616268300, + 18446744073709551615, + 18446744073709551615, + 69, + 76, + 69, + 76, + 13, + 14, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 12178341415895571749, + 13993372577473076189, + 18446744073709551615, + 18446744073709551615, + 109, + 112, + 109, + 112, + 20, + 21, + true, + "age", + "age" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 389609625741054314, + 7672668339257182848, + 18446744073709551615, + 18446744073709551615, + 120, + 124, + 120, + 124, + 23, + 24, + true, + "seal", + "seal" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 6168331670275357579, + 17711145650520030642, + 18446744073709551615, + 18446744073709551615, + 129, + 138, + 129, + 138, + 25, + 26, + true, + "reservoir", + "reservoir" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 16381206579112188113, + 6632699352491893262, + 18446744073709551615, + 18446744073709551615, + 164, + 170, + 164, + 170, + 32, + 33, + true, + "source", + "source" + ], + [ + "parenthesis", + "round brackets", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 18003855556286774784, + 5570542864698798195, + 18446744073709551615, + 18446744073709551615, + 326, + 388, + 326, + 388, + 57, + 71, + true, + "(eg, geographical locations, depth, age, and rock composition)", + "(eg, geographical locations, depth, age, and rock composition)" + ], + [ + "expression", + "word-concatenation", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 5576784674045870024, + 13038592085786715991, + 18446744073709551615, + 18446744073709551615, + 81, + 91, + 81, + 91, + 15, + 16, + true, + "day-to-day", + "day-to-day" + ], + [ + "sentence", + "", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 1705537891001951581, + 12643174622978093931, + 18446744073709551615, + 18446744073709551615, + 0, + 141, + 0, + 141, + 0, + 26, + true, + "In order for the CPS platform to help the oil and gas explorationalists in their day-to-day job effectively, it needs to meet two objectives.", + "In order for the CPS platform to help the oil and gas explorationalists in their day-to-day job effectively, it needs to meet two objectives." + ], + [ + "sentence", + "", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 6964031938346981228, + 2679069447819445922, + 18446744073709551615, + 18446744073709551615, + 142, + 230, + 142, + 230, + 26, + 44, + true, + "On the one hand, it needs to create a consistent Knowledge Graph from a document corpus.", + "On the one hand, it needs to create a consistent Knowledge Graph from a document corpus." + ], + [ + "sentence", + "", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 17535387160187070973, + 11137257841253658893, + 18446744073709551615, + 18446744073709551615, + 231, + 389, + 231, + 389, + 44, + 72, + true, + "This Knowledge Graph has to contain all geological formations with their respective properties (eg, geographical locations, depth, age, and rock composition).", + "This Knowledge Graph has to contain all geological formations with their respective properties (eg, geographical locations, depth, age, and rock composition)." + ], + [ + "sentence", + "", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 14586761392565722761, + 12189326748130292090, + 18446744073709551615, + 18446744073709551615, + 390, + 596, + 390, + 596, + 72, + 106, + true, + "On the other hand, CPS needs to provide fast query responses, such that one can automatically retrieve potential components of petroleum systems and apply the constraints to filter out promising candidates.", + "On the other hand, CPS needs to provide fast query responses, such that one can automatically retrieve potential components of petroleum systems and apply the constraints to filter out promising candidates." + ], + [ + "term", + "enum-term-mark-2", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 9418848057117014737, + 12374664467097755932, + 18446744073709551615, + 18446744073709551615, + 42, + 53, + 42, + 53, + 9, + 12, + true, + "oil and gas", + "oil and gas" + ], + [ + "term", + "enum-term-mark-2", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 6860541340972856268, + 17956047888006168723, + 18446744073709551615, + 18446744073709551615, + 355, + 387, + 355, + 387, + 63, + 70, + true, + "depth, age, and rock composition", + "depth, age, and rock composition" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 12779036928191531604, + 2179291459161706341, + 18446744073709551615, + 18446744073709551615, + 17, + 29, + 17, + 29, + 4, + 6, + true, + "CPS platform", + "CPS platform" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 3400754800868514192, + 11934221638746188525, + 18446744073709551615, + 18446744073709551615, + 50, + 71, + 50, + 71, + 11, + 13, + true, + "gas explorationalists", + "gas explorationalists" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 3153173113727511424, + 17636674351298961190, + 18446744073709551615, + 18446744073709551615, + 81, + 95, + 81, + 95, + 15, + 17, + true, + "day-to-day job", + "day-to-day job" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 2568402610530935991, + 14140482534604978282, + 18446744073709551615, + 18446744073709551615, + 180, + 206, + 180, + 206, + 36, + 39, + true, + "consistent Knowledge Graph", + "consistent Knowledge Graph" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 16647308723752369903, + 14738883853389812839, + 18446744073709551615, + 18446744073709551615, + 214, + 229, + 214, + 229, + 41, + 43, + true, + "document corpus", + "document corpus" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 5877539623435777295, + 11286491693883501248, + 18446744073709551615, + 18446744073709551615, + 236, + 251, + 236, + 251, + 45, + 47, + true, + "Knowledge Graph", + "Knowledge Graph" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 9648537698556423826, + 6238309560067342359, + 18446744073709551615, + 18446744073709551615, + 271, + 292, + 271, + 292, + 51, + 53, + true, + "geological formations", + "geological formations" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 10514013392853408912, + 3902356967922595314, + 18446744073709551615, + 18446744073709551615, + 304, + 325, + 304, + 325, + 55, + 57, + true, + "respective properties", + "respective properties" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 15051916633168881147, + 4042421737129645550, + 18446744073709551615, + 18446744073709551615, + 331, + 353, + 331, + 353, + 60, + 62, + true, + "geographical locations", + "geographical locations" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 18031444749457032388, + 12929696333857038410, + 18446744073709551615, + 18446744073709551615, + 371, + 387, + 371, + 387, + 68, + 70, + true, + "rock composition", + "rock composition" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 14046205808324278415, + 9078231314066883308, + 18446744073709551615, + 18446744073709551615, + 397, + 407, + 397, + 407, + 74, + 76, + true, + "other hand", + "other hand" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 2280710246315749839, + 14080640540713416540, + 18446744073709551615, + 18446744073709551615, + 430, + 450, + 430, + 450, + 81, + 84, + true, + "fast query responses", + "fast query responses" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 18259199261543580951, + 14446619061773297986, + 18446744073709551615, + 18446744073709551615, + 493, + 513, + 493, + 513, + 91, + 93, + true, + "potential components", + "potential components" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 1727412062449779824, + 6279132683973492637, + 18446744073709551615, + 18446744073709551615, + 517, + 534, + 517, + 534, + 94, + 96, + true, + "petroleum systems", + "petroleum systems" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 14381095619961675188, + 12908833652765352656, + 18446744073709551615, + 18446744073709551615, + 575, + 595, + 575, + 595, + 103, + 105, + true, + "promising candidates", + "promising candidates" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 329104161571401725, + 10954679443872088477, + 18446744073709551615, + 18446744073709551615, + 3, + 8, + 3, + 8, + 1, + 2, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 12178341415895623363, + 17947552822877265640, + 18446744073709551615, + 18446744073709551615, + 42, + 45, + 42, + 45, + 9, + 10, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 15868223159689591859, + 14476383799407961998, + 18446744073709551615, + 18446744073709551615, + 130, + 140, + 130, + 140, + 24, + 25, + true, + "objectives", + "objectives" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 389609625695385072, + 11440288346750521048, + 18446744073709551615, + 18446744073709551615, + 153, + 157, + 153, + 157, + 29, + 30, + true, + "hand", + "hand" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 15441160910541487324, + 15189263452160795700, + 18446744073709551615, + 18446744073709551615, + 327, + 329, + 327, + 329, + 58, + 59, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 329104162100250438, + 11306285140255820122, + 18446744073709551615, + 18446744073709551615, + 355, + 360, + 355, + 360, + 63, + 64, + true, + "depth", + "depth" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 12178341415895571749, + 17965533017402487143, + 18446744073709551615, + 18446744073709551615, + 362, + 365, + 362, + 365, + 65, + 66, + true, + "age", + "age" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 12178341415896222428, + 17965292451507993990, + 18446744073709551615, + 18446744073709551615, + 409, + 412, + 409, + 412, + 77, + 78, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 2343820404875251124, + 11932898287921936471, + 18446744073709551615, + 18446744073709551615, + 549, + 560, + 549, + 560, + 99, + 100, + true, + "constraints", + "constraints" + ], + [ + "numval", + "ival", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 17767354399704235158, + 13179516689827493860, + 18446744073709551615, + 18446744073709551615, + 345, + 346, + 345, + 346, + 62, + 63, + true, + "6", + "6" + ], + [ + "sentence", + "", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 6825934840363073982, + 5636233081723674917, + 18446744073709551615, + 18446744073709551615, + 0, + 267, + 0, + 267, + 0, + 48, + true, + "During the development and implementation of custom NLU annotators in CPS for oil and gas exploration, the client team worked hand in hand with the IBM Research team to set up a controlled accuracy benchmark in which the key capabilities of the CPS can be quantified.", + "During the development and implementation of custom NLU annotators in CPS for oil and gas exploration, the client team worked hand in hand with the IBM Research team to set up a controlled accuracy benchmark in which the key capabilities of the CPS can be quantified." + ], + [ + "sentence", + "", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 14290172583784870712, + 15927376679214014989, + 18446744073709551615, + 18446744073709551615, + 268, + 410, + 268, + 410, + 48, + 78, + true, + "The goal of the benchmark was to test the entire pipeline depicted in Figure 6, that is, from PDF document ingestion to a final, queryable KG.", + "The goal of the benchmark was to test the entire pipeline depicted in Figure 6, that is, from PDF document ingestion to a final, queryable KG." + ], + [ + "term", + "enum-term-mark-2", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 13768691840532369655, + 11193246741347243418, + 18446744073709551615, + 18446744073709551615, + 11, + 41, + 11, + 41, + 2, + 5, + true, + "development and implementation", + "development and implementation" + ], + [ + "term", + "enum-term-mark-2", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 5515747999597331548, + 7333264710061334239, + 18446744073709551615, + 18446744073709551615, + 78, + 101, + 78, + 101, + 12, + 16, + true, + "oil and gas exploration", + "oil and gas exploration" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 4571012442155812864, + 10278457919107470617, + 18446744073709551615, + 18446744073709551615, + 45, + 66, + 45, + 66, + 6, + 9, + true, + "custom NLU annotators", + "custom NLU annotators" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 10692163443301812358, + 14500118643984057055, + 18446744073709551615, + 18446744073709551615, + 86, + 101, + 86, + 101, + 14, + 16, + true, + "gas exploration", + "gas exploration" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 2350671729723156275, + 15558555620402818749, + 18446744073709551615, + 18446744073709551615, + 107, + 118, + 107, + 118, + 18, + 20, + true, + "client team", + "client team" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 6793739009103508672, + 6386853708445382627, + 18446744073709551615, + 18446744073709551615, + 148, + 165, + 148, + 165, + 26, + 29, + true, + "IBM Research team", + "IBM Research team" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 12051423055985186664, + 17929859788470785671, + 18446744073709551615, + 18446744073709551615, + 189, + 207, + 189, + 207, + 34, + 36, + true, + "accuracy benchmark", + "accuracy benchmark" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 10510333910970178582, + 1646640126800442890, + 18446744073709551615, + 18446744073709551615, + 221, + 237, + 221, + 237, + 39, + 41, + true, + "key capabilities", + "key capabilities" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 1949498199725672567, + 18146522528945762773, + 18446744073709551615, + 18446744073709551615, + 310, + 325, + 310, + 325, + 57, + 59, + true, + "entire pipeline", + "entire pipeline" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 1297756121731734348, + 7733414860626740361, + 18446744073709551615, + 18446744073709551615, + 362, + 384, + 362, + 384, + 68, + 71, + true, + "PDF document ingestion", + "PDF document ingestion" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 13017693093872726154, + 10279474879437280242, + 18446744073709551615, + 18446744073709551615, + 397, + 409, + 397, + 409, + 75, + 77, + true, + "queryable KG", + "queryable KG" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 6381120898605443461, + 17504710944887042389, + 18446744073709551615, + 18446744073709551615, + 415, + 429, + 415, + 429, + 79, + 81, + true, + "key components", + "key components" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 10127059533904232000, + 15955758164972743780, + 18446744073709551615, + 18446744073709551615, + 438, + 455, + 438, + 455, + 83, + 85, + true, + "specific pipeline", + "specific pipeline" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 1525875096007260836, + 12171024053502893659, + 18446744073709551615, + 18446744073709551615, + 11, + 22, + 11, + 22, + 2, + 3, + true, + "development", + "development" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 16770038681622514616, + 13377631723935101023, + 18446744073709551615, + 18446744073709551615, + 27, + 41, + 27, + 41, + 4, + 5, + true, + "implementation", + "implementation" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 12178341415896222428, + 18419530096214942692, + 18446744073709551615, + 18446744073709551615, + 70, + 73, + 70, + 73, + 10, + 11, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 12178341415895623363, + 18419432310300434890, + 18446744073709551615, + 18446744073709551615, + 78, + 81, + 78, + 81, + 12, + 13, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 389609625695385072, + 15858014558261910158, + 18446744073709551615, + 18446744073709551615, + 126, + 130, + 126, + 130, + 21, + 22, + true, + "hand", + "hand" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 389609625695385072, + 15858014558262095076, + 18446744073709551615, + 18446744073709551615, + 134, + 138, + 134, + 138, + 23, + 24, + true, + "hand", + "hand" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 12178341415896222428, + 18419530096214998128, + 18446744073709551615, + 18446744073709551615, + 245, + 248, + 245, + 248, + 43, + 44, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 389609625699055241, + 15857990529270421118, + 18446744073709551615, + 18446744073709551615, + 272, + 276, + 272, + 276, + 49, + 50, + true, + "goal", + "goal" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 5948160876453582848, + 11544211964230154399, + 18446744073709551615, + 18446744073709551615, + 284, + 293, + 284, + 293, + 52, + 53, + true, + "benchmark", + "benchmark" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 16381206514091025767, + 1446474516282156831, + 18446744073709551615, + 18446744073709551615, + 338, + 344, + 338, + 344, + 61, + 62, + true, + "Figure", + "Figure" + ], + [ + "numval", + "ival", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 17767354399704235161, + 16668792304570951258, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 17767354399704235162, + 17330663619054335778, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 17767354399704235163, + 14373480556157138435, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "3", + "3" + ], + [ + "sentence", + "", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 13977281577432050469, + 10430722213677949058, + 18446744073709551615, + 18446744073709551615, + 23, + 101, + 23, + 101, + 6, + 19, + true, + "KG served by CPS to identify petroleum systems elements with their properties.", + "KG served by CPS to identify petroleum systems elements with their properties." + ], + [ + "term", + "single-term", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 715794125007296180, + 12795993613527751497, + 18446744073709551615, + 18446744073709551615, + 52, + 78, + 52, + 78, + 12, + 15, + true, + "petroleum systems elements", + "petroleum systems elements" + ], + [ + "term", + "single-term", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 14637920980029577773, + 100201578071126401, + 18446744073709551615, + 18446744073709551615, + 7, + 15, + 7, + 15, + 3, + 4, + true, + "querying", + "querying" + ], + [ + "term", + "single-term", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 15441160910541480204, + 3387988993019039764, + 18446744073709551615, + 18446744073709551615, + 23, + 25, + 23, + 25, + 6, + 7, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 12178341415896222428, + 12208991191022865237, + 18446744073709551615, + 18446744073709551615, + 36, + 39, + 36, + 39, + 9, + 10, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 14088628410271132453, + 11692902669902558965, + 18446744073709551615, + 18446744073709551615, + 90, + 100, + 90, + 100, + 17, + 18, + true, + "properties", + "properties" + ], + [ + "numval", + "ival", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 389609625536078676, + 4142990959296314501, + 18446744073709551615, + 18446744073709551615, + 88, + 92, + 88, + 92, + 18, + 19, + true, + "1051", + "1051" + ], + [ + "expression", + "word-concatenation", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 3753411203337468488, + 1771253405748692981, + 18446744073709551615, + 18446744073709551615, + 805, + 817, + 802, + 814, + 146, + 147, + true, + "ground-truth", + "ground-truth" + ], + [ + "expression", + "word-concatenation", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16502743316004277231, + 16326952748376776172, + 18446744073709551615, + 18446744073709551615, + 876, + 891, + 873, + 888, + 159, + 160, + true, + "well-controlled", + "well-controlled" + ], + [ + "expression", + "word-concatenation", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 5305843656069465459, + 342229054274366707, + 18446744073709551615, + 18446744073709551615, + 893, + 903, + 890, + 900, + 161, + 162, + true, + "end-to-end", + "end-to-end" + ], + [ + "sentence", + "", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 7581777221421061873, + 14978907642458194092, + 18446744073709551615, + 18446744073709551615, + 0, + 157, + 0, + 154, + 0, + 29, + true, + "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset.", + "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset." + ], + [ + "sentence", + "", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 5445384521585333988, + 6675391978778171870, + 18446744073709551615, + 18446744073709551615, + 158, + 259, + 155, + 256, + 29, + 47, + true, + "The advantage of using this dataset for an accuracy benchmark is that each report includes two parts.", + "The advantage of using this dataset for an accuracy benchmark is that each report includes two parts." + ], + [ + "sentence", + "", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 9483359608935776482, + 14368716291555426974, + 18446744073709551615, + 18446744073709551615, + 260, + 350, + 257, + 347, + 47, + 64, + true, + "One part is verbose text describing the history, evolution, and composition of the fields.", + "One part is verbose text describing the history, evolution, and composition of the fields." + ], + [ + "sentence", + "", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 11210731674048684277, + 54501834201149568, + 18446744073709551615, + 18446744073709551615, + 351, + 490, + 348, + 487, + 64, + 86, + true, + "The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline.", + "The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline." + ], + [ + "sentence", + "", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 4149301380723366406, + 2966292617771160741, + 18446744073709551615, + 18446744073709551615, + 491, + 656, + 488, + 653, + 86, + 116, + true, + "The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties.", + "The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties." + ], + [ + "sentence", + "", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16090785368672247706, + 15812168781097489089, + 18446744073709551615, + 18446744073709551615, + 657, + 734, + 654, + 731, + 116, + 131, + true, + "Therefore, we ingest these reports into CCS and extract both text and tables.", + "Therefore, we ingest these reports into CCS and extract both text and tables." + ], + [ + "sentence", + "", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 3726384155577721767, + 2194562319932255346, + 18446744073709551615, + 18446744073709551615, + 735, + 923, + 732, + 920, + 131, + 165, + true, + "Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark.", + "Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark." + ], + [ + "term", + "enum-term-mark-2", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 3005704999990963973, + 4834246923337609034, + 18446744073709551615, + 18446744073709551615, + 300, + 335, + 297, + 332, + 54, + 60, + true, + "history, evolution, and composition", + "history, evolution, and composition" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 2350671729723156275, + 11828949847746348501, + 18446744073709551615, + 18446744073709551615, + 40, + 51, + 40, + 51, + 8, + 10, + true, + "client team", + "client team" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 1949498199725672567, + 5862741285729980896, + 18446744073709551615, + 18446744073709551615, + 57, + 72, + 57, + 72, + 12, + 14, + true, + "entire pipeline", + "entire pipeline" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 6071418746771287636, + 13853529843892164023, + 18446744073709551615, + 18446744073709551615, + 93, + 117, + 93, + 117, + 19, + 22, + true, + "Field Evaluation Reports", + "Field Evaluation Reports" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16836628269428371418, + 18082386177451915318, + 18446744073709551615, + 18446744073709551615, + 127, + 141, + 127, + 141, + 24, + 26, + true, + "C&C Reservoirs", + "C&C Reservoirs" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 12051423055985186664, + 5239327927281905732, + 18446744073709551615, + 18446744073709551615, + 201, + 219, + 198, + 216, + 37, + 39, + true, + "accuracy benchmark", + "accuracy benchmark" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14652437434623351008, + 17767392433408645928, + 18446744073709551615, + 18446744073709551615, + 260, + 268, + 257, + 265, + 47, + 49, + true, + "One part", + "One part" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 3514630383766601368, + 16756472043394057148, + 18446744073709551615, + 18446744073709551615, + 272, + 284, + 269, + 281, + 50, + 52, + true, + "verbose text", + "verbose text" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14203028525020218648, + 7178394498907141554, + 18446744073709551615, + 18446744073709551615, + 375, + 393, + 372, + 390, + 69, + 71, + true, + "similar complexity", + "similar complexity" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 5292062602866596698, + 13702278590678261576, + 18446744073709551615, + 18446744073709551615, + 397, + 429, + 394, + 426, + 72, + 75, + true, + "standard geological publications", + "standard geological publications" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 420313777628708468, + 3530977542162910034, + 18446744073709551615, + 18446744073709551615, + 441, + 460, + 438, + 457, + 78, + 80, + true, + "realistic challenge", + "realistic challenge" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14857819661511796263, + 17596023666875428212, + 18446744073709551615, + 18446744073709551615, + 469, + 489, + 466, + 486, + 82, + 85, + true, + "KG creation pipeline", + "KG creation pipeline" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 2169253085456814700, + 15744012511127381696, + 18446744073709551615, + 18446744073709551615, + 495, + 506, + 492, + 503, + 87, + 89, + true, + "second part", + "second part" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 1727412062449779824, + 13214726213044247883, + 18446744073709551615, + 18446744073709551615, + 616, + 633, + 613, + 630, + 110, + 112, + true, + "petroleum systems", + "petroleum systems" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8339342696999135929, + 196460050606710926, + 18446744073709551615, + 18446744073709551615, + 844, + 854, + 841, + 851, + 152, + 154, + true, + "KG queries", + "KG queries" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 3562853857806540674, + 14014477599829561907, + 18446744073709551615, + 18446744073709551615, + 893, + 922, + 890, + 919, + 161, + 164, + true, + "end-to-end accuracy benchmark", + "end-to-end accuracy benchmark" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14105815281071030459, + 8762836771533552880, + 18446744073709551615, + 18446744073709551615, + 7, + 17, + 7, + 17, + 2, + 3, + true, + "suggestion", + "suggestion" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106397495764760483, + 5542432659059451382, + 18446744073709551615, + 18446744073709551615, + 25, + 32, + 25, + 32, + 5, + 6, + true, + "experts", + "experts" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106396676716241904, + 3065370629985052298, + 18446744073709551615, + 18446744073709551615, + 149, + 156, + 146, + 153, + 27, + 28, + true, + "dataset", + "dataset" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 5946904284821171904, + 7296542983005317042, + 18446744073709551615, + 18446744073709551615, + 162, + 171, + 159, + 168, + 30, + 31, + true, + "advantage", + "advantage" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106396676716241904, + 3065370629985057888, + 18446744073709551615, + 18446744073709551615, + 186, + 193, + 183, + 190, + 34, + 35, + true, + "dataset", + "dataset" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206521507679731, + 6908716631963244547, + 18446744073709551615, + 18446744073709551615, + 233, + 239, + 230, + 236, + 42, + 43, + true, + "report", + "report" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 329104161667984155, + 6486912841380755591, + 18446744073709551615, + 18446744073709551615, + 253, + 258, + 250, + 255, + 45, + 46, + true, + "parts", + "parts" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106398477819293336, + 16490512145767497936, + 18446744073709551615, + 18446744073709551615, + 300, + 307, + 297, + 304, + 54, + 55, + true, + "history", + "history" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 6172089554353143931, + 17169604842729845897, + 18446744073709551615, + 18446744073709551615, + 309, + 318, + 306, + 315, + 56, + 57, + true, + "evolution", + "evolution" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14749101077007455096, + 15429073757137335434, + 18446744073709551615, + 18446744073709551615, + 324, + 335, + 321, + 332, + 59, + 60, + true, + "composition", + "composition" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206548906499597, + 366300192401501745, + 18446744073709551615, + 18446744073709551615, + 343, + 349, + 340, + 346, + 62, + 63, + true, + "fields", + "fields" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14639581537964510688, + 9158240592054921045, + 18446744073709551615, + 18446744073709551615, + 355, + 363, + 352, + 360, + 65, + 66, + true, + "language", + "language" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 12178341415895456504, + 3484447794018666097, + 18446744073709551615, + 18446744073709551615, + 514, + 517, + 511, + 514, + 91, + 92, + true, + "end", + "end" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206521507679731, + 6908716631964218344, + 18446744073709551615, + 18446744073709551615, + 526, + 532, + 523, + 529, + 94, + 95, + true, + "report", + "report" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206513098478539, + 624667531677655957, + 18446744073709551615, + 18446744073709551615, + 549, + 555, + 546, + 552, + 98, + 99, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 389609625631325904, + 4140854756525230310, + 18446744073709551615, + 18446744073709551615, + 576, + 580, + 573, + 577, + 102, + 103, + true, + "text", + "text" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14652262331391540004, + 10527382338693157586, + 18446744073709551615, + 18446744073709551615, + 600, + 608, + 597, + 605, + 107, + 108, + true, + "elements", + "elements" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14088628410271132453, + 4995216546732910011, + 18446744073709551615, + 18446744073709551615, + 645, + 655, + 642, + 652, + 114, + 115, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106478449073306569, + 9696718968531674549, + 18446744073709551615, + 18446744073709551615, + 684, + 691, + 681, + 688, + 121, + 122, + true, + "reports", + "reports" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 12178341415896221596, + 3486979654156411937, + 18446744073709551615, + 18446744073709551615, + 697, + 700, + 694, + 697, + 123, + 124, + true, + "CCS", + "CCS" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 389609625631325904, + 4140854756525233018, + 18446744073709551615, + 18446744073709551615, + 718, + 722, + 715, + 719, + 127, + 128, + true, + "text", + "text" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206513098478539, + 624667531677603449, + 18446744073709551615, + 18446744073709551615, + 727, + 733, + 724, + 730, + 129, + 130, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 15441160910541480204, + 3458773221742021286, + 18446744073709551615, + 18446744073709551615, + 757, + 759, + 754, + 756, + 136, + 137, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 389609625631325904, + 4140854756525217952, + 18446744073709551615, + 18446744073709551615, + 774, + 778, + 771, + 775, + 140, + 141, + true, + "text", + "text" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206513098478539, + 624667531677607536, + 18446744073709551615, + 18446744073709551615, + 795, + 801, + 792, + 798, + 144, + 145, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 3753411203337468488, + 1771253405748692981, + 18446744073709551615, + 18446744073709551615, + 805, + 817, + 802, + 814, + 146, + 147, + true, + "ground-truth", + "ground-truth" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106397678203715209, + 2241732074401283122, + 18446744073709551615, + 18446744073709551615, + 829, + 836, + 826, + 833, + 149, + 150, + true, + "answers", + "answers" + ], + [ + "numval", + "fval", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625534532312, + 11597792617376893235, + 18446744073709551615, + 18446744073709551615, + 230, + 234, + 230, + 234, + 47, + 47, + false, + "99.7", + "99.7" + ], + [ + "numval", + "fval", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625534532316, + 11597792631633065669, + 18446744073709551615, + 18446744073709551615, + 247, + 251, + 247, + 251, + 50, + 50, + false, + "99.3", + "99.3" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 17767354399704235161, + 10009347220024759156, + 18446744073709551615, + 18446744073709551615, + 10, + 11, + 10, + 11, + 3, + 4, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625536078676, + 11597977105526404591, + 18446744073709551615, + 18446744073709551615, + 46, + 50, + 46, + 50, + 12, + 13, + true, + "1051", + "1051" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 12178341415896435064, + 2035594838057841276, + 18446744073709551615, + 18446744073709551615, + 114, + 117, + 114, + 117, + 23, + 24, + true, + "300", + "300" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 15441160910541486270, + 17171794145981856951, + 18446744073709551615, + 18446744073709551615, + 126, + 128, + 126, + 128, + 27, + 28, + true, + "46", + "46" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 12178341415896430817, + 2035594491968753454, + 18446744073709551615, + 18446744073709551615, + 129, + 132, + 129, + 132, + 28, + 29, + true, + "019", + "019" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 17767354399704235161, + 10009347220024810595, + 18446744073709551615, + 18446744073709551615, + 360, + 361, + 360, + 361, + 70, + 71, + true, + "1", + "1" + ], + [ + "parenthesis", + "reference", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 12178341415896395122, + 2035619839814699426, + 18446744073709551615, + 18446744073709551615, + 9, + 12, + 9, + 12, + 2, + 5, + true, + "(1)", + "(1)" + ], + [ + "parenthesis", + "round brackets", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 11766986595381952604, + 6865818640579361072, + 18446744073709551615, + 18446744073709551615, + 118, + 133, + 118, + 133, + 24, + 30, + true, + "(out of 46 019)", + "(out of 46 019)" + ], + [ + "expression", + "wtoken-concatenation", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 329104147618004591, + 11574052002680847144, + 18446744073709551615, + 18446744073709551615, + 230, + 235, + 230, + 235, + 47, + 48, + true, + "99.7%", + "99.7%" + ], + [ + "expression", + "wtoken-concatenation", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 329104147617972580, + 11574098081966624121, + 18446744073709551615, + 18446744073709551615, + 247, + 252, + 247, + 252, + 50, + 51, + true, + "99.3%", + "99.3%" + ], + [ + "sentence", + "", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 6264093697057942470, + 11535554290393356965, + 18446744073709551615, + 18446744073709551615, + 0, + 140, + 0, + 140, + 0, + 32, + true, + "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages.", + "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages." + ], + [ + "sentence", + "", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 5193352564005743212, + 15434324392582832918, + 18446744073709551615, + 18446744073709551615, + 141, + 290, + 141, + 290, + 32, + 57, + true, + "This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure.", + "This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure." + ], + [ + "sentence", + "", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 6552107417335024489, + 5237260083709821013, + 18446744073709551615, + 18446744073709551615, + 291, + 359, + 291, + 359, + 57, + 70, + true, + "These numbers are in line with those reported in our previous works.", + "These numbers are in line with those reported in our previous works." + ], + [ + "sentence", + "", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 8820460630403895169, + 17593037612846824792, + 18446744073709551615, + 18446744073709551615, + 362, + 569, + 362, + 569, + 71, + 103, + true, + "Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer.", + "Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer." + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 86072878302920231, + 17579708504691528419, + 18446744073709551615, + 18446744073709551615, + 92, + 110, + 92, + 110, + 20, + 22, + true, + "document structure", + "document structure" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 13968810273524073925, + 1064748835305933718, + 18446744073709551615, + 18446744073709551615, + 156, + 166, + 156, + 166, + 35, + 37, + true, + "page model", + "page model" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 12014656692274133724, + 13617999399581485717, + 18446744073709551615, + 18446744073709551615, + 270, + 289, + 270, + 289, + 54, + 56, + true, + "converted structure", + "converted structure" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 270007398742696754, + 7824217601819143418, + 18446744073709551615, + 18446744073709551615, + 344, + 358, + 344, + 358, + 67, + 69, + true, + "previous works", + "previous works" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 5715923267297430455, + 797212159439962662, + 18446744073709551615, + 18446744073709551615, + 380, + 407, + 380, + 407, + 74, + 77, + true, + "accurate conversion results", + "accurate conversion results" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 2136116818459714255, + 1164324304037097344, + 18446744073709551615, + 18446744073709551615, + 462, + 481, + 462, + 481, + 87, + 89, + true, + "language annotators", + "language annotators" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 2655104503757432456, + 6956823932481722640, + 18446744073709551615, + 18446744073709551615, + 495, + 510, + 495, + 510, + 91, + 93, + true, + "incomplete data", + "incomplete data" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 16172227578405589462, + 15021035824619292483, + 18446744073709551615, + 18446744073709551615, + 543, + 556, + 543, + 556, + 98, + 100, + true, + "query results", + "query results" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625741100019, + 12273254624188070437, + 18446744073709551615, + 18446744073709551615, + 4, + 8, + 4, + 8, + 1, + 2, + true, + "step", + "step" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 14814125852840540191, + 1277876793308303433, + 18446744073709551615, + 18446744073709551615, + 20, + 28, + 20, + 28, + 7, + 8, + true, + "pipeline", + "pipeline" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625526197745, + 11614011427733790335, + 18446744073709551615, + 18446744073709551615, + 51, + 55, + 51, + 55, + 13, + 14, + true, + "PDFs", + "PDFs" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 12178341415896221596, + 2034623361247365679, + 18446744073709551615, + 18446744073709551615, + 61, + 64, + 61, + 64, + 15, + 16, + true, + "CCS", + "CCS" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 329104161667992688, + 12056718547458487792, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 30, + 31, + true, + "pages", + "pages" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 6167933651658664291, + 10817420032196180216, + 18446744073709551615, + 18446744073709551615, + 198, + 207, + 198, + 207, + 41, + 42, + true, + "documents", + "documents" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 16381206521531485437, + 6366073862401203528, + 18446744073709551615, + 18446744073709551615, + 236, + 242, + 236, + 242, + 48, + 49, + true, + "recall", + "recall" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 6184954595655792282, + 4038487965885002947, + 18446744073709551615, + 18446744073709551615, + 253, + 262, + 253, + 262, + 51, + 52, + true, + "precision", + "precision" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 8106352625329644634, + 6390816065172833166, + 18446744073709551615, + 18446744073709551615, + 297, + 304, + 297, + 304, + 58, + 59, + true, + "numbers", + "numbers" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625633316261, + 12301982887694167440, + 18446744073709551615, + 18446744073709551615, + 312, + 316, + 312, + 316, + 61, + 62, + true, + "line", + "line" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 8106477781724488761, + 10618946517388681676, + 18446744073709551615, + 18446744073709551615, + 433, + 440, + 433, + 440, + 82, + 83, + true, + "quality", + "quality" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 6165970819764784401, + 16497601699560813235, + 18446744073709551615, + 18446744073709551615, + 530, + 539, + 530, + 539, + 96, + 97, + true, + "relevance", + "relevance" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 17767354399704235162, + 9584179333675572235, + 18446744073709551615, + 18446744073709551615, + 9, + 10, + 9, + 10, + 3, + 4, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625655454200, + 9968482244883150940, + 18446744073709551615, + 18446744073709551615, + 396, + 400, + 396, + 400, + 81, + 82, + true, + "4597", + "4597" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625533565630, + 9993658546277119180, + 18446744073709551615, + 18446744073709551615, + 407, + 411, + 407, + 411, + 84, + 85, + true, + "8811", + "8811" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896307158, + 3013762769356241010, + 18446744073709551615, + 18446744073709551615, + 424, + 427, + 424, + 427, + 87, + 88, + true, + "471", + "471" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541481167, + 17325109575647682885, + 18446744073709551615, + 18446744073709551615, + 449, + 451, + 449, + 451, + 92, + 93, + true, + "64", + "64" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896424078, + 3013760380687037623, + 18446744073709551615, + 18446744073709551615, + 539, + 542, + 539, + 542, + 111, + 112, + true, + "130", + "130" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896199548, + 3013765380002608726, + 18446744073709551615, + 18446744073709551615, + 659, + 662, + 659, + 662, + 136, + 137, + true, + "679", + "679" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896436418, + 3013773685141369379, + 18446744073709551615, + 18446744073709551615, + 663, + 666, + 663, + 666, + 137, + 138, + true, + "296", + "296" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896426647, + 3013760979502921720, + 18446744073709551615, + 18446744073709551615, + 684, + 687, + 684, + 687, + 140, + 141, + true, + "116", + "116" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896199474, + 3013765420737865825, + 18446744073709551615, + 18446744073709551615, + 688, + 691, + 688, + 691, + 141, + 142, + true, + "662", + "662" + ], + [ + "parenthesis", + "reference", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896395187, + 3013785139620202598, + 18446744073709551615, + 18446744073709551615, + 8, + 11, + 8, + 11, + 2, + 5, + true, + "(2)", + "(2)" + ], + [ + "parenthesis", + "round brackets", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 8493668881347689613, + 1830285721893425217, + 18446744073709551615, + 18446744073709551615, + 245, + 257, + 245, + 257, + 42, + 47, + true, + "(eg, basins)", + "(eg, basins)" + ], + [ + "parenthesis", + "round brackets", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104053344678624, + 11470830176304757031, + 18446744073709551615, + 18446744073709551615, + 322, + 327, + 322, + 327, + 60, + 63, + true, + "(PSE)", + "(PSE)" + ], + [ + "parenthesis", + "round brackets", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 7354314755257879414, + 2977258323293868752, + 18446744073709551615, + 18446744073709551615, + 328, + 361, + 328, + 361, + 63, + 73, + true, + "(eg, seal, source, and reservoir)", + "(eg, seal, source, and reservoir)" + ], + [ + "parenthesis", + "round brackets", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 18038023841784269746, + 5076911958286528785, + 18446744073709551615, + 18446744073709551615, + 463, + 485, + 463, + 485, + 95, + 101, + true, + "(relevant to the PSEs)", + "(relevant to the PSEs)" + ], + [ + "sentence", + "", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 18115937449900154415, + 5770214175319405773, + 18446744073709551615, + 18446744073709551615, + 0, + 146, + 0, + 146, + 0, + 29, + true, + "In step (2), we create the Knowledge Graph by executing a DF that will generate all the entities and relationships relevant to the geology domain.", + "In step (2), we create the Knowledge Graph by executing a DF that will generate all the entities and relationships relevant to the geology domain." + ], + [ + "sentence", + "", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 371194023314892743, + 8157120343645603021, + 18446744073709551615, + 18446744073709551615, + 147, + 362, + 147, + 362, + 29, + 74, + true, + "Our language annotator models trained for geology extract geographic areas, geological structures (eg, basins), formations, ages, rocks, petroleum systems, and their elements (PSE) (eg, seal, source, and reservoir).", + "Our language annotator models trained for geology extract geographic areas, geological structures (eg, basins), formations, ages, rocks, petroleum systems, and their elements (PSE) (eg, seal, source, and reservoir)." + ], + [ + "sentence", + "", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 60590952645855625, + 9133748810633132730, + 18446744073709551615, + 18446744073709551615, + 363, + 486, + 363, + 486, + 74, + 102, + true, + "Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs).", + "Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs)." + ], + [ + "sentence", + "", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 14434924169405353907, + 4188615434259114638, + 18446744073709551615, + 18446744073709551615, + 487, + 630, + 487, + 630, + 102, + 131, + true, + "The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores.", + "The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores." + ], + [ + "sentence", + "", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 14947571950312043839, + 14882716054638140801, + 18446744073709551615, + 18446744073709551615, + 631, + 698, + 631, + 698, + 131, + 144, + true, + "Eventually, the KG included 679 296 edges connecting 116 662 nodes.", + "Eventually, the KG included 679 296 edges connecting 116 662 nodes." + ], + [ + "term", + "enum-term-mark-2", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 10684476924193845757, + 17883303725652713608, + 18446744073709551615, + 18446744073709551615, + 329, + 360, + 329, + 360, + 64, + 72, + true, + "eg, seal, source, and reservoir", + "eg, seal, source, and reservoir" + ], + [ + "term", + "enum-term-mark-3", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 13335488353876392384, + 7462007142456652917, + 18446744073709551615, + 18446744073709551615, + 88, + 114, + 88, + 114, + 20, + 23, + true, + "entities and relationships", + "entities and relationships" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 5877539623435777295, + 7936713845012512320, + 18446744073709551615, + 18446744073709551615, + 27, + 42, + 27, + 42, + 9, + 11, + true, + "Knowledge Graph", + "Knowledge Graph" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 17565352035883069521, + 7876518838855380190, + 18446744073709551615, + 18446744073709551615, + 131, + 145, + 131, + 145, + 26, + 28, + true, + "geology domain", + "geology domain" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 11168880613841244046, + 3738855882894420837, + 18446744073709551615, + 18446744073709551615, + 151, + 176, + 151, + 176, + 30, + 33, + true, + "language annotator models", + "language annotator models" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 5929451728855710538, + 8499314494626515114, + 18446744073709551615, + 18446744073709551615, + 189, + 221, + 189, + 221, + 35, + 39, + true, + "geology extract geographic areas", + "geology extract geographic areas" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15928367849318151150, + 16059037534864104668, + 18446744073709551615, + 18446744073709551615, + 223, + 244, + 223, + 244, + 40, + 42, + true, + "geological structures", + "geological structures" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 1727412062449779824, + 8798729288605233062, + 18446744073709551615, + 18446744073709551615, + 284, + 301, + 284, + 301, + 54, + 56, + true, + "petroleum systems", + "petroleum systems" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 9663226904190425014, + 17830790977056937497, + 18446744073709551615, + 18446744073709551615, + 428, + 443, + 428, + 443, + 88, + 90, + true, + "geological ages", + "geological ages" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15981982758112403734, + 12205103080513700946, + 18446744073709551615, + 18446744073709551615, + 452, + 462, + 452, + 462, + 93, + 95, + true, + "rock types", + "rock types" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16555760578902726317, + 12858294934694547868, + 18446744073709551615, + 18446744073709551615, + 491, + 506, + 491, + 506, + 103, + 105, + true, + "full processing", + "full processing" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 4795376748618017018, + 1519274522679485529, + 18446744073709551615, + 18446744073709551615, + 523, + 535, + 523, + 535, + 108, + 110, + true, + "average rate", + "average rate" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 9601975787076761252, + 1652016010664406025, + 18446744073709551615, + 18446744073709551615, + 559, + 570, + 559, + 570, + 116, + 118, + true, + "worker core", + "worker core" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12400507963759742880, + 15180031475753541242, + 18446744073709551615, + 18446744073709551615, + 595, + 607, + 595, + 607, + 124, + 126, + true, + "worker nodes", + "worker nodes" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625741100019, + 9977938209978319206, + 18446744073709551615, + 18446744073709551615, + 3, + 7, + 3, + 7, + 1, + 2, + true, + "step", + "step" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541480770, + 17325109556329758529, + 18446744073709551615, + 18446744073709551615, + 58, + 60, + 58, + 60, + 14, + 15, + true, + "DF", + "DF" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 14652256560445338257, + 1416445963682787535, + 18446744073709551615, + 18446744073709551615, + 88, + 96, + 88, + 96, + 20, + 21, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 8279380567349713241, + 12542603645636875495, + 18446744073709551615, + 18446744073709551615, + 101, + 114, + 101, + 114, + 22, + 23, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541487324, + 17325105369339186228, + 18446744073709551615, + 18446744073709551615, + 246, + 248, + 246, + 248, + 43, + 44, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16381206570221872041, + 7952037600581639736, + 18446744073709551615, + 18446744073709551615, + 250, + 256, + 250, + 256, + 45, + 46, + true, + "basins", + "basins" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16064217528453934834, + 8232308350310476871, + 18446744073709551615, + 18446744073709551615, + 259, + 269, + 259, + 269, + 48, + 49, + true, + "formations", + "formations" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625700777197, + 9978557737437573440, + 18446744073709551615, + 18446744073709551615, + 271, + 275, + 271, + 275, + 50, + 51, + true, + "ages", + "ages" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104161637315394, + 10899605415371114622, + 18446744073709551615, + 18446744073709551615, + 277, + 282, + 277, + 282, + 52, + 53, + true, + "rocks", + "rocks" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 14652262331391540004, + 4323520615039764616, + 18446744073709551615, + 18446744073709551615, + 313, + 321, + 313, + 321, + 59, + 60, + true, + "elements", + "elements" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896290846, + 3013759111972227093, + 18446744073709551615, + 18446744073709551615, + 323, + 326, + 323, + 326, + 61, + 62, + true, + "PSE", + "PSE" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541487324, + 17325105369339181085, + 18446744073709551615, + 18446744073709551615, + 329, + 331, + 329, + 331, + 64, + 65, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625741054314, + 9977936215248775844, + 18446744073709551615, + 18446744073709551615, + 333, + 337, + 333, + 337, + 66, + 67, + true, + "seal", + "seal" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16381206579112188113, + 8408987821758031825, + 18446744073709551615, + 18446744073709551615, + 339, + 345, + 339, + 345, + 68, + 69, + true, + "source", + "source" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 6168331670275357579, + 3530228493066473595, + 18446744073709551615, + 18446744073709551615, + 351, + 360, + 351, + 360, + 71, + 72, + true, + "reservoir", + "reservoir" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104159242619871, + 10528040562907345525, + 18446744073709551615, + 18446744073709551615, + 387, + 392, + 387, + 392, + 79, + 80, + true, + "total", + "total" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625526136278, + 9992496440036513980, + 18446744073709551615, + 18446744073709551615, + 401, + 405, + 401, + 405, + 82, + 83, + true, + "PSEs", + "PSEs" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16064217528453934834, + 8232308350310450335, + 18446744073709551615, + 18446744073709551615, + 412, + 422, + 412, + 422, + 85, + 86, + true, + "formations", + "formations" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625526136278, + 9992496440035043113, + 18446744073709551615, + 18446744073709551615, + 480, + 484, + 480, + 484, + 99, + 100, + true, + "PSEs", + "PSEs" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541486786, + 17325105373990583372, + 18446744073709551615, + 18446744073709551615, + 543, + 545, + 543, + 545, + 112, + 113, + true, + "ms", + "ms" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625632301461, + 9968221116647419565, + 18446744073709551615, + 18446744073709551615, + 550, + 554, + 550, + 554, + 114, + 115, + true, + "page", + "page" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16381206550376895780, + 7682460348962189650, + 18446744073709551615, + 18446744073709551615, + 577, + 583, + 577, + 583, + 121, + 122, + true, + "system", + "system" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104161555640697, + 10904197285886473134, + 18446744073709551615, + 18446744073709551615, + 624, + 629, + 624, + 629, + 129, + 130, + true, + "cores", + "cores" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541480204, + 17325109606585833008, + 18446744073709551615, + 18446744073709551615, + 647, + 649, + 647, + 649, + 134, + 135, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104162186494203, + 10532267587085008644, + 18446744073709551615, + 18446744073709551615, + 667, + 672, + 667, + 672, + 138, + 139, + true, + "edges", + "edges" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104161758737773, + 10900831603847294449, + 18446744073709551615, + 18446744073709551615, + 692, + 697, + 692, + 697, + 142, + 143, + true, + "nodes", + "nodes" + ], + [ + "numval", + "ival", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 17767354399704235163, + 96563067760012599, + 18446744073709551615, + 18446744073709551615, + 9, + 10, + 9, + 10, + 3, + 4, + true, + "3", + "3" + ], + [ + "parenthesis", + "reference", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 12178341415896394992, + 6455622738827563926, + 18446744073709551615, + 18446744073709551615, + 8, + 11, + 8, + 11, + 2, + 5, + true, + "(3)", + "(3)" + ], + [ + "sentence", + "", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 997268821951707686, + 14496834891993701721, + 18446744073709551615, + 18446744073709551615, + 0, + 79, + 0, + 79, + 0, + 17, + true, + "In step (3), we query the Knowledge Graph using a tailored evaluation workflow.", + "In step (3), we query the Knowledge Graph using a tailored evaluation workflow." + ], + [ + "term", + "enum-term-mark-2", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 6764907971677770258, + 6963840925279480990, + 18446744073709551615, + 18446744073709551615, + 195, + 218, + 195, + 218, + 37, + 42, + true, + "age, formation and rock", + "age, formation and rock" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 5877539623435777295, + 1548140198302342719, + 18446744073709551615, + 18446744073709551615, + 26, + 41, + 26, + 41, + 9, + 11, + true, + "Knowledge Graph", + "Knowledge Graph" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 11745701326970380985, + 14524400766422166580, + 18446744073709551615, + 18446744073709551615, + 59, + 78, + 59, + 78, + 14, + 16, + true, + "evaluation workflow", + "evaluation workflow" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 5877539623435777295, + 1548140198302432862, + 18446744073709551615, + 18446744073709551615, + 159, + 174, + 159, + 174, + 30, + 32, + true, + "Knowledge Graph", + "Knowledge Graph" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 389609625741100019, + 9811489708275752315, + 18446744073709551615, + 18446744073709551615, + 3, + 7, + 3, + 7, + 1, + 2, + true, + "step", + "step" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 14638857990842534974, + 11824267461300128868, + 18446744073709551615, + 18446744073709551615, + 85, + 93, + 85, + 93, + 18, + 19, + true, + "workflow", + "workflow" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 389609625526136278, + 10206463490393311472, + 18446744073709551615, + 18446744073709551615, + 116, + 120, + 116, + 120, + 23, + 24, + true, + "PSEs", + "PSEs" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 14088628410271132453, + 16581151048247701778, + 18446744073709551615, + 18446744073709551615, + 141, + 151, + 141, + 151, + 27, + 28, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 8106397496085150773, + 2627697033995097526, + 18446744073709551615, + 18446744073709551615, + 180, + 187, + 180, + 187, + 34, + 35, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 12178341415895571749, + 6455357909209920985, + 18446744073709551615, + 18446744073709551615, + 195, + 198, + 195, + 198, + 37, + 38, + true, + "age", + "age" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 6187533480885532545, + 3663640608662331706, + 18446744073709551615, + 18446744073709551615, + 200, + 209, + 200, + 209, + 39, + 40, + true, + "formation", + "formation" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 389609625632802170, + 10177362053775881094, + 18446744073709551615, + 18446744073709551615, + 214, + 218, + 214, + 218, + 41, + 42, + true, + "rock", + "rock" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "numval", + "ival", + 1080447728722590402, + "TEXT", + "#/texts/138", + 1.0, + 15441160910541481977, + 12490742773547210041, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "13", + "13" + ], + [ + "numval", + "ival", + 4361549257087816853, + "TEXT", + "#/texts/139", + 1.0, + 15441160910541481979, + 9983816787922721487, + 18446744073709551615, + 18446744073709551615, + 3, + 5, + 3, + 5, + 1, + 2, + true, + "15", + "15" + ], + [ + "numval", + "ival", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 17767354399704235159, + 15458436803011088578, + 18446744073709551615, + 18446744073709551615, + 23, + 24, + 23, + 24, + 4, + 5, + true, + "7", + "7" + ], + [ + "sentence", + "", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 1519458104665017357, + 10083376948813375189, + 18446744073709551615, + 18446744073709551615, + 13, + 64, + 13, + 64, + 2, + 14, + true, + "In Figure 7, we visualize the DAG of this workflow.", + "In Figure 7, we visualize the DAG of this workflow." + ], + [ + "sentence", + "", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 875194381256283721, + 17161126138395688234, + 18446744073709551615, + 18446744073709551615, + 65, + 191, + 65, + 191, + 14, + 35, + true, + "The final node weights are accumulated throughout the branches on the workflow and represent the relevance score of each node.", + "The final node weights are accumulated throughout the branches on the workflow and represent the relevance score of each node." + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 2709262247996496944, + 7256068078148418519, + 18446744073709551615, + 18446744073709551615, + 69, + 87, + 69, + 87, + 15, + 18, + true, + "final node weights", + "final node weights" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 14475039354487345031, + 12703529367274285661, + 18446744073709551615, + 18446744073709551615, + 162, + 177, + 162, + 177, + 29, + 31, + true, + "relevance score", + "relevance score" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 14749101077007455096, + 13375337667618460743, + 18446744073709551615, + 18446744073709551615, + 0, + 11, + 0, + 11, + 0, + 1, + true, + "composition", + "composition" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 16381206514091025767, + 977586802525207516, + 18446744073709551615, + 18446744073709551615, + 16, + 22, + 16, + 22, + 3, + 4, + true, + "Figure", + "Figure" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 12178341415896112046, + 5461004591321450263, + 18446744073709551615, + 18446744073709551615, + 43, + 46, + 43, + 46, + 9, + 10, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 14638857990842534974, + 11234311347164808960, + 18446744073709551615, + 18446744073709551615, + 55, + 63, + 55, + 63, + 12, + 13, + true, + "workflow", + "workflow" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 14652253554560347064, + 10490266172391457967, + 18446744073709551615, + 18446744073709551615, + 119, + 127, + 119, + 127, + 22, + 23, + true, + "branches", + "branches" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 14638857990842534974, + 11234311347164820372, + 18446744073709551615, + 18446744073709551615, + 135, + 143, + 135, + 143, + 25, + 26, + true, + "workflow", + "workflow" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 389609625621164460, + 10220904674049331646, + 18446744073709551615, + 18446744073709551615, + 186, + 190, + 186, + 190, + 33, + 34, + true, + "node", + "node" + ], + [ + "numval", + "fval", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14652250303396477617, + 6263954298368962822, + 18446744073709551615, + 18446744073709551615, + 457, + 465, + 457, + 465, + 87, + 88, + true, + "0.75-0.9", + "0.75-0.9" + ], + [ + "numval", + "fval", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 389609625535995626, + 11162238664629223042, + 18446744073709551615, + 18446744073709551615, + 631, + 635, + 629, + 633, + 124, + 125, + true, + "0.97", + "0.97" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 17767354399704235161, + 17845175019612967856, + 18446744073709551615, + 18446744073709551615, + 264, + 265, + 264, + 265, + 47, + 48, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482672, + 15292900460193668121, + 18446744073709551615, + 18446744073709551615, + 282, + 284, + 282, + 284, + 52, + 53, + false, + "-1", + "-1" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482673, + 15292900459317583926, + 18446744073709551615, + 18446744073709551615, + 289, + 291, + 289, + 291, + 54, + 55, + false, + "-2", + "-2" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482674, + 15292900461018240016, + 18446744073709551615, + 18446744073709551615, + 296, + 298, + 296, + 298, + 56, + 57, + false, + "-3", + "-3" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482676, + 15292900461174373895, + 18446744073709551615, + 18446744073709551615, + 307, + 309, + 307, + 309, + 59, + 60, + false, + "-5", + "-5" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482672, + 15292900460193644573, + 18446744073709551615, + 18446744073709551615, + 426, + 428, + 426, + 428, + 80, + 81, + false, + "-1", + "-1" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 17767354399704235163, + 17845175019597634812, + 18446744073709551615, + 18446744073709551615, + 484, + 485, + 484, + 485, + 92, + 93, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 17767354399704235156, + 17845175019331480896, + 18446744073709551615, + 18446744073709551615, + 489, + 490, + 489, + 490, + 94, + 95, + true, + "4", + "4" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482676, + 15292900461174286862, + 18446744073709551615, + 18446744073709551615, + 601, + 603, + 601, + 603, + 117, + 118, + false, + "-5", + "-5" + ], + [ + "parenthesis", + "round brackets", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 1151740806635216288, + 9949844321651855821, + 18446744073709551615, + 18446744073709551615, + 555, + 566, + 555, + 566, + 107, + 110, + true, + "(precision)", + "(precision)" + ], + [ + "parenthesis", + "round brackets", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14824366717978656546, + 12784736972371149059, + 18446744073709551615, + 18446744073709551615, + 626, + 636, + 626, + 634, + 122, + 126, + true, + "(\u2265 0.97)", + "(\u2265 0.97)" + ], + [ + "parenthesis", + "round brackets", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14654064136955905430, + 8140184277037689536, + 18446744073709551615, + 18446744073709551615, + 730, + 738, + 728, + 736, + 143, + 146, + true, + "(recall)", + "(recall)" + ], + [ + "expression", + "word-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678051, + 18090963244175576847, + 18446744073709551615, + 18446744073709551615, + 110, + 115, + 110, + 115, + 18, + 19, + true, + "top-k", + "top-k" + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678245, + 18090963127902817900, + 18446744073709551615, + 18446744073709551615, + 279, + 284, + 279, + 284, + 52, + 53, + true, + "top-1", + "top-1" + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678244, + 18090963127852540080, + 18446744073709551615, + 18446744073709551615, + 286, + 291, + 286, + 291, + 54, + 55, + true, + "top-2", + "top-2" + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678251, + 18090963018010454873, + 18446744073709551615, + 18446744073709551615, + 293, + 298, + 293, + 298, + 56, + 57, + true, + "top-3", + "top-3" + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678249, + 18090963122544700654, + 18446744073709551615, + 18446744073709551615, + 304, + 309, + 304, + 309, + 59, + 60, + true, + "top-5", + "top-5" + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678245, + 18090963127902815000, + 18446744073709551615, + 18446744073709551615, + 423, + 428, + 423, + 428, + 80, + 81, + true, + "top-1", + "top-1" + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678249, + 18090963122544576128, + 18446744073709551615, + 18446744073709551615, + 598, + 603, + 598, + 603, + 117, + 118, + true, + "top-5", + "top-5" + ], + [ + "sentence", + "", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 4387565148892077336, + 10851632580506485696, + 18446744073709551615, + 18446744073709551615, + 0, + 125, + 0, + 125, + 0, + 21, + true, + "To evaluate the correctness of the predicted PSE properties, we follow the standard practice of reporting the top-k accuracy.", + "To evaluate the correctness of the predicted PSE properties, we follow the standard practice of reporting the top-k accuracy." + ], + [ + "sentence", + "", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 16884271044112956615, + 14180978077955603249, + 18446744073709551615, + 18446744073709551615, + 126, + 254, + 126, + 254, + 21, + 45, + true, + "This is computed as the percentage in which any of the k highest ranked answers matches the expected answer, over all documents.", + "This is computed as the percentage in which any of the k highest ranked answers matches the expected answer, over all documents." + ], + [ + "sentence", + "", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 7112523875671286026, + 15753540610980998668, + 18446744073709551615, + 18446744073709551615, + 255, + 371, + 255, + 371, + 45, + 70, + true, + "In Table 1, we show the top-1, top-2, top-3, and top-5 accuracy for all properties of each petroleum system element.", + "In Table 1, we show the top-1, top-2, top-3, and top-5 accuracy for all properties of each petroleum system element." + ], + [ + "sentence", + "", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 9700386374170371940, + 17921036849237798431, + 18446744073709551615, + 18446744073709551615, + 372, + 411, + 372, + 411, + 70, + 77, + true, + "One can make two distinct observations.", + "One can make two distinct observations." + ], + [ + "sentence", + "", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 1441259272582849102, + 16488672922824239869, + 18446744073709551615, + 18446744073709551615, + 412, + 567, + 412, + 567, + 77, + 111, + true, + "First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision).", + "First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision)." + ], + [ + "sentence", + "", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 6488415355581121222, + 4997443328230932797, + 18446744073709551615, + 18446744073709551615, + 568, + 739, + 568, + 737, + 111, + 147, + true, + "Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall).", + "Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall)." + ], + [ + "sentence", + "", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 9883022576161827356, + 11048410564078668147, + 18446744073709551615, + 18446744073709551615, + 740, + 834, + 738, + 832, + 147, + 164, + true, + "Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory.", + "Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory." + ], + [ + "term", + "enum-term-mark-1", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 9070903762842328562, + 13113852696122446084, + 18446744073709551615, + 18446744073709551615, + 293, + 318, + 293, + 318, + 56, + 61, + true, + "top-3, and top-5 accuracy", + "top-3, and top-5 accuracy" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 5371881787938650225, + 1603959675351734932, + 18446744073709551615, + 18446744073709551615, + 45, + 59, + 45, + 59, + 7, + 9, + true, + "PSE properties", + "PSE properties" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 388046855546136742, + 9120352735722685290, + 18446744073709551615, + 18446744073709551615, + 75, + 92, + 75, + 92, + 13, + 15, + true, + "standard practice", + "standard practice" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 1680528496688141818, + 17588474493437002500, + 18446744073709551615, + 18446744073709551615, + 110, + 124, + 110, + 124, + 18, + 20, + true, + "top-k accuracy", + "top-k accuracy" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 13028318207594970709, + 2990468382393660774, + 18446744073709551615, + 18446744073709551615, + 304, + 318, + 304, + 318, + 59, + 61, + true, + "top-5 accuracy", + "top-5 accuracy" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15085703780898398044, + 6927514159527930982, + 18446744073709551615, + 18446744073709551615, + 346, + 370, + 346, + 370, + 66, + 69, + true, + "petroleum system element", + "petroleum system element" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 9212537002518769220, + 3673646388873906846, + 18446744073709551615, + 18446744073709551615, + 389, + 410, + 389, + 410, + 74, + 76, + true, + "distinct observations", + "distinct observations" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 2903041817043822353, + 5491223959490431705, + 18446744073709551615, + 18446744073709551615, + 423, + 436, + 423, + 436, + 80, + 82, + true, + "top-1 numbers", + "top-1 numbers" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14476305150084091928, + 4901476961065610348, + 18446744073709551615, + 18446744073709551615, + 507, + 522, + 507, + 522, + 99, + 101, + true, + "relevant result", + "relevant result" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 2912823700701416523, + 18408240535293277046, + 18446744073709551615, + 18446744073709551615, + 598, + 611, + 598, + 611, + 117, + 119, + true, + "top-5 numbers", + "top-5 numbers" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 417457895991466544, + 13846422900098246222, + 18446744073709551615, + 18446744073709551615, + 666, + 677, + 664, + 675, + 132, + 134, + true, + "able detect", + "able detect" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 2136116818459714255, + 8001485684986399574, + 18446744073709551615, + 18446744073709551615, + 764, + 783, + 762, + 781, + 153, + 155, + true, + "language annotators", + "language annotators" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14857819661511796263, + 10963692960997527590, + 18446744073709551615, + 18446744073709551615, + 791, + 811, + 789, + 809, + 157, + 160, + true, + "KG creation pipeline", + "KG creation pipeline" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 2993400436143573854, + 495563557915533550, + 18446744073709551615, + 18446744073709551615, + 16, + 27, + 16, + 27, + 3, + 4, + true, + "correctness", + "correctness" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 13928971162448274670, + 12605656053258808723, + 18446744073709551615, + 18446744073709551615, + 150, + 160, + 150, + 160, + 26, + 27, + true, + "percentage", + "percentage" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 8106397678203715209, + 10309807798569118015, + 18446744073709551615, + 18446744073709551615, + 198, + 205, + 198, + 205, + 35, + 36, + true, + "answers", + "answers" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 16381206574646599727, + 10453069960511565431, + 18446744073709551615, + 18446744073709551615, + 227, + 233, + 227, + 233, + 39, + 40, + true, + "answer", + "answer" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 6167933651658664291, + 18428108737827032217, + 18446744073709551615, + 18446744073709551615, + 244, + 253, + 244, + 253, + 43, + 44, + true, + "documents", + "documents" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678245, + 18090963127902817900, + 18446744073709551615, + 18446744073709551615, + 279, + 284, + 279, + 284, + 52, + 53, + true, + "top-1", + "top-1" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678244, + 18090963127852540080, + 18446744073709551615, + 18446744073709551615, + 286, + 291, + 286, + 291, + 54, + 55, + true, + "top-2", + "top-2" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14088628410271132453, + 1572692648601073579, + 18446744073709551615, + 18446744073709551615, + 327, + 337, + 327, + 337, + 63, + 64, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104161634702433, + 2234794017392814741, + 18446744073709551615, + 18446744073709551615, + 448, + 453, + 448, + 453, + 85, + 86, + true, + "range", + "range" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104161511786824, + 2268234006473983274, + 18446744073709551615, + 18446744073709551615, + 491, + 496, + 491, + 496, + 95, + 96, + true, + "cases", + "cases" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541480204, + 15292900207337913499, + 18446744073709551615, + 18446744073709551615, + 540, + 542, + 540, + 542, + 104, + 105, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 6184954595655792282, + 10326926050568403160, + 18446744073709551615, + 18446744073709551615, + 556, + 565, + 556, + 565, + 108, + 109, + true, + "precision", + "precision" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 16381206550376895780, + 15564683093048068331, + 18446744073709551615, + 18446744073709551615, + 655, + 661, + 653, + 659, + 130, + 131, + true, + "system", + "system" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 389609625526136278, + 11163534876152642859, + 18446744073709551615, + 18446744073709551615, + 704, + 708, + 702, + 706, + 139, + 140, + true, + "PSEs", + "PSEs" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14088628410271132453, + 1572692648597504797, + 18446744073709551615, + 18446744073709551615, + 719, + 729, + 717, + 727, + 142, + 143, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 16381206521531485437, + 2410946269934605693, + 18446744073709551615, + 18446744073709551615, + 731, + 737, + 729, + 735, + 144, + 145, + true, + "recall", + "recall" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 16381206521531485437, + 2410946269934606663, + 18446744073709551615, + 18446744073709551615, + 750, + 756, + 748, + 754, + 150, + 151, + true, + "recall", + "recall" + ], + [ + "numval", + "ival", + 16446129547721407877, + "TEXT", + "#/texts/142", + 1.0, + 17767354399704235158, + 11362596522813034737, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "6", + "6" + ], + [ + "numval", + "ival", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 17767354399704235161, + 16606870843966802051, + 18446744073709551615, + 18446744073709551615, + 521, + 522, + 521, + 522, + 80, + 81, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 17767354399704235162, + 16606870838110795262, + 18446744073709551615, + 18446744073709551615, + 579, + 580, + 579, + 580, + 93, + 94, + true, + "2", + "2" + ], + [ + "parenthesis", + "reference", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 12178341415896395122, + 5534148918627002152, + 18446744073709551615, + 18446744073709551615, + 520, + 523, + 520, + 523, + 79, + 82, + true, + "(1)", + "(1)" + ], + [ + "parenthesis", + "reference", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 12178341415896395187, + 5534148917561236863, + 18446744073709551615, + 18446744073709551615, + 578, + 581, + 578, + 581, + 92, + 95, + true, + "(2)", + "(2)" + ], + [ + "expression", + "word-concatenation", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 1053045968880146688, + 12583633977815123246, + 18446744073709551615, + 18446744073709551615, + 427, + 438, + 427, + 438, + 67, + 68, + true, + "graph-scale", + "graph-scale" + ], + [ + "sentence", + "", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 16415944396843588595, + 17709466530502406570, + 18446744073709551615, + 18446744073709551615, + 0, + 221, + 0, + 221, + 0, + 35, + true, + "With the introduction of the CPS platform, we demonstrate substantial benefit for domain experts and data scientists in exercising deep exploration of published knowledge in a fully integrated, yet modular cloud solution.", + "With the introduction of the CPS platform, we demonstrate substantial benefit for domain experts and data scientists in exercising deep exploration of published knowledge in a fully integrated, yet modular cloud solution." + ], + [ + "sentence", + "", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 337455657200974030, + 8226213723352014521, + 18446744073709551615, + 18446744073709551615, + 222, + 449, + 222, + 449, + 35, + 70, + true, + "CPS seamlessly connects to the CSS, complementing it with a highly scalable, automated pipeline to build consistent domain knowledge models and an intuitive, powerful approach to explorational queries and graph-scale analytics.", + "CPS seamlessly connects to the CSS, complementing it with a highly scalable, automated pipeline to build consistent domain knowledge models and an intuitive, powerful approach to explorational queries and graph-scale analytics." + ], + [ + "term", + "enum-term-mark-2", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 16305600260127055870, + 8195345917782449202, + 18446744073709551615, + 18446744073709551615, + 554, + 576, + 554, + 576, + 88, + 91, + true, + "curation or annotation", + "curation or annotation" + ], + [ + "term", + "enum-term-mark-2", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 7917122769361737138, + 17010355979462668621, + 18446744073709551615, + 18446744073709551615, + 641, + 672, + 641, + 672, + 105, + 110, + true, + "ingestion, processing and query", + "ingestion, processing and query" + ], + [ + "term", + "enum-term-mark-3", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 15152392191731287429, + 5208425395920976424, + 18446744073709551615, + 18446744073709551615, + 89, + 116, + 89, + 116, + 14, + 18, + true, + "experts and data scientists", + "experts and data scientists" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 12779036928191531604, + 9594818965137662456, + 18446744073709551615, + 18446744073709551615, + 29, + 41, + 29, + 41, + 5, + 7, + true, + "CPS platform", + "CPS platform" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 13236236219972254996, + 10365732013549486229, + 18446744073709551615, + 18446744073709551615, + 58, + 77, + 58, + 77, + 10, + 12, + true, + "substantial benefit", + "substantial benefit" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 4156286750856532243, + 5197485465821099672, + 18446744073709551615, + 18446744073709551615, + 82, + 96, + 82, + 96, + 13, + 15, + true, + "domain experts", + "domain experts" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 6736565927644210758, + 5282440466057373776, + 18446744073709551615, + 18446744073709551615, + 101, + 116, + 101, + 116, + 16, + 18, + true, + "data scientists", + "data scientists" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 8856108142217449705, + 18116497161907130755, + 18446744073709551615, + 18446744073709551615, + 131, + 147, + 131, + 147, + 20, + 22, + true, + "deep exploration", + "deep exploration" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 18397948429710667913, + 14377408563795531173, + 18446744073709551615, + 18446744073709551615, + 198, + 220, + 198, + 220, + 31, + 34, + true, + "modular cloud solution", + "modular cloud solution" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 2924827465470055507, + 17840272590051434356, + 18446744073709551615, + 18446744073709551615, + 299, + 317, + 299, + 317, + 49, + 51, + true, + "automated pipeline", + "automated pipeline" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 5640550190838251117, + 9242927206023493986, + 18446744073709551615, + 18446744073709551615, + 327, + 361, + 327, + 361, + 53, + 57, + true, + "consistent domain knowledge models", + "consistent domain knowledge models" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 16567127297746127145, + 14122132328015379420, + 18446744073709551615, + 18446744073709551615, + 380, + 397, + 380, + 397, + 61, + 63, + true, + "powerful approach", + "powerful approach" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 13481069231801630849, + 3957307499739387254, + 18446744073709551615, + 18446744073709551615, + 401, + 422, + 401, + 422, + 64, + 66, + true, + "explorational queries", + "explorational queries" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 970780854150342161, + 7855464794916656195, + 18446744073709551615, + 18446744073709551615, + 427, + 448, + 427, + 448, + 67, + 69, + true, + "graph-scale analytics", + "graph-scale analytics" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 14155725816972569762, + 3313864890251993459, + 18446744073709551615, + 18446744073709551615, + 485, + 518, + 485, + 518, + 75, + 78, + true, + "fundamental design considerations", + "fundamental design considerations" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 3841439629787535208, + 4030642280425566118, + 18446744073709551615, + 18446744073709551615, + 542, + 562, + 542, + 562, + 86, + 89, + true, + "manual data curation", + "manual data curation" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 4430075463994275386, + 1531064733678072666, + 18446744073709551615, + 18446744073709551615, + 603, + 625, + 603, + 625, + 100, + 102, + true, + "efficient architecture", + "efficient architecture" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 15144348319402645349, + 2995094926762721744, + 18446744073709551615, + 18446744073709551615, + 667, + 682, + 667, + 682, + 109, + 111, + true, + "query workloads", + "query workloads" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 597480423109041411, + 10291295016493852070, + 18446744073709551615, + 18446744073709551615, + 9, + 21, + 9, + 21, + 2, + 3, + true, + "introduction", + "introduction" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 6184122545182835014, + 15596336968217381296, + 18446744073709551615, + 18446744073709551615, + 161, + 170, + 161, + 170, + 24, + 25, + true, + "knowledge", + "knowledge" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 12178341415896222428, + 5534090683430116629, + 18446744073709551615, + 18446744073709551615, + 222, + 225, + 222, + 225, + 35, + 36, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 12178341415896222616, + 5534090651231733521, + 18446744073709551615, + 18446744073709551615, + 253, + 256, + 253, + 256, + 40, + 41, + true, + "CSS", + "CSS" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 15359807916847495711, + 16353024261898901635, + 18446744073709551615, + 18446744073709551615, + 566, + 576, + 566, + 576, + 90, + 91, + true, + "annotation", + "annotation" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 6182654480499682241, + 10279250109304112765, + 18446744073709551615, + 18446744073709551615, + 641, + 650, + 641, + 650, + 105, + 106, + true, + "ingestion", + "ingestion" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 14088627147213114570, + 14322058830662955649, + 18446744073709551615, + 18446744073709551615, + 652, + 662, + 652, + 662, + 107, + 108, + true, + "processing", + "processing" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "numval", + "ival", + 2144926730621142072, + "TEXT", + "#/texts/145", + 1.0, + 15441160910541481978, + 18064563043183731132, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "14", + "14" + ], + [ + "numval", + "ival", + 2144926730621142072, + "TEXT", + "#/texts/145", + 1.0, + 15441160910541481979, + 18064563042796865823, + 18446744073709551615, + 18446744073709551615, + 4, + 6, + 4, + 6, + 2, + 3, + true, + "15", + "15" + ], + [ + "numval", + "ival", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 17767354399704235163, + 2699991593779864855, + 18446744073709551615, + 18446744073709551615, + 24, + 25, + 24, + 25, + 6, + 7, + true, + "3", + "3" + ], + [ + "parenthesis", + "reference", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 12178341415896394992, + 13000428721171190822, + 18446744073709551615, + 18446744073709551615, + 23, + 26, + 23, + 26, + 5, + 8, + true, + "(3)", + "(3)" + ], + [ + "sentence", + "", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 22046687723110617, + 16139393736301290316, + 18446744073709551615, + 18446744073709551615, + 27, + 119, + 27, + 119, + 8, + 22, + true, + "We expose the capabilities through an intuitively consumable API and complementary UI tools.", + "We expose the capabilities through an intuitively consumable API and complementary UI tools." + ], + [ + "term", + "single-term", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 17809956737872564404, + 98872371406955147, + 18446744073709551615, + 18446744073709551615, + 2, + 17, + 2, + 17, + 1, + 3, + true, + "single platform", + "single platform" + ], + [ + "term", + "single-term", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 2165839110633348678, + 11437800686337083589, + 18446744073709551615, + 18446744073709551615, + 77, + 91, + 77, + 91, + 15, + 17, + true, + "consumable API", + "consumable API" + ], + [ + "term", + "single-term", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 17732874984494701283, + 1188095097533333023, + 18446744073709551615, + 18446744073709551615, + 96, + 118, + 96, + 118, + 18, + 21, + true, + "complementary UI tools", + "complementary UI tools" + ], + [ + "term", + "single-term", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 11892545746641362388, + 4268311181255501740, + 18446744073709551615, + 18446744073709551615, + 41, + 53, + 41, + 53, + 11, + 12, + true, + "capabilities", + "capabilities" + ], + [ + "expression", + "word-concatenation", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 15984801488078789848, + 8130345344279106999, + 18446744073709551615, + 18446744073709551615, + 75, + 85, + 75, + 85, + 15, + 16, + true, + "real-world", + "real-world" + ], + [ + "sentence", + "", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 7919487032278953085, + 6331733154504057997, + 18446744073709551615, + 18446744073709551615, + 0, + 157, + 0, + 157, + 0, + 29, + true, + "In our oil and gas case study, we successfully verified our solution for a real-world application with the help of subject matter experts from a client team.", + "In our oil and gas case study, we successfully verified our solution for a real-world application with the help of subject matter experts from a client team." + ], + [ + "sentence", + "", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 13179783236811628643, + 10576686414353350640, + 18446744073709551615, + 18446744073709551615, + 158, + 322, + 158, + 322, + 29, + 61, + true, + "Currently, CCS and CPS are actively used in more than five client engagements, most notably in the oil and gas industry as well as in the material science industry.", + "Currently, CCS and CPS are actively used in more than five client engagements, most notably in the oil and gas industry as well as in the material science industry." + ], + [ + "term", + "enum-term-mark-2", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 3124601704379826877, + 4362564571906744374, + 18446744073709551615, + 18446744073709551615, + 7, + 29, + 7, + 29, + 2, + 7, + true, + "oil and gas case study", + "oil and gas case study" + ], + [ + "term", + "enum-term-mark-2", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 848781837929279741, + 13179566797715975811, + 18446744073709551615, + 18446744073709551615, + 257, + 277, + 257, + 277, + 48, + 52, + true, + "oil and gas industry", + "oil and gas industry" + ], + [ + "term", + "enum-term-mark-4", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 2824345713217859749, + 9704847614342538686, + 18446744073709551615, + 18446744073709551615, + 169, + 180, + 169, + 180, + 31, + 34, + true, + "CCS and CPS", + "CCS and CPS" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 10318284848910968979, + 1295911181594562505, + 18446744073709551615, + 18446744073709551615, + 15, + 29, + 15, + 29, + 4, + 7, + true, + "gas case study", + "gas case study" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 8973266897479869153, + 7123298173656310256, + 18446744073709551615, + 18446744073709551615, + 75, + 97, + 75, + 97, + 15, + 17, + true, + "real-world application", + "real-world application" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 2532084510793506348, + 12340694780983444669, + 18446744073709551615, + 18446744073709551615, + 115, + 137, + 115, + 137, + 21, + 24, + true, + "subject matter experts", + "subject matter experts" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 2350671729723156275, + 15376321056085788759, + 18446744073709551615, + 18446744073709551615, + 145, + 156, + 145, + 156, + 26, + 28, + true, + "client team", + "client team" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 11025819392462273971, + 1247396544615538597, + 18446744073709551615, + 18446744073709551615, + 217, + 235, + 217, + 235, + 41, + 43, + true, + "client engagements", + "client engagements" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 17613546823892249124, + 8946947737051436961, + 18446744073709551615, + 18446744073709551615, + 265, + 277, + 265, + 277, + 50, + 52, + true, + "gas industry", + "gas industry" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 1620835639831122355, + 4723302290987844432, + 18446744073709551615, + 18446744073709551615, + 296, + 321, + 296, + 321, + 57, + 60, + true, + "material science industry", + "material science industry" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 12178341415895623363, + 8268162173441645749, + 18446744073709551615, + 18446744073709551615, + 7, + 10, + 7, + 10, + 2, + 3, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 14635106751859230946, + 13564055847176212531, + 18446744073709551615, + 18446744073709551615, + 60, + 68, + 60, + 68, + 12, + 13, + true, + "solution", + "solution" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 389609625695143886, + 6422601049208570234, + 18446744073709551615, + 18446744073709551615, + 107, + 111, + 107, + 111, + 19, + 20, + true, + "help", + "help" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 12178341415896221596, + 8268263923333827885, + 18446744073709551615, + 18446744073709551615, + 169, + 172, + 169, + 172, + 31, + 32, + true, + "CCS", + "CCS" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 12178341415896222428, + 8268263906205242250, + 18446744073709551615, + 18446744073709551615, + 177, + 180, + 177, + 180, + 33, + 34, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 12178341415895623363, + 8268162173441630103, + 18446744073709551615, + 18446744073709551615, + 257, + 260, + 257, + 260, + 48, + 49, + true, + "oil", + "oil" + ], + [ + "expression", + "wtoken-concatenation", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 5945163521127932196, + 8132279328728560937, + 18446744073709551615, + 18446744073709551615, + 69, + 78, + 69, + 78, + 11, + 12, + true, + "arXiv.org", + "arXiv.org" + ], + [ + "sentence", + "", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 4472913868502496196, + 2721699422055737565, + 18446744073709551615, + 18446744073709551615, + 0, + 172, + 0, + 172, + 0, + 30, + true, + "Future work will focus on processing public repositories such as the arXiv.org library, USPTO, and PubMed in order to make their content available to deep data exploration.", + "Future work will focus on processing public repositories such as the arXiv.org library, USPTO, and PubMed in order to make their content available to deep data exploration." + ], + [ + "term", + "enum-term-mark-4", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 15838174910682029174, + 3269528259737521863, + 18446744073709551615, + 18446744073709551615, + 88, + 105, + 88, + 105, + 14, + 18, + true, + "USPTO, and PubMed", + "USPTO, and PubMed" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 2018557288699233431, + 16975902875644112037, + 18446744073709551615, + 18446744073709551615, + 0, + 11, + 0, + 11, + 0, + 2, + true, + "Future work", + "Future work" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 13352531518695846369, + 12144575405222745087, + 18446744073709551615, + 18446744073709551615, + 37, + 56, + 37, + 56, + 6, + 8, + true, + "public repositories", + "public repositories" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 5292914289406644200, + 11970976502791126463, + 18446744073709551615, + 18446744073709551615, + 69, + 86, + 69, + 86, + 11, + 13, + true, + "arXiv.org library", + "arXiv.org library" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 13671659409933113155, + 9272823095563995053, + 18446744073709551615, + 18446744073709551615, + 150, + 171, + 150, + 171, + 26, + 29, + true, + "deep data exploration", + "deep data exploration" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 329104162018760499, + 18012444193898764026, + 18446744073709551615, + 18446744073709551615, + 88, + 93, + 88, + 93, + 14, + 15, + true, + "USPTO", + "USPTO" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 16381206483336886705, + 13596604403738397760, + 18446744073709551615, + 18446744073709551615, + 99, + 105, + 99, + 105, + 17, + 18, + true, + "PubMed", + "PubMed" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 329104161571401725, + 18386641959556324131, + 18446744073709551615, + 18446744073709551615, + 109, + 114, + 109, + 114, + 19, + 20, + true, + "order", + "order" + ], + [ + "sentence", + "", + 4778022085288441371, + "TEXT", + "#/texts/150", + 1.0, + 11662592888764396578, + 14754781215187398204, + 18446744073709551615, + 18446744073709551615, + 0, + 41, + 0, + 41, + 0, + 7, + true, + "Data subject to third party restrictions.", + "Data subject to third party restrictions." + ], + [ + "term", + "single-term", + 4778022085288441371, + "TEXT", + "#/texts/150", + 1.0, + 7076010952609514944, + 4435271120668497918, + 18446744073709551615, + 18446744073709551615, + 16, + 40, + 16, + 40, + 3, + 6, + true, + "third party restrictions", + "third party restrictions" + ], + [ + "term", + "single-term", + 4778022085288441371, + "TEXT", + "#/texts/150", + 1.0, + 389609625537659398, + 6127806615430218387, + 18446744073709551615, + 18446744073709551615, + 0, + 4, + 0, + 4, + 0, + 1, + true, + "Data", + "Data" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 10302035827600178331, + 6710097973531677104, + 18446744073709551615, + 18446744073709551615, + 36, + 45, + 36, + 45, + 6, + 6, + false, + "0000-0002", + "0000-0002" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 6624857390961351666, + 3541555616013892515, + 18446744073709551615, + 18446744073709551615, + 46, + 55, + 46, + 55, + 6, + 7, + false, + "8088-0823", + "8088-0823" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 10302035827600178332, + 6710097973532471075, + 18446744073709551615, + 18446744073709551615, + 88, + 97, + 88, + 97, + 9, + 9, + false, + "0000-0001", + "0000-0001" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 6560223242063427106, + 13609528576140932418, + 18446744073709551615, + 18446744073709551615, + 98, + 107, + 98, + 107, + 9, + 10, + false, + "7216-8505", + "7216-8505" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 10302035827600178332, + 6710097973532498930, + 18446744073709551615, + 18446744073709551615, + 141, + 150, + 141, + 150, + 20, + 21, + true, + "0000-0001", + "0000-0001" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 6573923715856392023, + 13497670743408223376, + 18446744073709551615, + 18446744073709551615, + 151, + 160, + 151, + 160, + 22, + 23, + true, + "5761-0422", + "5761-0422" + ], + [ + "link", + "url", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 7086030415698247677, + 10516035679311822965, + 18446744073709551615, + 18446744073709551615, + 18, + 55, + 18, + 55, + 6, + 7, + true, + "https://orcid.org/0000-0002-8088-0823", + "https://orcid.org/0000-0002-8088-0823" + ], + [ + "link", + "url", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 2033258390552333901, + 14596379607593903375, + 18446744073709551615, + 18446744073709551615, + 70, + 107, + 70, + 107, + 9, + 10, + true, + "https://orcid.org/0000-0001-7216-8505", + "https://orcid.org/0000-0001-7216-8505" + ], + [ + "link", + "url", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 2031879929749239141, + 13323569836539834175, + 18446744073709551615, + 18446744073709551615, + 123, + 160, + 123, + 160, + 12, + 23, + true, + "https://orcid.org/0000-0001-5761-0422", + "https://orcid.org/0000-0001-5761-0422" + ], + [ + "name", + "person-name", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 4686361850733567621, + 8628324652592599079, + 18446744073709551615, + 18446744073709551615, + 0, + 17, + 0, + 17, + 0, + 6, + true, + "Peter W J Staar", + "Peter W. J. Staar" + ], + [ + "expression", + "wtoken-concatenation", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 7086030415698247677, + 10516035679311822965, + 18446744073709551615, + 18446744073709551615, + 18, + 55, + 18, + 55, + 6, + 7, + true, + "https://orcid.org/0000-0002-8088-0823", + "https://orcid.org/0000-0002-8088-0823" + ], + [ + "expression", + "wtoken-concatenation", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 2033258390552333901, + 14596379607593903375, + 18446744073709551615, + 18446744073709551615, + 70, + 107, + 70, + 107, + 9, + 10, + true, + "https://orcid.org/0000-0001-7216-8505", + "https://orcid.org/0000-0001-7216-8505" + ], + [ + "link", + "url", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 3527101060180289873, + 4288347075719597580, + 18446744073709551615, + 18446744073709551615, + 30, + 52, + 30, + 52, + 6, + 15, + true, + "https://www.elastic.co", + "https://www.elastic.co" + ], + [ + "link", + "url", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 7381438071617048818, + 3762754436696500331, + 18446744073709551615, + 18446744073709551615, + 72, + 97, + 72, + 97, + 19, + 28, + true, + "https://lucene.apache.org", + "https://lucene.apache.org" + ], + [ + "link", + "url", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 7699234159584878934, + 8720273332387288393, + 18446744073709551615, + 18446744073709551615, + 38, + 52, + 38, + 52, + 10, + 15, + true, + "www.elastic.co", + "www.elastic.co" + ], + [ + "parenthesis", + "round brackets", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 569129533218351355, + 3470387564381472056, + 18446744073709551615, + 18446744073709551615, + 29, + 53, + 29, + 53, + 5, + 16, + true, + "(https://www.elastic.co)", + "(https://www.elastic.co)" + ], + [ + "parenthesis", + "round brackets", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 9861891912574044258, + 4499735700376823345, + 18446744073709551615, + 18446744073709551615, + 71, + 98, + 71, + 98, + 18, + 29, + true, + "(https://lucene.apache.org)", + "(https://lucene.apache.org)" + ], + [ + "sentence", + "", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 15289232076819477879, + 7514275424619623119, + 18446744073709551615, + 18446744073709551615, + 2, + 99, + 2, + 99, + 1, + 30, + true, + "For example, ElasticSearch (https://www.elastic.co) and ApacheLucene (https://lucene.apache.org).", + "For example, ElasticSearch (https://www.elastic.co) and ApacheLucene (https://lucene.apache.org)." + ], + [ + "term", + "single-term", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 8106397496085150773, + 634835345710543557, + 18446744073709551615, + 18446744073709551615, + 6, + 13, + 6, + 13, + 2, + 3, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 7002898201903728267, + 1737821260812359285, + 18446744073709551615, + 18446744073709551615, + 15, + 28, + 15, + 28, + 4, + 5, + true, + "ElasticSearch", + "ElasticSearch" + ], + [ + "term", + "single-term", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 18329142643795090602, + 2655325726805406767, + 18446744073709551615, + 18446744073709551615, + 58, + 70, + 58, + 70, + 17, + 18, + true, + "ApacheLucene", + "ApacheLucene" + ], + [ + "sentence", + "", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 12458532663664098281, + 15414412942250901023, + 18446744073709551615, + 18446744073709551615, + 4, + 160, + 2, + 158, + 1, + 27, + true, + "Most language entities from a technical field are typically represented in a very specific, rigorous way that can be easily captured by regular expressions.", + "Most language entities from a technical field are typically represented in a very specific, rigorous way that can be easily captured by regular expressions." + ], + [ + "sentence", + "", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 931259114935419412, + 2966611005798001879, + 18446744073709551615, + 18446744073709551615, + 161, + 285, + 159, + 283, + 27, + 48, + true, + "We found that in practice, regular expressions often outperform DL models, since we can simply encode these representations.", + "We found that in practice, regular expressions often outperform DL models, since we can simply encode these representations." + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 5234082820457819963, + 10637531498360814115, + 18446744073709551615, + 18446744073709551615, + 4, + 26, + 2, + 24, + 1, + 4, + true, + "Most language entities", + "Most language entities" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 6630151693041027733, + 5310121539758151013, + 18446744073709551615, + 18446744073709551615, + 34, + 49, + 32, + 47, + 6, + 8, + true, + "technical field", + "technical field" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 5273909445408112658, + 2278695577032735159, + 18446744073709551615, + 18446744073709551615, + 96, + 108, + 94, + 106, + 16, + 18, + true, + "rigorous way", + "rigorous way" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 17163002546996330472, + 3748678944934416450, + 18446744073709551615, + 18446744073709551615, + 140, + 159, + 138, + 157, + 24, + 26, + true, + "regular expressions", + "regular expressions" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 17163002546996330472, + 3748678944934419646, + 18446744073709551615, + 18446744073709551615, + 188, + 207, + 186, + 205, + 33, + 35, + true, + "regular expressions", + "regular expressions" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 6557955699305751580, + 14416158030891845149, + 18446744073709551615, + 18446744073709551615, + 225, + 234, + 223, + 232, + 37, + 39, + true, + "DL models", + "DL models" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 14814125472896938138, + 13430040721706784836, + 18446744073709551615, + 18446744073709551615, + 178, + 186, + 176, + 184, + 31, + 32, + true, + "practice", + "practice" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 12118184688624410579, + 7523104278049565649, + 18446744073709551615, + 18446744073709551615, + 269, + 284, + 267, + 282, + 46, + 47, + true, + "representations", + "representations" + ], + [ + "link", + "url", + 1997735398126013155, + "TEXT", + "#/texts/156", + 1.0, + 11080755855567888942, + 12138756017738546093, + 18446744073709551615, + 18446744073709551615, + 4, + 24, + 2, + 22, + 1, + 10, + true, + "https://www.nltk.org", + "https://www.nltk.org" + ], + [ + "link", + "url", + 1997735398126013155, + "TEXT", + "#/texts/156", + 1.0, + 7030452472279930374, + 3139262024232962844, + 18446744073709551615, + 18446744073709551615, + 12, + 24, + 10, + 22, + 5, + 10, + true, + "www.nltk.org", + "www.nltk.org" + ], + [ + "expression", + "word-concatenation", + 13566764974477978642, + "TEXT", + "#/texts/157", + 1.0, + 11674671916710033839, + 13118355578687598339, + 18446744073709551615, + 18446744073709551615, + 26, + 37, + 25, + 36, + 5, + 6, + true, + "JSON-schema", + "JSON-schema" + ], + [ + "sentence", + "", + 13566764974477978642, + "TEXT", + "#/texts/157", + 1.0, + 12149225629366182819, + 13287297407560091582, + 18446744073709551615, + 18446744073709551615, + 3, + 53, + 2, + 52, + 1, + 9, + true, + "We follow the standard JSON-schema for references.", + "We follow the standard JSON-schema for references." + ], + [ + "term", + "single-term", + 13566764974477978642, + "TEXT", + "#/texts/157", + 1.0, + 5670807822075147198, + 8836245391557497524, + 18446744073709551615, + 18446744073709551615, + 17, + 37, + 16, + 36, + 4, + 6, + true, + "standard JSON-schema", + "standard JSON-schema" + ], + [ + "term", + "single-term", + 13566764974477978642, + "TEXT", + "#/texts/157", + 1.0, + 15984565858548749625, + 721337063821589131, + 18446744073709551615, + 18446744073709551615, + 42, + 52, + 41, + 51, + 7, + 8, + true, + "references", + "references" + ], + [ + "numval", + "ival", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 17767354399704235161, + 13902073100028876379, + 18446744073709551615, + 18446744073709551615, + 148, + 149, + 147, + 148, + 29, + 30, + true, + "1", + "1" + ], + [ + "parenthesis", + "round brackets", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 9828412089918712334, + 6215717056563871310, + 18446744073709551615, + 18446744073709551615, + 111, + 150, + 110, + 149, + 20, + 31, + true, + "(ie, the name field found in Listing 1)", + "(ie, the name field found in Listing 1)" + ], + [ + "sentence", + "", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 7380356609967428771, + 11039135432617650461, + 18446744073709551615, + 18446744073709551615, + 3, + 151, + 2, + 150, + 1, + 32, + true, + "A rather simple similarity metric is to perform a fuzzy comparison of the names of the newly found entities (ie, the name field found in Listing 1).", + "A rather simple similarity metric is to perform a fuzzy comparison of the names of the newly found entities (ie, the name field found in Listing 1)." + ], + [ + "sentence", + "", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 8544327399273637150, + 76400959495078138, + 18446744073709551615, + 18446744073709551615, + 152, + 248, + 151, + 247, + 32, + 49, + true, + "A more sophisticated approach is to use word embeddings to identify if two concepts are similar.", + "A more sophisticated approach is to use word embeddings to identify if two concepts are similar." + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 14238812658426593966, + 13924064151272705753, + 18446744073709551615, + 18446744073709551615, + 12, + 36, + 11, + 35, + 3, + 6, + true, + "simple similarity metric", + "simple similarity metric" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 5203229829211163848, + 10805139012141424660, + 18446744073709551615, + 18446744073709551615, + 53, + 69, + 52, + 68, + 10, + 12, + true, + "fuzzy comparison", + "fuzzy comparison" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 2451855113324595828, + 18220665665570411090, + 18446744073709551615, + 18446744073709551615, + 120, + 130, + 119, + 129, + 24, + 26, + true, + "name field", + "name field" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 8980863917750970521, + 8109873253237463962, + 18446744073709551615, + 18446744073709551615, + 159, + 181, + 158, + 180, + 34, + 36, + true, + "sophisticated approach", + "sophisticated approach" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 16942949857064565838, + 2076435900086379767, + 18446744073709551615, + 18446744073709551615, + 192, + 207, + 191, + 206, + 39, + 41, + true, + "word embeddings", + "word embeddings" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 329104161568027276, + 7648688669911791224, + 18446744073709551615, + 18446744073709551615, + 77, + 82, + 76, + 81, + 14, + 15, + true, + "names", + "names" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 14652256560445338257, + 10525189337855255576, + 18446744073709551615, + 18446744073709551615, + 102, + 110, + 101, + 109, + 19, + 20, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 15441160910541486545, + 16190146737237010835, + 18446744073709551615, + 18446744073709551615, + 112, + 114, + 111, + 113, + 21, + 22, + true, + "ie", + "ie" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 14652282388618227426, + 11300706950781769100, + 18446744073709551615, + 18446744073709551615, + 227, + 235, + 226, + 234, + 45, + 46, + true, + "concepts", + "concepts" + ], + [ + "numval", + "ival", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 17767354399704235156, + 1305421191768306174, + 18446744073709551615, + 18446744073709551615, + 18, + 19, + 18, + 19, + 3, + 3, + false, + "4", + "4" + ], + [ + "expression", + "wtoken-concatenation", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 329104162105779366, + 13727282245178536763, + 18446744073709551615, + 18446744073709551615, + 15, + 20, + 15, + 20, + 3, + 4, + true, + "Neo4J", + "Neo4J" + ], + [ + "sentence", + "", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 12200322802088853735, + 5911179317975529042, + 18446744073709551615, + 18446744073709551615, + 3, + 70, + 3, + 70, + 1, + 15, + true, + "For example Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb.", + "For example Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb." + ], + [ + "term", + "enum-term-mark-4", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 6870264612961802772, + 10403498607481025217, + 18446744073709551615, + 18446744073709551615, + 15, + 69, + 15, + 69, + 3, + 14, + true, + "Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb", + "Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb" + ], + [ + "term", + "single-term", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 153508376111218070, + 10521979097015614348, + 18446744073709551615, + 18446744073709551615, + 7, + 20, + 7, + 20, + 2, + 4, + true, + "example Neo4J", + "example Neo4J" + ], + [ + "term", + "single-term", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 18066135526428419828, + 16363518137721762265, + 18446744073709551615, + 18446744073709551615, + 41, + 55, + 41, + 55, + 9, + 11, + true, + "Amazon Neptune", + "Amazon Neptune" + ], + [ + "term", + "single-term", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 329104161841320944, + 13364385693051315282, + 18446744073709551615, + 18446744073709551615, + 22, + 27, + 22, + 27, + 5, + 6, + true, + "Titan", + "Titan" + ], + [ + "term", + "single-term", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 1737775650888870515, + 5145686494756741983, + 18446744073709551615, + 18446744073709551615, + 29, + 39, + 29, + 39, + 7, + 8, + true, + "JanusGraph", + "JanusGraph" + ], + [ + "term", + "single-term", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 14650296439291036599, + 17985905875417800583, + 18446744073709551615, + 18446744073709551615, + 61, + 69, + 61, + 69, + 13, + 14, + true, + "Arangodb", + "Arangodb" + ], + [ + "numval", + "year", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 389609625548777059, + 14748978429801291102, + 18446744073709551615, + 18446744073709551615, + 178, + 182, + 174, + 178, + 50, + 51, + true, + "2015", + "2015" + ], + [ + "numval", + "ival", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 17767354399704235163, + 14663762662264921246, + 18446744073709551615, + 18446744073709551615, + 73, + 74, + 69, + 70, + 15, + 16, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 17767354399704235156, + 14663762663007797994, + 18446744073709551615, + 18446744073709551615, + 136, + 137, + 132, + 133, + 34, + 34, + false, + "4", + "4" + ], + [ + "numval", + "ival", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 15441160910541481913, + 12659057306413090614, + 18446744073709551615, + 18446744073709551615, + 183, + 185, + 179, + 181, + 52, + 53, + true, + "02", + "02" + ], + [ + "numval", + "ival", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 17767354399704235156, + 14663762663007808920, + 18446744073709551615, + 18446744073709551615, + 189, + 190, + 185, + 186, + 55, + 56, + true, + "4", + "4" + ], + [ + "link", + "url", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 3438649888016089446, + 14315872303660489441, + 18446744073709551615, + 18446744073709551615, + 65, + 127, + 61, + 123, + 10, + 32, + true, + "http://s3.thinkaurelius.com/docs/titan/current/data-model.html", + "http://s3.thinkaurelius.com/docs/titan/current/data-model.html" + ], + [ + "link", + "url", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 9361941850829391161, + 1324878578738734655, + 18446744073709551615, + 18446744073709551615, + 140, + 209, + 136, + 205, + 36, + 61, + true, + "http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html", + "http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html" + ], + [ + "parenthesis", + "round brackets", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 253594065264500809, + 15498824685726423077, + 18446744073709551615, + 18446744073709551615, + 64, + 128, + 60, + 124, + 9, + 33, + true, + "(http://s3.thinkaurelius.com/docs/titan/current/data-model.html)", + "(http://s3.thinkaurelius.com/docs/titan/current/data-model.html)" + ], + [ + "parenthesis", + "round brackets", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 2281494353586706787, + 8157085761115684525, + 18446744073709551615, + 18446744073709551615, + 139, + 210, + 135, + 206, + 35, + 62, + true, + "(http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html)", + "(http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html)" + ], + [ + "expression", + "wtoken-concatenation", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 329104162105779366, + 6977200025242982444, + 18446744073709551615, + 18446744073709551615, + 133, + 138, + 129, + 134, + 34, + 35, + true, + "Neo4J", + "Neo4J" + ], + [ + "sentence", + "", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 12617989556159965278, + 16648537282536463551, + 18446744073709551615, + 18446744073709551615, + 7, + 211, + 3, + 207, + 1, + 63, + true, + "This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html).", + "This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html)." + ], + [ + "term", + "single-term", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 3927729088961860971, + 10861366598444773863, + 18446744073709551615, + 18446744073709551615, + 12, + 31, + 8, + 27, + 2, + 4, + true, + "memory architecture", + "memory architecture" + ], + [ + "term", + "single-term", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 329104161841320944, + 6995505371408985384, + 18446744073709551615, + 18446744073709551615, + 58, + 63, + 54, + 59, + 8, + 9, + true, + "Titan", + "Titan" + ], + [ + "term", + "single-term", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 329104162105779366, + 6977200025242982444, + 18446744073709551615, + 18446744073709551615, + 133, + 138, + 129, + 134, + 34, + 35, + true, + "Neo4J", + "Neo4J" + ], + [ + "numval", + "ival", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 17767354399704235156, + 17688058591094674309, + 18446744073709551615, + 18446744073709551615, + 19, + 20, + 15, + 16, + 3, + 3, + false, + "4", + "4" + ], + [ + "link", + "url", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 12568677210829628871, + 1680746501251640588, + 18446744073709551615, + 18446744073709551615, + 105, + 139, + 101, + 135, + 19, + 20, + true, + "https://db-engines.com/en/ranking_", + "https://db-engines.com/en/ranking_" + ], + [ + "expression", + "wtoken-concatenation", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 329104162105779366, + 17682593486665884844, + 18446744073709551615, + 18446744073709551615, + 16, + 21, + 12, + 17, + 3, + 4, + true, + "Neo4J", + "Neo4J" + ], + [ + "expression", + "wtoken-concatenation", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 12568677210829628871, + 1680746501251640588, + 18446744073709551615, + 18446744073709551615, + 105, + 139, + 101, + 135, + 19, + 20, + true, + "https://db-engines.com/en/ranking_", + "https://db-engines.com/en/ranking_" + ], + [ + "numval", + "ival", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 12178341415896310600, + 9970685264370540412, + 18446744073709551615, + 18446744073709551615, + 17, + 20, + 15, + 18, + 6, + 7, + true, + "500", + "500" + ], + [ + "link", + "url", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 1244385257359010144, + 3127203609822040452, + 18446744073709551615, + 18446744073709551615, + 5, + 25, + 3, + 23, + 1, + 10, + true, + "http://graph500.org/", + "http://graph500.org/" + ], + [ + "reference", + "url", + 1792096630133661292, + "TEXT", + "#/texts/163", + 1.0, + 16747146533825186967, + 2165348395015827092, + 18446744073709551615, + 18446744073709551615, + 0, + 54, + 0, + 52, + 0, + 18, + true, + "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", + "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html" + ], + [ + "sentence", + "", + 11462638369524745676, + "TEXT", + "#/texts/164", + 1.0, + 8767715734654495558, + 12563470467547715840, + 18446744073709551615, + 18446744073709551615, + 4, + 61, + 4, + 61, + 1, + 13, + true, + "We assume the weight can be represented by a float value.", + "We assume the weight can be represented by a float value." + ], + [ + "term", + "single-term", + 11462638369524745676, + "TEXT", + "#/texts/164", + 1.0, + 1473558314070085366, + 13523311624596995819, + 18446744073709551615, + 18446744073709551615, + 49, + 60, + 49, + 60, + 10, + 12, + true, + "float value", + "float value" + ], + [ + "term", + "single-term", + 11462638369524745676, + "TEXT", + "#/texts/164", + 1.0, + 16381206557786164800, + 5728702803374294286, + 18446744073709551615, + 18446744073709551615, + 18, + 24, + 18, + 24, + 4, + 5, + true, + "weight", + "weight" + ], + [ + "reference", + "url", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 4512570954370983408, + 11763158631698282386, + 18446744073709551615, + 18446744073709551615, + 0, + 75, + 0, + 69, + 0, + 23, + true, + "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", + "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/" + ], + [ + "reference", + "url", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 16922240937803157180, + 3329452043224775053, + 18446744073709551615, + 18446744073709551615, + 0, + 43, + 0, + 37, + 0, + 11, + true, + "\u2021\u2021\u2021 https://www.naturalearthdata.com/", + "\u2021\u2021\u2021 https://www.naturalearthdata.com/" + ], + [ + "reference", + "url", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 2845896203864732456, + 4760469342904968768, + 18446744073709551615, + 18446744073709551615, + 0, + 36, + 0, + 33, + 0, + 11, + true, + "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/", + "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/" + ], + [ + "reference", + "author", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 11879540473470058199, + 12427853451193245392, + 18446744073709551615, + 18446744073709551615, + 3, + 17, + 3, + 17, + 2, + 5, + true, + "Staar Peter WJ", + "Staar Peter WJ" + ], + [ + "reference", + "author", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 6613162031266505134, + 16138057201536909006, + 18446744073709551615, + 18446744073709551615, + 19, + 28, + 19, + 28, + 6, + 8, + true, + "Michele D", + "Michele D" + ], + [ + "reference", + "author", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 4457167794784606628, + 16487730286724222122, + 18446744073709551615, + 18446744073709551615, + 30, + 41, + 30, + 41, + 9, + 11, + true, + "Christoph A", + "Christoph A" + ], + [ + "reference", + "author", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 6560601913145533820, + 12701816617387729389, + 18446744073709551615, + 18446744073709551615, + 43, + 52, + 43, + 52, + 12, + 15, + true, + "Costas B.", + "Costas B." + ], + [ + "reference", + "citation-number", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 17767354399704235161, + 16208788960124925205, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "1", + "1" + ], + [ + "reference", + "container-title", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 8106351470704634736, + 17995829417296331915, + 18446744073709551615, + 18446744073709551615, + 138, + 145, + 138, + 145, + 29, + 32, + true, + "KDD '18", + "KDD '18" + ], + [ + "reference", + "date", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 8104408419226439021, + 7524634383995046949, + 18446744073709551615, + 18446744073709551615, + 164, + 171, + 164, + 171, + 39, + 42, + true, + "; 2018:", + "; 2018:" + ], + [ + "reference", + "location", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 6517026456739326224, + 8283202906327186871, + 18446744073709551615, + 18446744073709551615, + 147, + 160, + 147, + 160, + 33, + 38, + true, + "New York, NY:", + "New York, NY:" + ], + [ + "reference", + "title", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 3346237141252876309, + 13011534883222988606, + 18446744073709551615, + 18446744073709551615, + 53, + 136, + 53, + 136, + 15, + 28, + true, + "Corpus conversion service: a machine learning platform to ingest documents at scale", + "Corpus conversion service: a machine learning platform to ingest documents at scale" + ], + [ + "name", + "person-name", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 455015420489078976, + 1285989015139281970, + 18446744073709551615, + 18446744073709551615, + 43, + 59, + 43, + 59, + 12, + 16, + true, + "Costas B Corpus", + "Costas B. Corpus" + ], + [ + "sentence", + "", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 2064192796631964770, + 16259707831430260005, + 18446744073709551615, + 18446744073709551615, + 3, + 137, + 3, + 137, + 2, + 29, + true, + "Staar Peter WJ, Michele D, Christoph A, Costas B. Corpus conversion service: a machine learning platform to ingest documents at scale.", + "Staar Peter WJ, Michele D, Christoph A, Costas B. Corpus conversion service: a machine learning platform to ingest documents at scale." + ], + [ + "sentence", + "", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 14650432707062542657, + 11273901130686106136, + 18446744073709551615, + 18446744073709551615, + 138, + 146, + 138, + 146, + 29, + 33, + true, + "KDD '18.", + "KDD '18." + ], + [ + "sentence", + "", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 5922527567909474023, + 10848884210820586202, + 18446744073709551615, + 18446744073709551615, + 147, + 179, + 147, + 179, + 33, + 44, + true, + "New York, NY: ACM; 2018:774-782.", + "New York, NY: ACM; 2018:774-782." + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 11879540473470058199, + 12427853451193245392, + 18446744073709551615, + 18446744073709551615, + 3, + 17, + 3, + 17, + 2, + 5, + true, + "Staar Peter WJ", + "Staar Peter WJ" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 6613162031266505134, + 16138057201536909006, + 18446744073709551615, + 18446744073709551615, + 19, + 28, + 19, + 28, + 6, + 8, + true, + "Michele D", + "Michele D" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 4457167794784606628, + 16487730286724222122, + 18446744073709551615, + 18446744073709551615, + 30, + 41, + 30, + 41, + 9, + 11, + true, + "Christoph A", + "Christoph A" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 7881558880483647069, + 4390339138058947972, + 18446744073709551615, + 18446744073709551615, + 53, + 78, + 53, + 78, + 15, + 18, + true, + "Corpus conversion service", + "Corpus conversion service" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 14650948201816210252, + 5761777774481409935, + 18446744073709551615, + 18446744073709551615, + 147, + 155, + 147, + 155, + 33, + 35, + true, + "New York", + "New York" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 8106464587473865376, + 4936495746156049501, + 18446744073709551615, + 18446744073709551615, + 82, + 89, + 82, + 89, + 20, + 21, + true, + "machine", + "machine" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 14814125365076808131, + 17712433670698462707, + 18446744073709551615, + 18446744073709551615, + 99, + 107, + 99, + 107, + 22, + 23, + true, + "platform", + "platform" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 6167933651658664291, + 12661206916294760912, + 18446744073709551615, + 18446744073709551615, + 118, + 127, + 118, + 127, + 25, + 26, + true, + "documents", + "documents" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 329104161785194305, + 16612919954372115180, + 18446744073709551615, + 18446744073709551615, + 131, + 136, + 131, + 136, + 27, + 28, + true, + "scale", + "scale" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 12178341415896253943, + 16661690143811416648, + 18446744073709551615, + 18446744073709551615, + 138, + 141, + 138, + 141, + 29, + 30, + true, + "KDD", + "KDD" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 15441160910541487804, + 8386387571143486082, + 18446744073709551615, + 18446744073709551615, + 157, + 159, + 157, + 159, + 36, + 37, + true, + "NY", + "NY" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 12178341415896228980, + 16661682738511655292, + 18446744073709551615, + 18446744073709551615, + 161, + 164, + 161, + 164, + 38, + 39, + true, + "ACM", + "ACM" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 8104408789160133341, + 11698475954970405279, + 18446744073709551615, + 18446744073709551615, + 171, + 178, + 171, + 178, + 42, + 43, + true, + "774-782", + "774-782" + ], + [ + "reference", + "author", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 11879540473470058199, + 6818801233014041471, + 18446744073709551615, + 18446744073709551615, + 3, + 17, + 3, + 17, + 2, + 5, + true, + "Staar Peter WJ", + "Staar Peter WJ" + ], + [ + "reference", + "author", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 329104159232588720, + 1186563503698797045, + 18446744073709551615, + 18446744073709551615, + 19, + 24, + 19, + 24, + 6, + 8, + true, + "Kl BP", + "Kl BP" + ], + [ + "reference", + "author", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 14652187939873997159, + 718674333250886747, + 18446744073709551615, + 18446744073709551615, + 26, + 34, + 26, + 34, + 9, + 11, + true, + "Roxana I", + "Roxana I" + ], + [ + "reference", + "citation-number", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 17767354399704235162, + 7639029136784882071, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "2", + "2" + ], + [ + "reference", + "date", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 325347433255123998, + 9431696322833619113, + 18446744073709551615, + 18446744073709551615, + 150, + 162, + 150, + 162, + 34, + 37, + true, + "2016:812-821", + "2016:812-821" + ], + [ + "reference", + "journal", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 8106350741667376964, + 2037770047407614341, + 18446744073709551615, + 18446744073709551615, + 131, + 138, + 131, + 138, + 28, + 29, + true, + "Chicago", + "Chicago" + ], + [ + "reference", + "title", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 7105706713138331748, + 8882313339767931673, + 18446744073709551615, + 18446744073709551615, + 43, + 129, + 43, + 129, + 13, + 27, + true, + "Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance", + "Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance" + ], + [ + "name", + "name-concatenation", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 4549168941779565045, + 3298984056937140542, + 18446744073709551615, + 18446744073709551615, + 54, + 69, + 54, + 69, + 14, + 17, + true, + "Matrix-Function", + "Matrix-Function" + ], + [ + "name", + "name-concatenation", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 14650423007673892384, + 14713090862316278550, + 18446744073709551615, + 18446744073709551615, + 91, + 99, + 91, + 99, + 20, + 23, + true, + "Big-Data", + "Big-Data" + ], + [ + "expression", + "common", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 329104162180805867, + 2101201624583688644, + 18446744073709551615, + 18446744073709551615, + 36, + 42, + 36, + 42, + 12, + 13, + true, + "et al", + "et al." + ], + [ + "sentence", + "", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 5941740972901141891, + 8159805389130902539, + 18446744073709551615, + 18446744073709551615, + 3, + 130, + 3, + 130, + 2, + 28, + true, + "Staar Peter WJ, Kl BP, Roxana I, et al. Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance.", + "Staar Peter WJ, Kl BP, Roxana I, et al. Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance." + ], + [ + "sentence", + "", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 14086539689317188783, + 9623126981639275921, + 18446744073709551615, + 18446744073709551615, + 131, + 163, + 131, + 163, + 28, + 38, + true, + "Chicago, IL: IEEE; 2016:812-821.", + "Chicago, IL: IEEE; 2016:812-821." + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 11879540473470058199, + 6818801233014041471, + 18446744073709551615, + 18446744073709551615, + 3, + 17, + 3, + 17, + 2, + 5, + true, + "Staar Peter WJ", + "Staar Peter WJ" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 329104159232588720, + 1186563503698797045, + 18446744073709551615, + 18446744073709551615, + 19, + 24, + 19, + 24, + 6, + 8, + true, + "Kl BP", + "Kl BP" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 14652187939873997159, + 718674333250886747, + 18446744073709551615, + 18446744073709551615, + 26, + 34, + 26, + 34, + 9, + 11, + true, + "Roxana I", + "Roxana I" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 16657546371048263169, + 1321774612206483248, + 18446744073709551615, + 18446744073709551615, + 36, + 60, + 36, + 60, + 12, + 15, + true, + "et al Stochastic Matrix", + "et al. Stochastic Matrix" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 8793329599149202578, + 318724713861878505, + 18446744073709551615, + 18446744073709551615, + 61, + 80, + 61, + 80, + 16, + 18, + true, + "Function Estimators", + "Function Estimators" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 9598948590671100886, + 9715436593583823660, + 18446744073709551615, + 18446744073709551615, + 82, + 94, + 82, + 94, + 19, + 21, + true, + "Scalable Big", + "Scalable Big" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 9448405554431222338, + 2183763151164676504, + 18446744073709551615, + 18446744073709551615, + 95, + 107, + 95, + 107, + 22, + 24, + true, + "Data Kernels", + "Data Kernels" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 4824230233499551867, + 7853881416888487608, + 18446744073709551615, + 18446744073709551615, + 113, + 129, + 113, + 129, + 25, + 27, + true, + "High Performance", + "High Performance" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 8106350741667376964, + 2037770047407614341, + 18446744073709551615, + 18446744073709551615, + 131, + 138, + 131, + 138, + 28, + 29, + true, + "Chicago", + "Chicago" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 15441160910541480320, + 9679667231859700756, + 18446744073709551615, + 18446744073709551615, + 140, + 142, + 140, + 142, + 30, + 31, + true, + "IL", + "IL" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 389609625537951687, + 17963625958813759677, + 18446744073709551615, + 18446744073709551615, + 144, + 148, + 144, + 148, + 32, + 33, + true, + "IEEE", + "IEEE" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 8104411926417755372, + 14212489819603114026, + 18446744073709551615, + 18446744073709551615, + 155, + 162, + 155, + 162, + 36, + 37, + true, + "812-821", + "812-821" + ], + [ + "reference", + "author", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 14650311461945683358, + 1978144735469983705, + 18446744073709551615, + 18446744073709551615, + 3, + 11, + 3, + 11, + 2, + 4, + true, + "Matteo M", + "Matteo M" + ], + [ + "reference", + "author", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 4457167794784606628, + 3737697229009384388, + 18446744073709551615, + 18446744073709551615, + 13, + 24, + 13, + 24, + 5, + 7, + true, + "Christoph A", + "Christoph A" + ], + [ + "reference", + "author", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 6183363009296336817, + 2886377010043332845, + 18446744073709551615, + 18446744073709551615, + 26, + 35, + 26, + 35, + 8, + 10, + true, + "Val'ery W", + "Val'ery W" + ], + [ + "reference", + "citation-number", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 17767354399704235163, + 13510159049290326510, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "3", + "3" + ], + [ + "reference", + "date", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 16381206542172555288, + 10693536807570486686, + 18446744073709551615, + 18446744073709551615, + 161, + 167, + 161, + 167, + 25, + 27, + true, + "; 2019", + "; 2019" + ], + [ + "reference", + "journal", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 7543597897356589805, + 187532807533800461, + 18446744073709551615, + 18446744073709551615, + 141, + 151, + 141, + 151, + 24, + 24, + false, + "ArXiv.abs/", + "ArXiv.abs/" + ], + [ + "reference", + "title", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 14518759528420507379, + 35296972575901155, + 18446744073709551615, + 18446744073709551615, + 44, + 139, + 44, + 139, + 12, + 23, + true, + "An information extraction and knowledge graph platform for accelerating biochemical discoveries", + "An information extraction and knowledge graph platform for accelerating biochemical discoveries" + ], + [ + "expression", + "common", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 329104162180805867, + 17409365616313413437, + 18446744073709551615, + 18446744073709551615, + 37, + 43, + 37, + 43, + 11, + 12, + true, + "et al", + "et al." + ], + [ + "expression", + "wtoken-concatenation", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 8106351859305413568, + 12876005663384173407, + 18446744073709551615, + 18446744073709551615, + 26, + 33, + 26, + 33, + 8, + 9, + true, + "Val'ery", + "Val'ery" + ], + [ + "expression", + "wtoken-concatenation", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 11904308365999439423, + 5480725305735692275, + 18446744073709551615, + 18446744073709551615, + 141, + 161, + 141, + 161, + 24, + 25, + true, + "ArXiv.abs/1907.08400", + "ArXiv.abs/1907.08400" + ], + [ + "sentence", + "", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 10366826046347151057, + 3201175641693388735, + 18446744073709551615, + 18446744073709551615, + 3, + 140, + 3, + 140, + 2, + 24, + true, + "Matteo M, Christoph A, Val'ery W, et al. An information extraction and knowledge graph platform for accelerating biochemical discoveries.", + "Matteo M, Christoph A, Val'ery W, et al. An information extraction and knowledge graph platform for accelerating biochemical discoveries." + ], + [ + "sentence", + "", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 4521272801426400378, + 9677593514014566176, + 18446744073709551615, + 18446744073709551615, + 141, + 168, + 141, + 168, + 24, + 28, + true, + "ArXiv.abs/1907.08400; 2019.", + "ArXiv.abs/1907.08400; 2019." + ], + [ + "term", + "enum-term-mark-2", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 16358141361454762264, + 16975235676379792590, + 18446744073709551615, + 18446744073709551615, + 47, + 98, + 47, + 98, + 13, + 19, + true, + "information extraction and knowledge graph platform", + "information extraction and knowledge graph platform" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 14650311461945683358, + 1978144735469983705, + 18446744073709551615, + 18446744073709551615, + 3, + 11, + 3, + 11, + 2, + 4, + true, + "Matteo M", + "Matteo M" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 4457167794784606628, + 3737697229009384388, + 18446744073709551615, + 18446744073709551615, + 13, + 24, + 13, + 24, + 5, + 7, + true, + "Christoph A", + "Christoph A" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 6183363009296336817, + 2886377010043332845, + 18446744073709551615, + 18446744073709551615, + 26, + 35, + 26, + 35, + 8, + 10, + true, + "Val'ery W", + "Val'ery W" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 8220196561360771086, + 11976237431337447962, + 18446744073709551615, + 18446744073709551615, + 47, + 69, + 47, + 69, + 13, + 15, + true, + "information extraction", + "information extraction" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 9096096466746800436, + 299601853962247456, + 18446744073709551615, + 18446744073709551615, + 74, + 98, + 74, + 98, + 16, + 19, + true, + "knowledge graph platform", + "knowledge graph platform" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 16380427451761946440, + 1088776306081422918, + 18446744073709551615, + 18446744073709551615, + 116, + 139, + 116, + 139, + 21, + 23, + true, + "biochemical discoveries", + "biochemical discoveries" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 11904308365999439423, + 5480725305735692275, + 18446744073709551615, + 18446744073709551615, + 141, + 161, + 141, + 161, + 24, + 25, + true, + "ArXiv.abs/1907.08400", + "ArXiv.abs/1907.08400" + ], + [ + "reference", + "author", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8106352039693059414, + 189526913306248274, + 18446744073709551615, + 18446744073709551615, + 3, + 10, + 3, + 10, + 2, + 4, + true, + "Paolo R", + "Paolo R" + ], + [ + "reference", + "author", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8106471247241844081, + 12829126084417792103, + 18446744073709551615, + 18446744073709551615, + 12, + 19, + 12, + 19, + 5, + 7, + true, + "Marco P", + "Marco P" + ], + [ + "reference", + "author", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 15356089124994678984, + 18000216761919637454, + 18446744073709551615, + 18446744073709551615, + 21, + 31, + 21, + 31, + 8, + 10, + true, + "Floriana B", + "Floriana B" + ], + [ + "reference", + "author", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8106352035144611657, + 2775049790770760163, + 18446744073709551615, + 18446744073709551615, + 33, + 40, + 33, + 40, + 11, + 13, + true, + "Peter S", + "Peter S" + ], + [ + "reference", + "author", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 6560601913145533820, + 12130024709208567744, + 18446744073709551615, + 18446744073709551615, + 42, + 51, + 42, + 51, + 14, + 17, + true, + "Costas B.", + "Costas B." + ], + [ + "reference", + "citation-number", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 17767354399704235156, + 2787669627718018145, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "4", + "4" + ], + [ + "reference", + "container-title", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 4292761212337338605, + 773134743697376497, + 18446744073709551615, + 18446744073709551615, + 177, + 245, + 177, + 245, + 38, + 48, + true, + "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi", + "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi" + ], + [ + "reference", + "location", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 16381206478137548706, + 9744551904329916157, + 18446744073709551615, + 18446744073709551615, + 247, + 253, + 247, + 253, + 49, + 51, + false, + "UAE, :", + "UAE, :" + ], + [ + "reference", + "title", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 14371818679908732529, + 10294554605073457499, + 18446744073709551615, + 18446744073709551615, + 52, + 174, + 52, + 174, + 17, + 36, + true, + "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019", + "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019" + ], + [ + "reference", + "url", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 7742135058095281026, + 17571544217117981683, + 18446744073709551615, + 18446744073709551615, + 257, + 268, + 257, + 268, + 53, + 54, + true, + "https://doi", + "https://doi" + ], + [ + "reference", + "url", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 14023706993569865773, + 12197548886916811054, + 18446744073709551615, + 18446744073709551615, + 270, + 291, + 270, + 291, + 55, + 62, + true, + "org/10.2118/197610-MS", + "org/10.2118/197610-MS" + ], + [ + "name", + "person-name", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8085653376282374091, + 8263479519718862087, + 18446744073709551615, + 18446744073709551615, + 42, + 63, + 42, + 63, + 14, + 18, + true, + "Costas B Application", + "Costas B. Application" + ], + [ + "expression", + "wtoken-concatenation", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 12178341415896216312, + 16626963629120408490, + 18446744073709551615, + 18446744073709551615, + 252, + 255, + 252, + 255, + 51, + 52, + true, + ":10", + ":10" + ], + [ + "expression", + "wtoken-concatenation", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 7742135058095281026, + 17571544217117981683, + 18446744073709551615, + 18446744073709551615, + 257, + 268, + 257, + 268, + 53, + 54, + true, + "https://doi", + "https://doi" + ], + [ + "sentence", + "", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 18201779004646765015, + 6525534015679273683, + 18446744073709551615, + 18446744073709551615, + 3, + 176, + 3, + 176, + 2, + 38, + true, + "Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019).", + "Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019)." + ], + [ + "sentence", + "", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8047292080261477252, + 3759318704396486843, + 18446744073709551615, + 18446744073709551615, + 177, + 256, + 177, + 256, + 38, + 53, + true, + "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10.", + "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10." + ], + [ + "sentence", + "", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 5857244370669890274, + 17990747492643866277, + 18446744073709551615, + 18446744073709551615, + 257, + 269, + 257, + 269, + 53, + 55, + true, + "https://doi.", + "https://doi." + ], + [ + "sentence", + "", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 12178341415896278272, + 16627063529584884912, + 18446744073709551615, + 18446744073709551615, + 289, + 292, + 289, + 292, + 61, + 63, + true, + "MS.", + "MS." + ], + [ + "term", + "enum-term-mark-4", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 16886334362487979110, + 15663185702252571998, + 18446744073709551615, + 18446744073709551615, + 96, + 129, + 96, + 129, + 22, + 27, + true, + "Basin & Petroleum System Analyses", + "Basin & Petroleum System Analyses" + ], + [ + "term", + "enum-term-mark-4", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 5050260885807546595, + 15713736725428197202, + 18446744073709551615, + 18446744073709551615, + 177, + 234, + 177, + 234, + 38, + 45, + true, + "Abu Dhabi International Petroleum Exhibition & Conference", + "Abu Dhabi International Petroleum Exhibition & Conference" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8106352039693059414, + 189526913306248274, + 18446744073709551615, + 18446744073709551615, + 3, + 10, + 3, + 10, + 2, + 4, + true, + "Paolo R", + "Paolo R" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8106471247241844081, + 12829126084417792103, + 18446744073709551615, + 18446744073709551615, + 12, + 19, + 12, + 19, + 5, + 7, + true, + "Marco P", + "Marco P" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 15356089124994678984, + 18000216761919637454, + 18446744073709551615, + 18446744073709551615, + 21, + 31, + 21, + 31, + 8, + 10, + true, + "Floriana B", + "Floriana B" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8106352035144611657, + 2775049790770760163, + 18446744073709551615, + 18446744073709551615, + 33, + 40, + 33, + 40, + 11, + 13, + true, + "Peter S", + "Peter S" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 10490888699425605498, + 7759298453024144101, + 18446744073709551615, + 18446744073709551615, + 67, + 92, + 67, + 92, + 19, + 21, + true, + "Geocognitive Technologies", + "Geocognitive Technologies" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 6282754256473030155, + 5108804280048681346, + 18446744073709551615, + 18446744073709551615, + 104, + 129, + 104, + 129, + 24, + 27, + true, + "Petroleum System Analyses", + "Petroleum System Analyses" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 1957667287048702282, + 14636254714870854935, + 18446744073709551615, + 18446744073709551615, + 149, + 168, + 149, + 168, + 32, + 34, + true, + "Petroleum Engineers", + "Petroleum Engineers" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 1607524687542961615, + 11251152050070639401, + 18446744073709551615, + 18446744073709551615, + 177, + 221, + 177, + 221, + 38, + 43, + true, + "Abu Dhabi International Petroleum Exhibition", + "Abu Dhabi International Petroleum Exhibition" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 6563080480676520350, + 6473658907824821571, + 18446744073709551615, + 18446744073709551615, + 236, + 245, + 236, + 245, + 46, + 48, + true, + "Abu Dhabi", + "Abu Dhabi" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 329104162065456823, + 743590677500510925, + 18446744073709551615, + 18446744073709551615, + 96, + 101, + 96, + 101, + 22, + 23, + true, + "Basin", + "Basin" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 329104161846736203, + 3663224801483387974, + 18446744073709551615, + 18446744073709551615, + 131, + 136, + 131, + 136, + 28, + 29, + true, + "Texas", + "Texas" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8106352717733900272, + 2525138794754357211, + 18446744073709551615, + 18446744073709551615, + 138, + 145, + 138, + 145, + 30, + 31, + true, + "Society", + "Society" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 969963630422387313, + 12193752692984421564, + 18446744073709551615, + 18446744073709551615, + 224, + 234, + 224, + 234, + 44, + 45, + true, + "Conference", + "Conference" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 12178341415895651112, + 16626961418662622345, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 49, + 50, + true, + "UAE", + "UAE" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 7742135058095281026, + 17571544217117981683, + 18446744073709551615, + 18446744073709551615, + 257, + 268, + 257, + 268, + 53, + 54, + true, + "https://doi", + "https://doi" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 12178341415895623052, + 16626965306587567269, + 18446744073709551615, + 18446744073709551615, + 270, + 273, + 270, + 273, + 55, + 56, + true, + "org", + "org" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 15441160910541480634, + 13393758459708113122, + 18446744073709551615, + 18446744073709551615, + 289, + 291, + 289, + 291, + 61, + 62, + true, + "MS", + "MS" + ], + [ + "reference", + "author", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 3027248490321213074, + 16283814403211008850, + 18446744073709551615, + 18446744073709551615, + 3, + 14, + 3, + 14, + 2, + 4, + true, + "Guillaume L", + "Guillaume L" + ], + [ + "reference", + "author", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 14650310996645589292, + 14357325801323977565, + 18446744073709551615, + 18446744073709551615, + 16, + 24, + 16, + 24, + 5, + 7, + true, + "Miguel B", + "Miguel B" + ], + [ + "reference", + "author", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 6049415556904669075, + 4491667145265607561, + 18446744073709551615, + 18446744073709551615, + 26, + 35, + 26, + 35, + 8, + 10, + true, + "Sandeep S", + "Sandeep S" + ], + [ + "reference", + "author", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 14650438760956024332, + 12941354247565292233, + 18446744073709551615, + 18446744073709551615, + 37, + 45, + 37, + 45, + 11, + 13, + true, + "Kazuya K", + "Kazuya K" + ], + [ + "reference", + "author", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 14650449385951782031, + 12018837533588020118, + 18446744073709551615, + 18446744073709551615, + 47, + 55, + 47, + 55, + 14, + 17, + true, + "Chris D.", + "Chris D." + ], + [ + "reference", + "citation-number", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 17767354399704235157, + 9080683344301571175, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "5", + "5" + ], + [ + "name", + "person-name", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 9660792047811639733, + 1337128772214092214, + 18446744073709551615, + 18446744073709551615, + 47, + 62, + 47, + 62, + 14, + 18, + true, + "Chris D Neural", + "Chris D. Neural" + ], + [ + "sentence", + "", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 4850647667861134344, + 5505550372083259738, + 18446744073709551615, + 18446744073709551615, + 3, + 171, + 3, + 171, + 2, + 34, + true, + "Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016.", + "Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016." + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 3027248490321213074, + 16283814403211008850, + 18446744073709551615, + 18446744073709551615, + 3, + 14, + 3, + 14, + 2, + 4, + true, + "Guillaume L", + "Guillaume L" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 14650310996645589292, + 14357325801323977565, + 18446744073709551615, + 18446744073709551615, + 16, + 24, + 16, + 24, + 5, + 7, + true, + "Miguel B", + "Miguel B" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 6049415556904669075, + 4491667145265607561, + 18446744073709551615, + 18446744073709551615, + 26, + 35, + 26, + 35, + 8, + 10, + true, + "Sandeep S", + "Sandeep S" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 14650438760956024332, + 12941354247565292233, + 18446744073709551615, + 18446744073709551615, + 37, + 45, + 37, + 45, + 11, + 13, + true, + "Kazuya K", + "Kazuya K" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 9764460566162632960, + 16642624291773848144, + 18446744073709551615, + 18446744073709551615, + 56, + 76, + 56, + 76, + 17, + 19, + true, + "Neural Architectures", + "Neural Architectures" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 9361732498059105399, + 13956982048443319080, + 18446744073709551615, + 18446744073709551615, + 81, + 105, + 81, + 105, + 20, + 23, + true, + "Named Entity Recognition", + "Named Entity Recognition" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 295551369126235776, + 10028312936701107065, + 18446744073709551615, + 18446744073709551615, + 107, + 121, + 107, + 121, + 24, + 26, + true, + "Stroudsburg PA", + "Stroudsburg PA" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 16550803490317182276, + 1120983039058145469, + 18446744073709551615, + 18446744073709551615, + 139, + 164, + 139, + 164, + 29, + 31, + true, + "Computational Linguistics", + "Computational Linguistics" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 7719000109186773037, + 1029739931494122980, + 18446744073709551615, + 18446744073709551615, + 123, + 134, + 123, + 134, + 27, + 28, + true, + "Association", + "Association" + ], + [ + "reference", + "author", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 12139207556299923335, + 12395232115938598978, + 18446744073709551615, + 18446744073709551615, + 3, + 16, + 3, + 16, + 2, + 5, + true, + "Chiu Jason PC", + "Chiu Jason PC" + ], + [ + "reference", + "author", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 8106350848262626922, + 5052428205716655678, + 18446744073709551615, + 18446744073709551615, + 18, + 25, + 18, + 25, + 6, + 9, + true, + "Eric N.", + "Eric N." + ], + [ + "reference", + "citation-number", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 17767354399704235158, + 2935027410945303089, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "6", + "6" + ], + [ + "reference", + "date", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 389609625548777056, + 1668465275038003542, + 18446744073709551615, + 18446744073709551615, + 87, + 91, + 87, + 91, + 18, + 19, + true, + "2016", + "2016" + ], + [ + "reference", + "journal", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 389609625541773713, + 1712767977156820574, + 18446744073709551615, + 18446744073709551615, + 81, + 85, + 81, + 85, + 16, + 17, + true, + "TACL", + "TACL" + ], + [ + "reference", + "title", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 16636370883913883252, + 5810162511985509685, + 18446744073709551615, + 18446744073709551615, + 26, + 79, + 26, + 79, + 9, + 15, + true, + "Named entity recognition with bidirectional LSTM-CNNs", + "Named entity recognition with bidirectional LSTM-CNNs" + ], + [ + "reference", + "title", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 9584872678510603869, + 10893893406063870923, + 18446744073709551615, + 18446744073709551615, + 91, + 101, + 91, + 101, + 19, + 23, + true, + ";4:357-370", + ";4:357-370" + ], + [ + "name", + "person-name", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 8669939107464889919, + 15575238431294334172, + 18446744073709551615, + 18446744073709551615, + 18, + 31, + 18, + 31, + 6, + 10, + true, + "Eric N Named", + "Eric N. Named" + ], + [ + "expression", + "word-concatenation", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 6627885913248971716, + 14160903326793315633, + 18446744073709551615, + 18446744073709551615, + 70, + 79, + 70, + 79, + 14, + 15, + true, + "LSTM-CNNs", + "LSTM-CNNs" + ], + [ + "sentence", + "", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 16149852804597872204, + 9077761784460652022, + 18446744073709551615, + 18446744073709551615, + 3, + 80, + 3, + 80, + 2, + 16, + true, + "Chiu Jason PC, Eric N. Named entity recognition with bidirectional LSTM-CNNs.", + "Chiu Jason PC, Eric N. Named entity recognition with bidirectional LSTM-CNNs." + ], + [ + "sentence", + "", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 329104162087785161, + 9946009003015376905, + 18446744073709551615, + 18446744073709551615, + 81, + 86, + 81, + 86, + 16, + 18, + true, + "TACL.", + "TACL." + ], + [ + "term", + "single-term", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 12139207556299923335, + 12395232115938598978, + 18446744073709551615, + 18446744073709551615, + 3, + 16, + 3, + 16, + 2, + 5, + true, + "Chiu Jason PC", + "Chiu Jason PC" + ], + [ + "term", + "single-term", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 6624594430573868561, + 9300875014556458820, + 18446744073709551615, + 18446744073709551615, + 32, + 50, + 32, + 50, + 10, + 12, + true, + "entity recognition", + "entity recognition" + ], + [ + "term", + "single-term", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 190135919074194296, + 9627057232939807256, + 18446744073709551615, + 18446744073709551615, + 56, + 79, + 56, + 79, + 13, + 15, + true, + "bidirectional LSTM-CNNs", + "bidirectional LSTM-CNNs" + ], + [ + "term", + "single-term", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 389609625541773713, + 1712767977156820574, + 18446744073709551615, + 18446744073709551615, + 81, + 85, + 81, + 85, + 16, + 17, + true, + "TACL", + "TACL" + ], + [ + "term", + "single-term", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 8104407397552891367, + 18037615861815767977, + 18446744073709551615, + 18446744073709551615, + 94, + 101, + 94, + 101, + 22, + 23, + true, + "357-370", + "357-370" + ], + [ + "reference", + "author", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 6611312511369759405, + 3019524304480366334, + 18446744073709551615, + 18446744073709551615, + 3, + 12, + 3, + 12, + 2, + 4, + true, + "Matthew H", + "Matthew H" + ], + [ + "reference", + "author", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 8106350362383531053, + 10877267985434630613, + 18446744073709551615, + 18446744073709551615, + 14, + 21, + 14, + 21, + 5, + 8, + true, + "Ines M.", + "Ines M." + ], + [ + "reference", + "citation-number", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 17767354399704235159, + 17892509173094146701, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "7", + "7" + ], + [ + "reference", + "date", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 389609625548777057, + 14192492111179186414, + 18446744073709551615, + 18446744073709551615, + 151, + 155, + 151, + 155, + 28, + 29, + true, + "2017", + "2017" + ], + [ + "reference", + "title", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 8673657110667713983, + 2132423457048291450, + 18446744073709551615, + 18446744073709551615, + 22, + 138, + 22, + 138, + 8, + 24, + true, + "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing", + "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing" + ], + [ + "sentence", + "", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 7307445048043317682, + 2555209552433482279, + 18446744073709551615, + 18446744073709551615, + 3, + 21, + 3, + 21, + 2, + 8, + true, + "Matthew H, Ines M.", + "Matthew H, Ines M." + ], + [ + "sentence", + "", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 18422321729110645108, + 14077627261602009953, + 18446744073709551615, + 18446744073709551615, + 25, + 139, + 25, + 139, + 8, + 25, + true, + "Cy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing.", + "Cy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing." + ], + [ + "sentence", + "", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 15865958309310945968, + 6557910677090579622, + 18446744073709551615, + 18446744073709551615, + 140, + 150, + 140, + 150, + 25, + 28, + true, + "To appear.", + "To appear." + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 6611312511369759405, + 3019524304480366334, + 18446744073709551615, + 18446744073709551615, + 3, + 12, + 3, + 12, + 2, + 4, + true, + "Matthew H", + "Matthew H" + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 16380809977742382038, + 12118911120655365706, + 18446744073709551615, + 18446744073709551615, + 14, + 20, + 14, + 20, + 5, + 7, + true, + "Ines M", + "Ines M" + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 3070945404202872591, + 4809079122368752762, + 18446744073709551615, + 18446744073709551615, + 31, + 47, + 31, + 47, + 11, + 13, + true, + "natural language", + "natural language" + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 18136559691621189433, + 7438263735663259264, + 18446744073709551615, + 18446744073709551615, + 67, + 83, + 67, + 83, + 15, + 17, + true, + "bloom embeddings", + "bloom embeddings" + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 5074039139067361158, + 2646875362836900663, + 18446744073709551615, + 18446744073709551615, + 85, + 114, + 85, + 114, + 18, + 21, + true, + "convolutional neural networks", + "convolutional neural networks" + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 2536592842635865927, + 13814970629892288506, + 18446744073709551615, + 18446744073709551615, + 119, + 138, + 119, + 138, + 22, + 24, + true, + "incremental parsing", + "incremental parsing" + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 329104161639064018, + 15555581995259937868, + 18446744073709551615, + 18446744073709551615, + 22, + 27, + 22, + 27, + 8, + 9, + true, + "spaCy", + "spaCy" + ], + [ + "reference", + "author", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 6627095272342846459, + 8960025720845820047, + 18446744073709551615, + 18446744073709551615, + 3, + 12, + 3, + 12, + 2, + 4, + true, + "Magoon LB", + "Magoon LB" + ], + [ + "reference", + "author", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 6563582333827106756, + 4026322596752919867, + 18446744073709551615, + 18446744073709551615, + 14, + 23, + 14, + 23, + 5, + 7, + true, + "Hudson TL", + "Hudson TL" + ], + [ + "reference", + "author", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 1612814864176813785, + 12195293078214673428, + 18446744073709551615, + 18446744073709551615, + 25, + 35, + 25, + 35, + 8, + 11, + true, + "Peters KE.", + "Peters KE." + ], + [ + "reference", + "citation-number", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 17767354399704235152, + 15651484829649486928, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "8", + "8" + ], + [ + "reference", + "date", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 329104147695665975, + 7749771140976442, + 18446744073709551615, + 18446744073709551615, + 163, + 168, + 163, + 168, + 33, + 35, + true, + "2005;", + "2005;" + ], + [ + "reference", + "journal", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 14445748745948696227, + 6494504935180328364, + 18446744073709551615, + 18446744073709551615, + 139, + 161, + 139, + 161, + 27, + 32, + true, + "Am Assoc Pet Geol Bull", + "Am Assoc Pet Geol Bull" + ], + [ + "reference", + "title", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 10827383077041810226, + 7289787549141850214, + 18446744073709551615, + 18446744073709551615, + 36, + 52, + 36, + 52, + 11, + 11, + false, + "Egret-Hibernia(!", + "Egret-Hibernia(!" + ], + [ + "reference", + "title", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 8991166294068381652, + 13146587142049422219, + 18446744073709551615, + 18446744073709551615, + 55, + 137, + 55, + 137, + 13, + 26, + true, + "a significant petroleum system, northern Grand Banks area, offshore eastern Canada", + "a significant petroleum system, northern Grand Banks area, offshore eastern Canada" + ], + [ + "name", + "person-name", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 8106396146029139849, + 4756990361978769041, + 18446744073709551615, + 18446744073709551615, + 33, + 41, + 33, + 41, + 9, + 11, + false, + "E Egret", + "E. Egret" + ], + [ + "parenthesis", + "reference", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 12178341415896392564, + 5085929270105113532, + 18446744073709551615, + 18446744073709551615, + 170, + 173, + 170, + 173, + 36, + 39, + true, + "(9)", + "(9)" + ], + [ + "parenthesis", + "round brackets", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 12178341415896398102, + 5085731941478390906, + 18446744073709551615, + 18446744073709551615, + 50, + 53, + 50, + 53, + 11, + 12, + false, + "(!)", + "(!)" + ], + [ + "expression", + "wtoken-concatenation", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 17069592624661941498, + 4631155417717592897, + 18446744073709551615, + 18446744073709551615, + 36, + 53, + 36, + 53, + 11, + 12, + true, + "Egret-Hibernia(!)", + "Egret-Hibernia(!)" + ], + [ + "sentence", + "", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 14351638638233132553, + 9869358074812010297, + 18446744073709551615, + 18446744073709551615, + 3, + 138, + 3, + 138, + 2, + 27, + true, + "Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!), a significant petroleum system, northern Grand Banks area, offshore eastern Canada.", + "Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!), a significant petroleum system, northern Grand Banks area, offshore eastern Canada." + ], + [ + "sentence", + "", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 3239699829819496083, + 8140375803971345791, + 18446744073709551615, + 18446744073709551615, + 139, + 162, + 139, + 162, + 27, + 33, + true, + "Am Assoc Pet Geol Bull.", + "Am Assoc Pet Geol Bull." + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 6627095272342846459, + 8960025720845820047, + 18446744073709551615, + 18446744073709551615, + 3, + 12, + 3, + 12, + 2, + 4, + true, + "Magoon LB", + "Magoon LB" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 6563582333827106756, + 4026322596752919867, + 18446744073709551615, + 18446744073709551615, + 14, + 23, + 14, + 23, + 5, + 7, + true, + "Hudson TL", + "Hudson TL" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 2902914387278523955, + 5025725615021492664, + 18446744073709551615, + 18446744073709551615, + 25, + 34, + 25, + 34, + 8, + 10, + true, + "Peters KE", + "Peters KE" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 2654341348145270052, + 13911007437444991428, + 18446744073709551615, + 18446744073709551615, + 57, + 85, + 57, + 85, + 14, + 17, + true, + "significant petroleum system", + "significant petroleum system" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 5020053208872345017, + 12778217125270746067, + 18446744073709551615, + 18446744073709551615, + 87, + 112, + 87, + 112, + 18, + 22, + true, + "northern Grand Banks area", + "northern Grand Banks area" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 11433597316182704533, + 321742181372268229, + 18446744073709551615, + 18446744073709551615, + 123, + 137, + 123, + 137, + 24, + 26, + true, + "eastern Canada", + "eastern Canada" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 14445748745948696227, + 6494504935180328364, + 18446744073709551615, + 18446744073709551615, + 139, + 161, + 139, + 161, + 27, + 32, + true, + "Am Assoc Pet Geol Bull", + "Am Assoc Pet Geol Bull" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 17069592624661941498, + 4631155417717592897, + 18446744073709551615, + 18446744073709551615, + 36, + 53, + 36, + 53, + 11, + 12, + true, + "Egret-Hibernia(!)", + "Egret-Hibernia(!)" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "reference", + "author", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 7087532328962869115, + 5488976721015347116, + 18446744073709551615, + 18446744073709551615, + 3, + 13, + 3, + 13, + 2, + 5, + true, + "Estrada E.", + "Estrada E." + ], + [ + "reference", + "citation-number", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 17767354399704235153, + 10433678415276841389, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "9", + "9" + ], + [ + "reference", + "date", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 8104407400303630267, + 3516783299715161152, + 18446744073709551615, + 18446744073709551615, + 67, + 74, + 67, + 74, + 15, + 18, + true, + "2005;71", + "2005;71" + ], + [ + "reference", + "journal", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 1821145667706451373, + 6349148037602643636, + 18446744073709551615, + 18446744073709551615, + 55, + 65, + 55, + 65, + 11, + 14, + true, + "Phys Rev E", + "Phys Rev E" + ], + [ + "reference", + "title", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 10002059539925749429, + 4038144589619849267, + 18446744073709551615, + 18446744073709551615, + 14, + 53, + 14, + 53, + 5, + 10, + true, + "Subgraph centrality in complex networks", + "Subgraph centrality in complex networks" + ], + [ + "name", + "person-name", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 5032660681914123489, + 10411767668009775523, + 18446744073709551615, + 18446744073709551615, + 3, + 22, + 3, + 22, + 2, + 6, + true, + "Estrada E Subgraph", + "Estrada E. Subgraph" + ], + [ + "parenthesis", + "reference", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 12178341415896395383, + 3095186558758793614, + 18446744073709551615, + 18446744073709551615, + 74, + 77, + 74, + 77, + 18, + 21, + true, + "(5)", + "(5)" + ], + [ + "sentence", + "", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 14211509953373686953, + 10442209004816950267, + 18446744073709551615, + 18446744073709551615, + 3, + 54, + 3, + 54, + 2, + 11, + true, + "Estrada E. Subgraph centrality in complex networks.", + "Estrada E. Subgraph centrality in complex networks." + ], + [ + "sentence", + "", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 11914250565653684629, + 14122288949077854502, + 18446744073709551615, + 18446744073709551615, + 55, + 66, + 55, + 66, + 11, + 15, + true, + "Phys Rev E.", + "Phys Rev E." + ], + [ + "term", + "single-term", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 13702393049667549173, + 5943650791086261175, + 18446744073709551615, + 18446744073709551615, + 14, + 33, + 14, + 33, + 5, + 7, + true, + "Subgraph centrality", + "Subgraph centrality" + ], + [ + "term", + "single-term", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 1651177076069931825, + 13122672563215344832, + 18446744073709551615, + 18446744073709551615, + 37, + 53, + 37, + 53, + 8, + 10, + true, + "complex networks", + "complex networks" + ], + [ + "term", + "single-term", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 1821145667706451373, + 6349148037602643636, + 18446744073709551615, + 18446744073709551615, + 55, + 65, + 55, + 65, + 11, + 14, + true, + "Phys Rev E", + "Phys Rev E" + ], + [ + "reference", + "author", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 2628812302410383486, + 8225541491002394036, + 18446744073709551615, + 18446744073709551615, + 4, + 19, + 4, + 19, + 2, + 4, + true, + "Estrada Ernesto", + "Estrada Ernesto" + ], + [ + "reference", + "author", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 17728567422753594500, + 4401840231895103727, + 18446744073709551615, + 18446744073709551615, + 21, + 38, + 21, + 38, + 5, + 9, + true, + "Higham Desmond J.", + "Higham Desmond J." + ], + [ + "reference", + "citation-number", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 15441160910541481982, + 2952327273286615865, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "10", + "10" + ], + [ + "reference", + "date", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 389609625548777062, + 8937154938925173833, + 18446744073709551615, + 18446744073709551615, + 40, + 44, + 40, + 44, + 10, + 11, + true, + "2010", + "2010" + ], + [ + "reference", + "journal", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 745633759305567859, + 2105664067016610109, + 18446744073709551615, + 18446744073709551615, + 47, + 112, + 47, + 112, + 13, + 22, + true, + "Network Properties Revealed through Matrix Functions. SIAM Review", + "Network Properties Revealed through Matrix Functions. SIAM Review" + ], + [ + "reference", + "url", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 16159594323378820687, + 15692242274322104012, + 18446744073709551615, + 18446744073709551615, + 132, + 167, + 132, + 167, + 31, + 44, + true, + "http://dx.doi.org/10.1137/090761070", + "http://dx.doi.org/10.1137/090761070" + ], + [ + "parenthesis", + "reference", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 16380808315360989994, + 7590928242002916775, + 18446744073709551615, + 18446744073709551615, + 39, + 45, + 39, + 45, + 9, + 12, + true, + "(2010)", + "(2010)" + ], + [ + "parenthesis", + "reference", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 12178341415896395057, + 2497165307488647522, + 18446744073709551615, + 18446744073709551615, + 118, + 121, + 118, + 121, + 25, + 28, + true, + "(4)", + "(4)" + ], + [ + "sentence", + "", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 15888965152791123369, + 13311487678580191662, + 18446744073709551615, + 18446744073709551615, + 4, + 38, + 4, + 38, + 2, + 9, + true, + "Estrada Ernesto, Higham Desmond J.", + "Estrada Ernesto, Higham Desmond J." + ], + [ + "sentence", + "", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 13313338743045791386, + 13496281760238992122, + 18446744073709551615, + 18446744073709551615, + 47, + 100, + 47, + 100, + 13, + 20, + true, + "Network Properties Revealed through Matrix Functions.", + "Network Properties Revealed through Matrix Functions." + ], + [ + "sentence", + "", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 12745772866621103425, + 14912721299200279248, + 18446744073709551615, + 18446744073709551615, + 101, + 131, + 101, + 131, + 20, + 31, + true, + "SIAM Review, 52, (4), 696-714.", + "SIAM Review, 52, (4), 696-714." + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 2628812302410383486, + 8225541491002394036, + 18446744073709551615, + 18446744073709551615, + 4, + 19, + 4, + 19, + 2, + 4, + true, + "Estrada Ernesto", + "Estrada Ernesto" + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 9810881374821281499, + 15294715577751716659, + 18446744073709551615, + 18446744073709551615, + 21, + 37, + 21, + 37, + 5, + 8, + true, + "Higham Desmond J", + "Higham Desmond J" + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 9529086943855412027, + 1909130811397866082, + 18446744073709551615, + 18446744073709551615, + 47, + 74, + 47, + 74, + 13, + 16, + true, + "Network Properties Revealed", + "Network Properties Revealed" + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 14050323403523305703, + 1711773991294684512, + 18446744073709551615, + 18446744073709551615, + 83, + 99, + 83, + 99, + 17, + 19, + true, + "Matrix Functions", + "Matrix Functions" + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 2746419737099405232, + 18061106767070096394, + 18446744073709551615, + 18446744073709551615, + 101, + 112, + 101, + 112, + 20, + 22, + true, + "SIAM Review", + "SIAM Review" + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 389609625695173007, + 8776546935861356993, + 18446744073709551615, + 18446744073709551615, + 132, + 136, + 132, + 136, + 31, + 32, + true, + "http", + "http" + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 15441160910541486860, + 2952325046422382730, + 18446744073709551615, + 18446744073709551615, + 139, + 141, + 139, + 141, + 35, + 36, + true, + "dx", + "dx" + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 12178341415895623052, + 2496395224268980578, + 18446744073709551615, + 18446744073709551615, + 146, + 149, + 146, + 149, + 39, + 40, + true, + "org", + "org" + ], + [ + "numval", + "year", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 389609625548777055, + 1517668227262464254, + 18446744073709551615, + 18446744073709551615, + 45, + 49, + 45, + 49, + 9, + 10, + true, + "2019", + "2019" + ], + [ + "numval", + "fval", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 12178341415896427355, + 7596226314134098818, + 18446744073709551615, + 18446744073709551615, + 40, + 43, + 40, + 43, + 7, + 8, + true, + "1.0", + "1.0" + ], + [ + "numval", + "ival", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 15441160910541481983, + 11293846485728944316, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "11", + "11" + ], + [ + "sentence", + "", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 2199715623168261348, + 14344176115087650584, + 18446744073709551615, + 18446744073709551615, + 4, + 15, + 4, + 15, + 2, + 5, + true, + "Labs Redis.", + "Labs Redis." + ], + [ + "sentence", + "", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 9890391113606841714, + 7310122424657160613, + 18446744073709551615, + 18446744073709551615, + 16, + 44, + 16, + 44, + 5, + 9, + true, + "Benchmarking RedisGraph 1.0.", + "Benchmarking RedisGraph 1.0." + ], + [ + "term", + "single-term", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 1413805758909278007, + 12182268615745487814, + 18446744073709551615, + 18446744073709551615, + 4, + 14, + 4, + 14, + 2, + 4, + true, + "Labs Redis", + "Labs Redis" + ], + [ + "term", + "single-term", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 9275871508708668772, + 5709957430710804157, + 18446744073709551615, + 18446744073709551615, + 29, + 39, + 29, + 39, + 6, + 7, + true, + "RedisGraph", + "RedisGraph" + ], + [ + "reference", + "author", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 15754713894443025139, + 17869835566751337591, + 18446744073709551615, + 18446744073709551615, + 4, + 15, + 4, + 15, + 2, + 4, + true, + "TigerGraph.", + "TigerGraph." + ], + [ + "reference", + "citation-number", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 15441160910541481976, + 12703724519968684238, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "12", + "12" + ], + [ + "reference", + "date", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 389609625548777054, + 3194806985827377522, + 18446744073709551615, + 18446744073709551615, + 47, + 51, + 47, + 51, + 11, + 12, + true, + "2018", + "2018" + ], + [ + "reference", + "title", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 17475892521501552303, + 8529795867214537154, + 18446744073709551615, + 18446744073709551615, + 16, + 45, + 16, + 45, + 4, + 10, + true, + "Real-Time Deep Link Analytics", + "Real-Time Deep Link Analytics" + ], + [ + "name", + "name-concatenation", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 5955741586810846236, + 15240428492191467486, + 18446744073709551615, + 18446744073709551615, + 16, + 25, + 16, + 25, + 4, + 7, + true, + "Real-Time", + "Real-Time" + ], + [ + "sentence", + "", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 15754713894443025139, + 17869835566751337591, + 18446744073709551615, + 18446744073709551615, + 4, + 15, + 4, + 15, + 2, + 4, + true, + "TigerGraph.", + "TigerGraph." + ], + [ + "sentence", + "", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 7946825277683884881, + 1230187338989102593, + 18446744073709551615, + 18446744073709551615, + 16, + 46, + 16, + 46, + 4, + 11, + true, + "Real-Time Deep Link Analytics.", + "Real-Time Deep Link Analytics." + ], + [ + "term", + "single-term", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 14317037945453024278, + 15123649660345785041, + 18446744073709551615, + 18446744073709551615, + 21, + 45, + 21, + 45, + 6, + 10, + true, + "Time Deep Link Analytics", + "Time Deep Link Analytics" + ], + [ + "term", + "single-term", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 15861880261780248619, + 9206162103335947231, + 18446744073709551615, + 18446744073709551615, + 4, + 14, + 4, + 14, + 2, + 3, + true, + "TigerGraph", + "TigerGraph" + ], + [ + "reference", + "author", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 14652280730090715542, + 9368048166047908224, + 18446744073709551615, + 18446744073709551615, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Jeremy K", + "Jeremy K" + ], + [ + "reference", + "author", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 8106396242733918714, + 2646308426186848374, + 18446744073709551615, + 18446744073709551615, + 14, + 21, + 14, + 21, + 5, + 8, + true, + "John G.", + "John G." + ], + [ + "reference", + "citation-number", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 15441160910541481977, + 12462842527617278799, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "13", + "13" + ], + [ + "reference", + "date", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 16381206542172555296, + 17521384641614480308, + 18446744073709551615, + 18446744073709551615, + 138, + 144, + 138, + 144, + 27, + 29, + true, + "; 2011", + "; 2011" + ], + [ + "reference", + "journal", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 1813266722082342225, + 593931840598100395, + 18446744073709551615, + 18446744073709551615, + 74, + 86, + 74, + 86, + 17, + 18, + true, + "Philadelphia", + "Philadelphia" + ], + [ + "reference", + "title", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 11539515714196318944, + 4409464707523225606, + 18446744073709551615, + 18446744073709551615, + 22, + 72, + 22, + 72, + 8, + 16, + true, + "Graph Algorithms in the Language of Linear Algebra", + "Graph Algorithms in the Language of Linear Algebra" + ], + [ + "name", + "person-name", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 4962934261580742358, + 3284808524522933032, + 18446744073709551615, + 18446744073709551615, + 14, + 27, + 14, + 27, + 5, + 9, + true, + "John G Graph", + "John G. Graph" + ], + [ + "sentence", + "", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 2261840617824203371, + 3833037035800633943, + 18446744073709551615, + 18446744073709551615, + 4, + 73, + 4, + 73, + 2, + 17, + true, + "Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra.", + "Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra." + ], + [ + "sentence", + "", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 3918811354618692965, + 10240964037709860462, + 18446744073709551615, + 18446744073709551615, + 74, + 145, + 74, + 145, + 17, + 30, + true, + "Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011.", + "Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011." + ], + [ + "term", + "enum-term-mark-4", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 17988145802762076819, + 11569702800846552129, + 18446744073709551615, + 18446744073709551615, + 104, + 138, + 104, + 138, + 23, + 27, + true, + "Industrial and Applied Mathematics", + "Industrial and Applied Mathematics" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 14652280730090715542, + 9368048166047908224, + 18446744073709551615, + 18446744073709551615, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Jeremy K", + "Jeremy K" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 14079274028767783387, + 17595184631762760537, + 18446744073709551615, + 18446744073709551615, + 22, + 38, + 22, + 38, + 8, + 10, + true, + "Graph Algorithms", + "Graph Algorithms" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 16513864209537702472, + 7141276361161445756, + 18446744073709551615, + 18446744073709551615, + 58, + 72, + 58, + 72, + 14, + 16, + true, + "Linear Algebra", + "Linear Algebra" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 7685464491762532718, + 11454351202197972573, + 18446744073709551615, + 18446744073709551615, + 119, + 138, + 119, + 138, + 25, + 27, + true, + "Applied Mathematics", + "Applied Mathematics" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 14650462612952067914, + 15224301288684964806, + 18446744073709551615, + 18446744073709551615, + 46, + 54, + 46, + 54, + 12, + 13, + true, + "Language", + "Language" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 1813266722082342225, + 593931840598100395, + 18446744073709551615, + 18446744073709551615, + 74, + 86, + 74, + 86, + 17, + 18, + true, + "Philadelphia", + "Philadelphia" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 15441160910541487654, + 12462847826366847251, + 18446744073709551615, + 18446744073709551615, + 88, + 90, + 88, + 90, + 19, + 20, + true, + "PA", + "PA" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 8106352717733900272, + 18316158962653956918, + 18446744073709551615, + 18446744073709551615, + 92, + 99, + 92, + 99, + 21, + 22, + true, + "Society", + "Society" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 7898186517875929489, + 6998199463972144020, + 18446744073709551615, + 18446744073709551615, + 104, + 114, + 104, + 114, + 23, + 24, + true, + "Industrial", + "Industrial" + ], + [ + "reference", + "author", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 3893756947393595038, + 15910484170600691612, + 18446744073709551615, + 18446744073709551615, + 4, + 17, + 4, + 17, + 2, + 4, + true, + "Kepner Jeremy", + "Kepner Jeremy" + ], + [ + "reference", + "author", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 4638041857648041651, + 2139644705806385528, + 18446744073709551615, + 18446744073709551615, + 19, + 30, + 19, + 30, + 5, + 7, + true, + "Bader David", + "Bader David" + ], + [ + "reference", + "author", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 9621725435760800320, + 4639858687526125642, + 18446744073709551615, + 18446744073709551615, + 32, + 47, + 32, + 45, + 8, + 12, + true, + "Bulu\u00e7 Ayd \u0131 n", + "Bulu\u00e7 Ayd \u0131 n" + ], + [ + "reference", + "author", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 978039607314331382, + 9008054255178396141, + 18446744073709551615, + 18446744073709551615, + 49, + 61, + 47, + 59, + 13, + 15, + true, + "Gilbert John", + "Gilbert John" + ], + [ + "reference", + "author", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 10968707392751490476, + 11627993516556341660, + 18446744073709551615, + 18446744073709551615, + 63, + 78, + 61, + 76, + 16, + 18, + true, + "Mattson Timothy", + "Mattson Timothy" + ], + [ + "reference", + "author", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 3010219124533777340, + 3552467627404320563, + 18446744073709551615, + 18446744073709551615, + 80, + 98, + 78, + 96, + 19, + 21, + true, + "Meyerhenke Henning", + "Meyerhenke Henning" + ], + [ + "reference", + "citation-number", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 15441160910541481978, + 9067685736347109846, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "14", + "14" + ], + [ + "reference", + "date", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 389609625548777059, + 3330964369910711146, + 18446744073709551615, + 18446744073709551615, + 100, + 104, + 98, + 102, + 22, + 23, + true, + "2015", + "2015" + ], + [ + "reference", + "date", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 389609625548777059, + 3330964369910703397, + 18446744073709551615, + 18446744073709551615, + 240, + 244, + 238, + 242, + 61, + 62, + true, + "2015", + "2015" + ], + [ + "reference", + "location", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 9440834537675533739, + 6746478687441634720, + 18446744073709551615, + 18446744073709551615, + 107, + 143, + 105, + 141, + 25, + 33, + true, + "Graphs, Matrices, and the GraphBLAS:", + "Graphs, Matrices, and the GraphBLAS:" + ], + [ + "reference", + "url", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 16959048237954323084, + 10596594611762835857, + 18446744073709551615, + 18446744073709551615, + 206, + 239, + 204, + 237, + 45, + 60, + true, + "http://dx.doi.org/10.1016/j.procs", + "http://dx.doi.org/10.1016/j.procs" + ], + [ + "parenthesis", + "reference", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 16380808315360990702, + 11846655963247696776, + 18446744073709551615, + 18446744073709551615, + 99, + 105, + 97, + 103, + 21, + 24, + true, + "(2015)", + "(2015)" + ], + [ + "sentence", + "", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 8027673259181526609, + 3901675832395310476, + 18446744073709551615, + 18446744073709551615, + 4, + 106, + 4, + 104, + 2, + 25, + true, + "Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015).", + "Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015)." + ], + [ + "sentence", + "", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 17293964586930460261, + 12804061004186124881, + 18446744073709551615, + 18446744073709551615, + 107, + 163, + 105, + 161, + 25, + 37, + true, + "Graphs, Matrices, and the GraphBLAS: Seven Good Reasons.", + "Graphs, Matrices, and the GraphBLAS: Seven Good Reasons." + ], + [ + "sentence", + "", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 6573516791222902756, + 17147560721361502235, + 18446744073709551615, + 18446744073709551615, + 164, + 205, + 162, + 203, + 37, + 45, + true, + "Procedia Computer Science, 51, 2453-2462.", + "Procedia Computer Science, 51, 2453-2462." + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 3893756947393595038, + 15910484170600691612, + 18446744073709551615, + 18446744073709551615, + 4, + 17, + 4, + 17, + 2, + 4, + true, + "Kepner Jeremy", + "Kepner Jeremy" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 4638041857648041651, + 2139644705806385528, + 18446744073709551615, + 18446744073709551615, + 19, + 30, + 19, + 30, + 5, + 7, + true, + "Bader David", + "Bader David" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 6559847563621387069, + 11479165544683600786, + 18446744073709551615, + 18446744073709551615, + 32, + 42, + 32, + 41, + 8, + 10, + true, + "Bulu\u00e7 Ayd", + "Bulu\u00e7 Ayd" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 978039607314331382, + 9008054255178396141, + 18446744073709551615, + 18446744073709551615, + 49, + 61, + 47, + 59, + 13, + 15, + true, + "Gilbert John", + "Gilbert John" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 10968707392751490476, + 11627993516556341660, + 18446744073709551615, + 18446744073709551615, + 63, + 78, + 61, + 76, + 16, + 18, + true, + "Mattson Timothy", + "Mattson Timothy" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 3010219124533777340, + 3552467627404320563, + 18446744073709551615, + 18446744073709551615, + 80, + 98, + 78, + 96, + 19, + 21, + true, + "Meyerhenke Henning", + "Meyerhenke Henning" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 10585062274889693433, + 394824704429372117, + 18446744073709551615, + 18446744073709551615, + 144, + 162, + 142, + 160, + 33, + 36, + true, + "Seven Good Reasons", + "Seven Good Reasons" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 11311803343161413167, + 2833609951174621747, + 18446744073709551615, + 18446744073709551615, + 164, + 189, + 162, + 187, + 37, + 40, + true, + "Procedia Computer Science", + "Procedia Computer Science" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 15441160910541481522, + 9067685766522309508, + 18446744073709551615, + 18446744073709551615, + 231, + 233, + 229, + 231, + 56, + 58, + true, + "/j", + "/j" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 16380809986240833363, + 17968496728215965151, + 18446744073709551615, + 18446744073709551615, + 107, + 113, + 105, + 111, + 25, + 26, + true, + "Graphs", + "Graphs" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 14650311457598610046, + 1458725737752079201, + 18446744073709551615, + 18446744073709551615, + 115, + 123, + 113, + 121, + 27, + 28, + true, + "Matrices", + "Matrices" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 6560668489345557302, + 13696029002884714705, + 18446744073709551615, + 18446744073709551615, + 133, + 142, + 131, + 140, + 31, + 32, + true, + "GraphBLAS", + "GraphBLAS" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 389609625695173007, + 3324915498141700280, + 18446744073709551615, + 18446744073709551615, + 206, + 210, + 204, + 208, + 45, + 46, + true, + "http", + "http" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 15441160910541486860, + 9067695028646494582, + 18446744073709551615, + 18446744073709551615, + 213, + 215, + 211, + 213, + 49, + 50, + true, + "dx", + "dx" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 12178341415895623052, + 2021307257966447238, + 18446744073709551615, + 18446744073709551615, + 220, + 223, + 218, + 221, + 53, + 54, + true, + "org", + "org" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 329104161588706802, + 14124897019871745005, + 18446744073709551615, + 18446744073709551615, + 234, + 239, + 232, + 237, + 59, + 60, + true, + "procs", + "procs" + ], + [ + "reference", + "author", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 8106396252822508385, + 7971302054101082514, + 18446744073709551615, + 18446744073709551615, + 4, + 11, + 4, + 11, + 2, + 4, + true, + "Aydin B", + "Aydin B" + ], + [ + "reference", + "author", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 3367556578117774584, + 5704823584998723957, + 18446744073709551615, + 18446744073709551615, + 13, + 28, + 13, + 28, + 5, + 9, + true, + "Gilbert John R.", + "Gilbert John R." + ], + [ + "reference", + "citation-number", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 15441160910541481979, + 10213682970367471311, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "15", + "15" + ], + [ + "reference", + "date", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 329104147695662665, + 13454856964816440075, + 18446744073709551615, + 18446744073709551615, + 127, + 132, + 127, + 132, + 27, + 27, + false, + "2011;", + "2011;" + ], + [ + "reference", + "journal", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 15067288891537767501, + 3357793480659482128, + 18446744073709551615, + 18446744073709551615, + 95, + 125, + 95, + 125, + 20, + 26, + true, + "Int J High Perform Comput Appl", + "Int J High Perform Comput Appl" + ], + [ + "reference", + "title", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 6150328359964540652, + 10199114762007747151, + 18446744073709551615, + 18446744073709551615, + 29, + 93, + 29, + 93, + 9, + 19, + true, + "The combinatorial BLAS: design, implementation, and applications", + "The combinatorial BLAS: design, implementation, and applications" + ], + [ + "name", + "person-name", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 17208477177194249305, + 13847153851054112206, + 18446744073709551615, + 18446744073709551615, + 13, + 32, + 13, + 32, + 5, + 10, + true, + "Gilbert John R The", + "Gilbert John R. The" + ], + [ + "parenthesis", + "reference", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 12178341415896395057, + 3824454373173587092, + 18446744073709551615, + 18446744073709551615, + 135, + 138, + 135, + 138, + 28, + 31, + true, + "(4)", + "(4)" + ], + [ + "expression", + "wtoken-concatenation", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 8104407400321262254, + 3429534335477953780, + 18446744073709551615, + 18446744073709551615, + 127, + 134, + 127, + 134, + 27, + 28, + true, + "2011;25", + "2011;25" + ], + [ + "sentence", + "", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 16743274342806123059, + 16250199669292766293, + 18446744073709551615, + 18446744073709551615, + 4, + 94, + 4, + 94, + 2, + 20, + true, + "Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications.", + "Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications." + ], + [ + "sentence", + "", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 738491526082319197, + 7325738847681902359, + 18446744073709551615, + 18446744073709551615, + 95, + 126, + 95, + 126, + 20, + 27, + true, + "Int J High Perform Comput Appl.", + "Int J High Perform Comput Appl." + ], + [ + "sentence", + "", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 17282712032868423392, + 7113339629628212811, + 18446744073709551615, + 18446744073709551615, + 127, + 147, + 127, + 147, + 27, + 34, + true, + "2011;25 (4):496-509.", + "2011;25 (4):496-509." + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 8106396252822508385, + 7971302054101082514, + 18446744073709551615, + 18446744073709551615, + 4, + 11, + 4, + 11, + 2, + 4, + true, + "Aydin B", + "Aydin B" + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 11111529766026683653, + 13196650859027091171, + 18446744073709551615, + 18446744073709551615, + 33, + 51, + 33, + 51, + 10, + 12, + true, + "combinatorial BLAS", + "combinatorial BLAS" + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 15067288891537767501, + 3357793480659482128, + 18446744073709551615, + 18446744073709551615, + 95, + 125, + 95, + 125, + 20, + 26, + true, + "Int J High Perform Comput Appl", + "Int J High Perform Comput Appl" + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 16381206568241679420, + 15760767362173066532, + 18446744073709551615, + 18446744073709551615, + 53, + 59, + 53, + 59, + 13, + 14, + true, + "design", + "design" + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 16770038681622514616, + 12413351225926077106, + 18446744073709551615, + 18446744073709551615, + 61, + 75, + 61, + 75, + 15, + 16, + true, + "implementation", + "implementation" + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 546291010477001669, + 10618604754194727447, + 18446744073709551615, + 18446744073709551615, + 81, + 93, + 81, + 93, + 18, + 19, + true, + "applications", + "applications" + ], + [ + "reference", + "author", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 14652280730090715542, + 12791881049692147803, + 18446744073709551615, + 18446744073709551615, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Jeremy K", + "Jeremy K" + ], + [ + "reference", + "author", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 8106352035144611671, + 4513564816050590788, + 18446744073709551615, + 18446744073709551615, + 14, + 21, + 14, + 21, + 5, + 7, + true, + "Peter A", + "Peter A" + ], + [ + "reference", + "author", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 11373457542276896833, + 10633744312666392907, + 18446744073709551615, + 18446744073709551615, + 23, + 36, + 23, + 36, + 8, + 11, + true, + "Bader David A", + "Bader David A" + ], + [ + "reference", + "citation-number", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 15441160910541481860, + 13099555958800192769, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "16", + "16" + ], + [ + "reference", + "container-title", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 10709633855219206820, + 961925091352749103, + 18446744073709551615, + 18446744073709551615, + 88, + 102, + 88, + 102, + 19, + 22, + true, + "2016 IEEE HPEC", + "2016 IEEE HPEC" + ], + [ + "reference", + "date", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 6573474049096193902, + 2260581871937703980, + 18446744073709551615, + 18446744073709551615, + 104, + 113, + 104, + 113, + 23, + 26, + true, + "2016; 1-9", + "2016; 1-9" + ], + [ + "reference", + "title", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 16641826418709048621, + 2282440200854755549, + 18446744073709551615, + 18446744073709551615, + 45, + 86, + 45, + 86, + 13, + 18, + true, + "Mathematical foundations of the GraphBLAS", + "Mathematical foundations of the GraphBLAS" + ], + [ + "expression", + "common", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 329104162180805867, + 691614670836427228, + 18446744073709551615, + 18446744073709551615, + 38, + 44, + 38, + 44, + 12, + 13, + true, + "et al", + "et al." + ], + [ + "sentence", + "", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 11950444114006552808, + 9443399481099004568, + 18446744073709551615, + 18446744073709551615, + 4, + 87, + 4, + 87, + 2, + 19, + true, + "Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS.", + "Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS." + ], + [ + "sentence", + "", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 515474695412696961, + 6296343322569991622, + 18446744073709551615, + 18446744073709551615, + 93, + 103, + 93, + 103, + 20, + 23, + true, + "IEEE HPEC.", + "IEEE HPEC." + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 14652280730090715542, + 12791881049692147803, + 18446744073709551615, + 18446744073709551615, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Jeremy K", + "Jeremy K" + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 8106352035144611671, + 4513564816050590788, + 18446744073709551615, + 18446744073709551615, + 14, + 21, + 14, + 21, + 5, + 7, + true, + "Peter A", + "Peter A" + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 11373457542276896833, + 10633744312666392907, + 18446744073709551615, + 18446744073709551615, + 23, + 36, + 23, + 36, + 8, + 11, + true, + "Bader David A", + "Bader David A" + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 1710816933025300174, + 12692422355800601705, + 18446744073709551615, + 18446744073709551615, + 38, + 69, + 38, + 69, + 12, + 15, + true, + "et al Mathematical foundations", + "et al. Mathematical foundations" + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 6560769162974074266, + 16780287060117651276, + 18446744073709551615, + 18446744073709551615, + 93, + 102, + 93, + 102, + 20, + 22, + true, + "IEEE HPEC", + "IEEE HPEC" + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 6560668489345557302, + 18401509678758935464, + 18446744073709551615, + 18446744073709551615, + 77, + 86, + 77, + 86, + 17, + 18, + true, + "GraphBLAS", + "GraphBLAS" + ], + [ + "reference", + "author", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 14650296444613217893, + 2015187192231796797, + 18446744073709551615, + 18446744073709551615, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Ariful A", + "Ariful A" + ], + [ + "reference", + "author", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 6611311853662317003, + 219996680584521934, + 18446744073709551615, + 18446744073709551615, + 14, + 23, + 14, + 23, + 5, + 7, + true, + "Mathias J", + "Mathias J" + ], + [ + "reference", + "author", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8106396252822508385, + 5214697480984905265, + 18446744073709551615, + 18446744073709551615, + 25, + 32, + 25, + 32, + 8, + 10, + true, + "Aydin B", + "Aydin B" + ], + [ + "reference", + "author", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 1138450846564361539, + 13516232875802125645, + 18446744073709551615, + 18446744073709551615, + 34, + 46, + 34, + 46, + 11, + 15, + true, + "Ng Esmond G.", + "Ng Esmond G." + ], + [ + "reference", + "citation-number", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 15441160910541481861, + 5749903657566610070, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "17", + "17" + ], + [ + "reference", + "container-title", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 10701056912570859123, + 6872071652706022831, + 18446744073709551615, + 18446744073709551615, + 106, + 175, + 106, + 175, + 22, + 30, + true, + "2017 IEEE International Parallel and Distributed Processing Symposium", + "2017 IEEE International Parallel and Distributed Processing Symposium" + ], + [ + "reference", + "container-title", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 329104161866629985, + 4498077561104002021, + 18446744073709551615, + 18446744073709551615, + 177, + 182, + 177, + 182, + 31, + 32, + true, + "IPDPS", + "IPDPS" + ], + [ + "reference", + "date", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 7366731910384143591, + 4074534479596534226, + 18446744073709551615, + 18446744073709551615, + 185, + 196, + 185, + 196, + 34, + 37, + true, + "2017: 22-31", + "2017: 22-31" + ], + [ + "reference", + "title", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 18143113072209505450, + 5317689214231344382, + 18446744073709551615, + 18446744073709551615, + 47, + 104, + 47, + 104, + 15, + 21, + true, + "The reverse Cuthill-McKee algorithm in distributed-memory", + "The reverse Cuthill-McKee algorithm in distributed-memory" + ], + [ + "name", + "name-concatenation", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 961990462724452746, + 8774024725617322003, + 18446744073709551615, + 18446744073709551615, + 59, + 69, + 59, + 69, + 17, + 17, + false, + "Cuthill-Mc", + "Cuthill-Mc" + ], + [ + "parenthesis", + "round brackets", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8106341251871154495, + 14756599361695942733, + 18446744073709551615, + 18446744073709551615, + 176, + 183, + 176, + 183, + 30, + 33, + true, + "(IPDPS)", + "(IPDPS)" + ], + [ + "expression", + "word-concatenation", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 17823074998039859280, + 107872343608001032, + 18446744073709551615, + 18446744073709551615, + 59, + 72, + 59, + 72, + 17, + 18, + true, + "Cuthill-McKee", + "Cuthill-McKee" + ], + [ + "expression", + "word-concatenation", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 4632629274084093489, + 13294623612112829573, + 18446744073709551615, + 18446744073709551615, + 86, + 104, + 86, + 104, + 20, + 21, + true, + "distributed-memory", + "distributed-memory" + ], + [ + "sentence", + "", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8625319757665987765, + 8201554304424324610, + 18446744073709551615, + 18446744073709551615, + 4, + 46, + 4, + 46, + 2, + 15, + true, + "Ariful A, Mathias J, Aydin B, Ng Esmond G.", + "Ariful A, Mathias J, Aydin B, Ng Esmond G." + ], + [ + "sentence", + "", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 12182767863167085705, + 5974957861280769301, + 18446744073709551615, + 18446744073709551615, + 47, + 105, + 47, + 105, + 15, + 22, + true, + "The reverse Cuthill-McKee algorithm in distributed-memory.", + "The reverse Cuthill-McKee algorithm in distributed-memory." + ], + [ + "sentence", + "", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 10555308991053583656, + 8625840606506711403, + 18446744073709551615, + 18446744073709551615, + 111, + 184, + 111, + 184, + 23, + 34, + true, + "IEEE International Parallel and Distributed Processing Symposium (IPDPS).", + "IEEE International Parallel and Distributed Processing Symposium (IPDPS)." + ], + [ + "term", + "enum-term-mark-4", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8480286396580246383, + 10467027636114534702, + 18446744073709551615, + 18446744073709551615, + 111, + 175, + 111, + 175, + 23, + 30, + true, + "IEEE International Parallel and Distributed Processing Symposium", + "IEEE International Parallel and Distributed Processing Symposium" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 14650296444613217893, + 2015187192231796797, + 18446744073709551615, + 18446744073709551615, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Ariful A", + "Ariful A" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 6611311853662317003, + 219996680584521934, + 18446744073709551615, + 18446744073709551615, + 14, + 23, + 14, + 23, + 5, + 7, + true, + "Mathias J", + "Mathias J" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8106396252822508385, + 5214697480984905265, + 18446744073709551615, + 18446744073709551615, + 25, + 32, + 25, + 32, + 8, + 10, + true, + "Aydin B", + "Aydin B" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 7695963223911460273, + 9674237366617659132, + 18446744073709551615, + 18446744073709551615, + 34, + 45, + 34, + 45, + 11, + 14, + true, + "Ng Esmond G", + "Ng Esmond G" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 2782012163530472710, + 8155961828160311400, + 18446744073709551615, + 18446744073709551615, + 51, + 72, + 51, + 72, + 16, + 18, + true, + "reverse Cuthill-McKee", + "reverse Cuthill-McKee" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8242037745725614235, + 1744334934210275218, + 18446744073709551615, + 18446744073709551615, + 111, + 138, + 111, + 138, + 23, + 26, + true, + "IEEE International Parallel", + "IEEE International Parallel" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 1179067127764944952, + 11104445064314775390, + 18446744073709551615, + 18446744073709551615, + 143, + 175, + 143, + 175, + 27, + 30, + true, + "Distributed Processing Symposium", + "Distributed Processing Symposium" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 4632629274084093489, + 13294623612112829573, + 18446744073709551615, + 18446744073709551615, + 86, + 104, + 86, + 104, + 20, + 21, + true, + "distributed-memory", + "distributed-memory" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 329104161866629985, + 4498077561104002021, + 18446744073709551615, + 18446744073709551615, + 177, + 182, + 177, + 182, + 31, + 32, + true, + "IPDPS", + "IPDPS" + ], + [ + "reference", + "author", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 9277063416399937233, + 9921862040524615824, + 18446744073709551615, + 18446744073709551615, + 4, + 14, + 4, + 14, + 2, + 4, + true, + "Rukhsana S", + "Rukhsana S" + ], + [ + "reference", + "author", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 8106479273814684994, + 12770854321018137055, + 18446744073709551615, + 18446744073709551615, + 16, + 23, + 16, + 23, + 5, + 7, + true, + "Anila U", + "Anila U" + ], + [ + "reference", + "author", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 16985962715048067011, + 772749724699858811, + 18446744073709551615, + 18446744073709551615, + 25, + 37, + 25, + 37, + 8, + 11, + true, + "Chughtai IR.", + "Chughtai IR." + ], + [ + "reference", + "citation-number", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 15441160910541481862, + 17618650105274567067, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "18", + "18" + ], + [ + "reference", + "date", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 389609625548757410, + 18165604049296771030, + 18446744073709551615, + 18446744073709551615, + 88, + 92, + 88, + 92, + 19, + 20, + true, + "2005", + "2005" + ], + [ + "reference", + "date", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 389609625548757410, + 18165604049296772353, + 18446744073709551615, + 18446744073709551615, + 133, + 137, + 133, + 137, + 25, + 26, + true, + "2005", + "2005" + ], + [ + "reference", + "title", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 12931819230736677229, + 14856363282836835505, + 18446744073709551615, + 18446744073709551615, + 38, + 86, + 38, + 86, + 11, + 18, + true, + "Review of storage techniques for sparse matrices", + "Review of storage techniques for sparse matrices" + ], + [ + "reference", + "title", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 1320248361117940781, + 5199561905441189481, + 18446744073709551615, + 18446744073709551615, + 93, + 131, + 93, + 131, + 20, + 24, + true, + "Pakistan Section Multitopic Conference", + "Pakistan Section Multitopic Conference" + ], + [ + "name", + "person-name", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 14652303699240355001, + 1172671267071592161, + 18446744073709551615, + 18446744073709551615, + 35, + 44, + 35, + 44, + 9, + 12, + false, + "R Review", + "R. Review" + ], + [ + "sentence", + "", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 8633051299923742554, + 9812031180740342815, + 18446744073709551615, + 18446744073709551615, + 4, + 87, + 4, + 87, + 2, + 19, + true, + "Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices.", + "Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices." + ], + [ + "sentence", + "", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 14938776978172003836, + 10713320247466750625, + 18446744073709551615, + 18446744073709551615, + 93, + 132, + 93, + 132, + 20, + 25, + true, + "Pakistan Section Multitopic Conference.", + "Pakistan Section Multitopic Conference." + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 9277063416399937233, + 9921862040524615824, + 18446744073709551615, + 18446744073709551615, + 4, + 14, + 4, + 14, + 2, + 4, + true, + "Rukhsana S", + "Rukhsana S" + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 8106479273814684994, + 12770854321018137055, + 18446744073709551615, + 18446744073709551615, + 16, + 23, + 16, + 23, + 5, + 7, + true, + "Anila U", + "Anila U" + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 4371320678784428525, + 15222832476664208124, + 18446744073709551615, + 18446744073709551615, + 25, + 36, + 25, + 36, + 8, + 10, + true, + "Chughtai IR", + "Chughtai IR" + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 5298571882490963181, + 13490463183486071840, + 18446744073709551615, + 18446744073709551615, + 48, + 66, + 48, + 66, + 13, + 15, + true, + "storage techniques", + "storage techniques" + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 5038915387230346489, + 6702839604458857240, + 18446744073709551615, + 18446744073709551615, + 71, + 86, + 71, + 86, + 16, + 18, + true, + "sparse matrices", + "sparse matrices" + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 1320248361117940781, + 5199561905441189481, + 18446744073709551615, + 18446744073709551615, + 93, + 131, + 93, + 131, + 20, + 24, + true, + "Pakistan Section Multitopic Conference", + "Pakistan Section Multitopic Conference" + ], + [ + "reference", + "author", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 14638563242508500832, + 2752940376292253295, + 18446744073709551615, + 18446744073709551615, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Welte DH", + "Welte DH" + ], + [ + "reference", + "author", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 1317380608127935415, + 8792991722627090893, + 18446744073709551615, + 18446744073709551615, + 14, + 25, + 14, + 25, + 5, + 7, + true, + "Horsfield B", + "Horsfield B" + ], + [ + "reference", + "author", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 4172892994592792372, + 2160694788416159558, + 18446744073709551615, + 18446744073709551615, + 27, + 46, + 27, + 46, + 8, + 12, + true, + "Baker DR. Petroleum", + "Baker DR. Petroleum" + ], + [ + "reference", + "author", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 5561358046097680519, + 15395766198352277458, + 18446744073709551615, + 18446744073709551615, + 51, + 67, + 51, + 67, + 13, + 16, + true, + "Basin Evolution:", + "Basin Evolution:" + ], + [ + "reference", + "citation-number", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 15441160910541481863, + 8099163979199984832, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "19", + "19" + ], + [ + "reference", + "date", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 16381206542172924133, + 9981189962990674937, + 18446744073709551615, + 18446744073709551615, + 169, + 175, + 169, + 175, + 33, + 35, + true, + "; 1997", + "; 1997" + ], + [ + "reference", + "journal", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 2422127895824933260, + 7556925222758925531, + 18446744073709551615, + 18446744073709551615, + 106, + 133, + 106, + 133, + 21, + 26, + true, + "Geology, and Basin Modeling", + "Geology, and Basin Modeling" + ], + [ + "reference", + "location", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 11741555610443867475, + 15927342063432766432, + 18446744073709551615, + 18446744073709551615, + 135, + 153, + 135, + 153, + 27, + 30, + true, + "Berlin Heidelberg:", + "Berlin Heidelberg:" + ], + [ + "reference", + "title", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 1197865287651023688, + 134234943361095181, + 18446744073709551615, + 18446744073709551615, + 68, + 104, + 68, + 104, + 16, + 20, + true, + "Insights from Petroleum Geochemistry", + "Insights from Petroleum Geochemistry" + ], + [ + "name", + "person-name", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 9811818043271335175, + 5388942193352320893, + 18446744073709551615, + 18446744073709551615, + 34, + 46, + 34, + 46, + 9, + 12, + false, + "R Petroleum", + "R. Petroleum" + ], + [ + "name", + "name-concatenation", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 3197612152806046883, + 2512966040017790311, + 18446744073709551615, + 18446744073709551615, + 154, + 169, + 154, + 169, + 30, + 33, + true, + "Springer-Verlag", + "Springer-Verlag" + ], + [ + "sentence", + "", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 1083095538878710059, + 6410519906814031679, + 18446744073709551615, + 18446744073709551615, + 4, + 176, + 4, + 176, + 2, + 36, + true, + "Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997.", + "Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997." + ], + [ + "term", + "enum-term-mark-4", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 10939118393609776387, + 16464700551363827911, + 18446744073709551615, + 18446744073709551615, + 37, + 66, + 37, + 66, + 11, + 15, + true, + "Petroleum and Basin Evolution", + "Petroleum and Basin Evolution" + ], + [ + "term", + "enum-term-mark-4", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 17006005703909820457, + 12901071012590413163, + 18446744073709551615, + 18446744073709551615, + 82, + 133, + 82, + 133, + 18, + 26, + true, + "Petroleum Geochemistry, Geology, and Basin Modeling", + "Petroleum Geochemistry, Geology, and Basin Modeling" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 14638563242508500832, + 2752940376292253295, + 18446744073709551615, + 18446744073709551615, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Welte DH", + "Welte DH" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 1317380608127935415, + 8792991722627090893, + 18446744073709551615, + 18446744073709551615, + 14, + 25, + 14, + 25, + 5, + 7, + true, + "Horsfield B", + "Horsfield B" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 14650425433297857126, + 17200611816160356686, + 18446744073709551615, + 18446744073709551615, + 27, + 35, + 27, + 35, + 8, + 10, + true, + "Baker DR", + "Baker DR" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 1538397892452668306, + 4387773196933243696, + 18446744073709551615, + 18446744073709551615, + 51, + 66, + 51, + 66, + 13, + 15, + true, + "Basin Evolution", + "Basin Evolution" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 6297994706585107052, + 15599666871412118732, + 18446744073709551615, + 18446744073709551615, + 82, + 104, + 82, + 104, + 18, + 20, + true, + "Petroleum Geochemistry", + "Petroleum Geochemistry" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 18229361067854714750, + 1667565054890986990, + 18446744073709551615, + 18446744073709551615, + 119, + 133, + 119, + 133, + 24, + 26, + true, + "Basin Modeling", + "Basin Modeling" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 1961730974653605813, + 770501763529322377, + 18446744073709551615, + 18446744073709551615, + 135, + 152, + 135, + 152, + 27, + 29, + true, + "Berlin Heidelberg", + "Berlin Heidelberg" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 14652305210070084086, + 12917423391664842605, + 18446744073709551615, + 18446744073709551615, + 68, + 76, + 68, + 76, + 16, + 17, + true, + "Insights", + "Insights" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 8106351569626681077, + 12181079068816099642, + 18446744073709551615, + 18446744073709551615, + 106, + 113, + 106, + 113, + 21, + 22, + true, + "Geology", + "Geology" + ], + [ + "reference", + "author", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 8106351306870445011, + 7231860053894851093, + 18446744073709551615, + 18446744073709551615, + 37, + 44, + 37, + 44, + 9, + 11, + true, + "Dolfi M", + "Dolfi M" + ], + [ + "reference", + "author", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 8106479197488776816, + 6022123083747398357, + 18446744073709551615, + 18446744073709551615, + 46, + 53, + 46, + 53, + 12, + 15, + true, + "Auer C.", + "Auer C." + ], + [ + "reference", + "date", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 12668563530344603848, + 14820206483220239473, + 18446744073709551615, + 18446744073709551615, + 173, + 183, + 173, + 183, + 35, + 36, + true, + "2020;1:e20", + "2020;1:e20" + ], + [ + "reference", + "journal", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 10525943314116263182, + 11312474291607917611, + 18446744073709551615, + 18446744073709551615, + 153, + 171, + 153, + 171, + 31, + 34, + true, + "Applied AI Letters", + "Applied AI Letters" + ], + [ + "reference", + "title", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 912378836411683307, + 17710224191321636054, + 18446744073709551615, + 18446744073709551615, + 0, + 35, + 0, + 35, + 0, + 8, + true, + "How to cite this article: Staar PWJ", + "How to cite this article: Staar PWJ" + ], + [ + "reference", + "title", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 4375081646508065875, + 5872894694925809811, + 18446744073709551615, + 18446744073709551615, + 54, + 151, + 54, + 151, + 15, + 30, + true, + "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", + "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora" + ], + [ + "reference", + "url", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 751450063096904044, + 2161551171101074414, + 18446744073709551615, + 18446744073709551615, + 185, + 216, + 185, + 216, + 37, + 49, + true, + "https://doi.org/10.1002/ail2.20", + "https://doi.org/10.1002/ail2.20" + ], + [ + "name", + "person-name", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 13763699434920414504, + 14310942059015767454, + 18446744073709551615, + 18446744073709551615, + 46, + 60, + 46, + 60, + 12, + 16, + true, + "Auer C Corpus", + "Auer C. Corpus" + ], + [ + "expression", + "wtoken-concatenation", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 12668563530344603848, + 14820206483220239473, + 18446744073709551615, + 18446744073709551615, + 173, + 183, + 173, + 183, + 35, + 36, + true, + "2020;1:e20", + "2020;1:e20" + ], + [ + "sentence", + "", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 3399237007757536794, + 10798111996929910377, + 18446744073709551615, + 18446744073709551615, + 0, + 152, + 0, + 152, + 0, + 31, + true, + "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora.", + "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora." + ], + [ + "sentence", + "", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 9017840063542546137, + 3221707506812699045, + 18446744073709551615, + 18446744073709551615, + 153, + 172, + 153, + 172, + 31, + 35, + true, + "Applied AI Letters.", + "Applied AI Letters." + ], + [ + "sentence", + "", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 7365754457409807236, + 16458707549922411068, + 18446744073709551615, + 18446744073709551615, + 173, + 184, + 173, + 184, + 35, + 37, + true, + "2020;1:e20.", + "2020;1:e20." + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 6052191155307735802, + 18032874033977513490, + 18446744073709551615, + 18446744073709551615, + 26, + 35, + 26, + 35, + 6, + 8, + true, + "Staar PWJ", + "Staar PWJ" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 8106351306870445011, + 7231860053894851093, + 18446744073709551615, + 18446744073709551615, + 37, + 44, + 37, + 44, + 9, + 11, + true, + "Dolfi M", + "Dolfi M" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 1821123588367592853, + 13143602266977617422, + 18446744073709551615, + 18446744073709551615, + 54, + 79, + 54, + 79, + 15, + 18, + true, + "Corpus processing service", + "Corpus processing service" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 12981440865159980116, + 8397818236619725491, + 18446744073709551615, + 18446744073709551615, + 83, + 107, + 83, + 107, + 20, + 23, + true, + "Knowledge Graph platform", + "Knowledge Graph platform" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 13671659409933113155, + 12446642666303205360, + 18446744073709551615, + 18446744073709551615, + 119, + 140, + 119, + 140, + 25, + 28, + true, + "deep data exploration", + "deep data exploration" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 10525943314116263182, + 11312474291607917611, + 18446744073709551615, + 18446744073709551615, + 153, + 171, + 153, + 171, + 31, + 34, + true, + "Applied AI Letters", + "Applied AI Letters" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 389609625537446556, + 7737228572826305234, + 18446744073709551615, + 18446744073709551615, + 208, + 212, + 208, + 212, + 46, + 48, + true, + "/ail", + "/ail" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 8106397798288310212, + 6892140235696191542, + 18446744073709551615, + 18446744073709551615, + 17, + 24, + 17, + 24, + 4, + 5, + true, + "article", + "article" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 8106398483106473371, + 4135599828090019002, + 18446744073709551615, + 18446744073709551615, + 144, + 151, + 144, + 151, + 29, + 30, + true, + "corpora", + "corpora" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 329104161533497127, + 15533708503938485693, + 18446744073709551615, + 18446744073709551615, + 185, + 190, + 185, + 190, + 37, + 38, + true, + "https", + "https" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 12178341415895452239, + 2509341829612471905, + 18446744073709551615, + 18446744073709551615, + 193, + 196, + 193, + 196, + 41, + 42, + true, + "doi", + "doi" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 12178341415895623052, + 2509292496994469077, + 18446744073709551615, + 18446744073709551615, + 197, + 200, + 197, + 200, + 43, + 44, + true, + "org", + "org" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 389609625548777262, + 8826555294676663632, + 18446744073709551615, + 18446744073709551615, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 389609625548777251, + 8826555296349648778, + 18446744073709551615, + 18446744073709551615, + 119, + 123, + 119, + 123, + 14, + 14, + false, + "2023", + "2023" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 8104408072666212335, + 13552219042525319352, + 18446744073709551615, + 18446744073709551615, + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 389609625548868096, + 8826558551385119058, + 18446744073709551615, + 18446744073709551615, + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 14654386914267794441, + 12796143052106760105, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 17767354399704235162, + 7753390158484899261, + 18446744073709551615, + 18446744073709551615, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 15441160910541481791, + 3518619573290839093, + 18446744073709551615, + 18446744073709551615, + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 15441160910541481543, + 3518617976696906498, + 18446744073709551615, + 18446744073709551615, + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 8536069645534292969, + 16063604623463467342, + 18446744073709551615, + 18446744073709551615, + 35, + 87, + 35, + 87, + 8, + 10, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 594099663775968682, + 14698211805947073928, + 18446744073709551615, + 18446744073709551615, + 156, + 208, + 156, + 208, + 22, + 37, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 1697220653346092555, + 8458710314769009562, + 18446744073709551615, + 18446744073709551615, + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 12213187056216195918, + 14309760985361468471, + 18446744073709551615, + 18446744073709551615, + 155, + 209, + 155, + 209, + 21, + 38, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 3856967589249015473, + 3576147774941915841, + 18446744073709551615, + 18446744073709551615, + 35, + 86, + 35, + 86, + 8, + 9, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 15691754593896323724, + 15433429984583237828, + 18446744073709551615, + 18446744073709551615, + 112, + 124, + 112, + 124, + 14, + 15, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "sentence", + "", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 10933383461306782608, + 10178418358179275356, + 18446744073709551615, + 18446744073709551615, + 19, + 125, + 19, + 125, + 6, + 16, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 12466457873768409517, + 3430070082404029638, + 18446744073709551615, + 18446744073709551615, + 88, + 108, + 88, + 108, + 10, + 13, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 12466457873768409517, + 3430070082403846184, + 18446744073709551615, + 18446744073709551615, + 213, + 233, + 213, + 233, + 39, + 42, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 10086796047802705645, + 11637015082128438412, + 18446744073709551615, + 18446744073709551615, + 252, + 263, + 252, + 263, + 47, + 49, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 6687370681685741393, + 17939310132506951168, + 18446744073709551615, + 18446744073709551615, + 284, + 319, + 284, + 319, + 53, + 57, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 329104161846385964, + 16017248647642597247, + 18446744073709551615, + 18446744073709551615, + 134, + 139, + 134, + 139, + 18, + 19, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 969969168017005656, + 2961182532179915323, + 18446744073709551615, + 18446744073709551615, + 144, + 154, + 144, + 154, + 20, + 21, + true, + "Conditions", + "Conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 329104161825278214, + 16021621362593374209, + 18446744073709551615, + 18446744073709551615, + 238, + 243, + 238, + 243, + 43, + 44, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 12178341415895516060, + 12061595171928625555, + 18446744073709551615, + 18446744073709551615, + 247, + 250, + 247, + 250, + 45, + 46, + true, + "use", + "use" + ], + [ + "numval", + "ival", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 15441160910541482672, + 3558959168916500461, + 0, + 2, + 3, + 5, + 3, + 5, + 1, + 3, + true, + "-1", + "-1" + ], + [ + "numval", + "ival", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 15441160910541482673, + 3558959168967845780, + 0, + 3, + 3, + 5, + 3, + 5, + 1, + 3, + true, + "-2", + "-2" + ], + [ + "numval", + "ival", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 15441160910541482674, + 3558959169084991311, + 0, + 4, + 3, + 5, + 3, + 5, + 1, + 3, + true, + "-3", + "-3" + ], + [ + "numval", + "ival", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 15441160910541482676, + 3558959170275494348, + 0, + 5, + 3, + 5, + 3, + 5, + 1, + 3, + true, + "-5", + "-5" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995426, + 7990768689708475978, + 1, + 2, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.82", + "0.82" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995621, + 7990774618103388257, + 1, + 3, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.96", + "0.96" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995627, + 7990774615713296517, + 1, + 4, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.98", + "0.98" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625536250803, + 7990774066976884381, + 1, + 5, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "1.00", + "1.00" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995622, + 7990774618160743993, + 2, + 2, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.93", + "0.93" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995627, + 7990774615712524481, + 2, + 3, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.98", + "0.98" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625536250803, + 7990774066976098009, + 2, + 4, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "1.00", + "1.00" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625536250803, + 7990774066976110280, + 2, + 5, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "1.00", + "1.00" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995293, + 7990774599790700074, + 3, + 2, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.62", + "0.62" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995424, + 7990768689730984037, + 3, + 3, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.80", + "0.80" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995433, + 7990768688117646262, + 3, + 4, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.87", + "0.87" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995623, + 7990774617730131452, + 3, + 5, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.94", + "0.94" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995492, + 7990768692352137559, + 4, + 2, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.73", + "0.73" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995616, + 7990774618481181961, + 4, + 3, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.91", + "0.91" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995623, + 7990774617741217753, + 4, + 4, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.94", + "0.94" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995626, + 7990774612563908250, + 4, + 5, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.97", + "0.97" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995426, + 7990768689764177354, + 5, + 2, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.82", + "0.82" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995623, + 7990774617746212517, + 5, + 3, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.94", + "0.94" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995626, + 7990774612589838230, + 5, + 4, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.97", + "0.97" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995627, + 7990774616182657591, + 5, + 5, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.98", + "0.98" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995426, + 7990768689764403839, + 6, + 2, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.82", + "0.82" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995617, + 7990774618567229989, + 6, + 3, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.92", + "0.92" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995620, + 7990774618125993935, + 6, + 4, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.95", + "0.95" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995626, + 7990774612590090226, + 6, + 5, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.97", + "0.97" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995494, + 7990768689217789732, + 7, + 2, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.75", + "0.75" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995617, + 7990774619359159209, + 7, + 3, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.92", + "0.92" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995621, + 7990774618108893234, + 7, + 4, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.96", + "0.96" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995626, + 7990774612570894765, + 7, + 5, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.97", + "0.97" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995435, + 7990774626011945031, + 8, + 2, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.89", + "0.89" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995621, + 7990774618110730915, + 8, + 3, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.96", + "0.96" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995626, + 7990774612562839849, + 8, + 4, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.97", + "0.97" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995627, + 7990774616172489304, + 8, + 5, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.98", + "0.98" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995429, + 7990774613602439211, + 9, + 2, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.83", + "0.83" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995617, + 7990774619353439571, + 9, + 3, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.92", + "0.92" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995620, + 7990774618123099565, + 9, + 4, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.95", + "0.95" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995621, + 7990774618110462820, + 9, + 5, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "0.96", + "0.96" + ], + [ + "numval", + "ival", + 5929648907277899214, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 17767354399704235161, + 4107390487752235766, + 18446744073709551615, + 18446744073709551615, + 6, + 7, + 6, + 7, + 1, + 2, + true, + "1", + "1" + ], + [ + "parenthesis", + "round brackets", + 5929648907277899214, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 8294861687438024833, + 14638350656524639582, + 18446744073709551615, + 18446744073709551615, + 153, + 199, + 153, + 199, + 29, + 41, + true, + "(abstracts, paragraphs, tables, figures, etc.)", + "(abstracts, paragraphs, tables, figures, etc.)" + ], + [ + "numval", + "enum", + 13588295264109661534, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 15441160910541481982, + 15791677333869698890, + 18446744073709551615, + 18446744073709551615, + 169, + 171, + 169, + 171, + 38, + 39, + true, + "10", + "10" + ], + [ + "numval", + "enum", + 13588295264109661534, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 15441160910541481456, + 15791677297642385524, + 18446744073709551615, + 18446744073709551615, + 178, + 180, + 178, + 180, + 41, + 42, + true, + "90", + "90" + ], + [ + "numval", + "ival", + 13588295264109661534, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 17767354399704235163, + 12968463287909013222, + 18446744073709551615, + 18446744073709551615, + 7, + 8, + 7, + 8, + 1, + 2, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 13588295264109661534, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 17767354399704235156, + 12968463287517139604, + 18446744073709551615, + 18446744073709551615, + 63, + 64, + 63, + 64, + 16, + 17, + true, + "4", + "4" + ], + [ + "numval", + "ival", + 13588295264109661534, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 12178341415896310600, + 15929337508691933278, + 18446744073709551615, + 18446744073709551615, + 131, + 134, + 131, + 134, + 31, + 32, + true, + "500", + "500" + ], + [ + "numval", + "ival", + 5867845979623066511, + "TEXT", + "#/figures/3/captions/0", + 1.0, + 17767354399704235156, + 18092554593004530149, + 18446744073709551615, + 18446744073709551615, + 7, + 8, + 7, + 8, + 1, + 2, + true, + "4", + "4" + ], + [ + "parenthesis", + "round brackets", + 5867845979623066511, + "TEXT", + "#/figures/3/captions/0", + 1.0, + 16380808301115321112, + 8559960025009261802, + 18446744073709551615, + 18446744073709551615, + 331, + 337, + 331, + 337, + 58, + 61, + true, + "(blue)", + "(blue)" + ], + [ + "parenthesis", + "round brackets", + 5867845979623066511, + "TEXT", + "#/figures/3/captions/0", + 1.0, + 329104053567936837, + 7216475808247076767, + 18446744073709551615, + 18446744073709551615, + 349, + 354, + 349, + 354, + 63, + 66, + true, + "(red)", + "(red)" + ], + [ + "parenthesis", + "round brackets", + 5867845979623066511, + "TEXT", + "#/figures/3/captions/0", + 1.0, + 8106340651774642079, + 2159439219661376602, + 18446744073709551615, + 18446744073709551615, + 374, + 381, + 374, + 381, + 69, + 72, + true, + "(green)", + "(green)" + ], + [ + "parenthesis", + "round brackets", + 5867845979623066511, + "TEXT", + "#/figures/3/captions/0", + 1.0, + 14650668794724624115, + 15475675493095400477, + 18446744073709551615, + 18446744073709551615, + 406, + 414, + 406, + 414, + 75, + 78, + true, + "(yellow)", + "(yellow)" + ], + [ + "numval", + "ival", + 3722064109667835816, + "TEXT", + "#/figures/5/captions/0", + 1.0, + 17767354399704235157, + 10987801596440375178, + 18446744073709551615, + 18446744073709551615, + 6, + 7, + 6, + 7, + 1, + 2, + true, + "5", + "5" + ], + [ + "numval", + "ival", + 5492278710328857395, + "TEXT", + "#/figures/6/captions/0", + 1.0, + 17767354399704235158, + 17570255895164198091, + 18446744073709551615, + 18446744073709551615, + 7, + 8, + 7, + 8, + 1, + 2, + true, + "6", + "6" + ], + [ + "numval", + "irng", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 12178341415896434995, + 8200167909602972064, + 18446744073709551615, + 18446744073709551615, + 315, + 318, + 315, + 318, + 61, + 62, + true, + "3-5", + "3-5" + ], + [ + "numval", + "irng", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 12178341415896302497, + 8200192220266339866, + 18446744073709551615, + 18446744073709551615, + 320, + 323, + 320, + 323, + 63, + 64, + true, + "7-9", + "7-9" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 17767354399704235159, + 18431093858453549758, + 18446744073709551615, + 18446744073709551615, + 7, + 8, + 7, + 8, + 1, + 2, + true, + "7", + "7" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 17767354399704235161, + 18431093858355240250, + 18446744073709551615, + 18446744073709551615, + 263, + 264, + 263, + 264, + 50, + 51, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 17767354399704235162, + 18431093858405147593, + 18446744073709551615, + 18446744073709551615, + 269, + 270, + 269, + 270, + 52, + 53, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15441160910541481983, + 2025497815005383460, + 18446744073709551615, + 18446744073709551615, + 325, + 327, + 325, + 327, + 65, + 66, + true, + "11", + "11" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15441160910541481976, + 2025497814654711226, + 18446744073709551615, + 18446744073709551615, + 329, + 331, + 329, + 331, + 67, + 68, + true, + "12", + "12" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 17767354399704235158, + 18431093863766763921, + 18446744073709551615, + 18446744073709551615, + 388, + 389, + 388, + 389, + 77, + 78, + true, + "6", + "6" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15441160910541481982, + 2025497814813301901, + 18446744073709551615, + 18446744073709551615, + 391, + 393, + 391, + 393, + 79, + 80, + true, + "10", + "10" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15441160910541481977, + 2025497813562735514, + 18446744073709551615, + 18446744073709551615, + 395, + 397, + 395, + 397, + 81, + 82, + true, + "13", + "13" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15441160910541481978, + 2025497813546162547, + 18446744073709551615, + 18446744073709551615, + 399, + 401, + 399, + 401, + 83, + 84, + true, + "14", + "14" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15441160910541481979, + 2025497814802361732, + 18446744073709551615, + 18446744073709551615, + 467, + 469, + 467, + 469, + 98, + 99, + true, + "15", + "15" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15441160910541481860, + 2025497812649619486, + 18446744073709551615, + 18446744073709551615, + 487, + 489, + 487, + 489, + 104, + 105, + true, + "16", + "16" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15441160910541481861, + 2025497812905248884, + 18446744073709551615, + 18446744073709551615, + 511, + 513, + 511, + 513, + 110, + 111, + true, + "17", + "17" + ], + [ + "numval", + "ival", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15441160910541481862, + 2025497812887754216, + 18446744073709551615, + 18446744073709551615, + 592, + 594, + 592, + 594, + 126, + 127, + true, + "18", + "18" + ], + [ + "parenthesis", + "round brackets", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 329104053344678624, + 1614848038244927360, + 18446744073709551615, + 18446744073709551615, + 75, + 80, + 75, + 80, + 11, + 14, + true, + "(PSE)", + "(PSE)" + ], + [ + "parenthesis", + "round brackets", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 11694060085626929525, + 16844235190282340832, + 18446744073709551615, + 18446744073709551615, + 196, + 227, + 196, + 227, + 35, + 44, + true, + "(eg, source, reservoir or seal)", + "(eg, source, reservoir or seal)" + ], + [ + "parenthesis", + "round brackets", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15032279876472675989, + 2718089939750888205, + 18446744073709551615, + 18446744073709551615, + 252, + 271, + 252, + 271, + 48, + 54, + true, + "(worktasks 1 and 2)", + "(worktasks 1 and 2)" + ], + [ + "parenthesis", + "round brackets", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15359775366274806025, + 15080229009031614827, + 18446744073709551615, + 18446744073709551615, + 304, + 332, + 304, + 332, + 59, + 69, + true, + "(worktasks 3-5, 7-9, 11, 12)", + "(worktasks 3-5, 7-9, 11, 12)" + ], + [ + "parenthesis", + "round brackets", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 15753836491225885957, + 4303796380513418775, + 18446744073709551615, + 18446744073709551615, + 377, + 402, + 377, + 402, + 75, + 85, + true, + "(worktasks 6, 10, 13, 14)", + "(worktasks 6, 10, 13, 14)" + ], + [ + "parenthesis", + "round brackets", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 9594608305374444490, + 549028029822782819, + 18446744073709551615, + 18446744073709551615, + 457, + 470, + 457, + 470, + 96, + 100, + true, + "(worktask 15)", + "(worktask 15)" + ], + [ + "parenthesis", + "round brackets", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 9594608305374447126, + 549027974665356501, + 18446744073709551615, + 18446744073709551615, + 477, + 490, + 477, + 490, + 102, + 106, + true, + "(worktask 16)", + "(worktask 16)" + ], + [ + "parenthesis", + "round brackets", + 14119822239274862236, + "TEXT", + "#/figures/7/captions/0", + 1.0, + 9594608305374447191, + 549027973638755167, + 18446744073709551615, + 18446744073709551615, + 501, + 514, + 501, + 514, + 108, + 112, + true, + "(worktask 17)", + "(worktask 17)" + ] + ], + "headers": [ + "type", + "subtype", + "subj_hash", + "subj_name", + "subj_path", + "conf", + "hash", + "ihash", + "coor_i", + "coor_j", + "char_i", + "char_j", + "ctok_i", + "ctok_j", + "wtok_i", + "wtok_j", + "wtok-match", + "name", + "original" + ] + }, + "meta": [ + { + "$ref": "#/page-headers/0" + }, + { + "$ref": "#/page-headers/1" + }, + { + "$ref": "#/page-headers/2" + }, + { + "$ref": "#/page-headers/3" + }, + { + "$ref": "#/footnotes/0" + }, + { + "$ref": "#/footnotes/1" + }, + { + "$ref": "#/page-footers/0" + }, + { + "$ref": "#/page-footers/1" + }, + { + "$ref": "#/page-headers/4" + }, + { + "$ref": "#/page-headers/5" + }, + { + "$ref": "#/figures/0/captions/0" + }, + { + "$ref": "#/page-headers/6" + }, + { + "$ref": "#/page-headers/7" + }, + { + "$ref": "#/page-headers/8" + }, + { + "$ref": "#/page-headers/9" + }, + { + "$ref": "#/page-headers/10" + }, + { + "$ref": "#/page-headers/11" + }, + { + "$ref": "#/figures/2/captions/0" + }, + { + "$ref": "#/page-headers/12" + }, + { + "$ref": "#/figures/3/captions/0" + }, + { + "$ref": "#/page-headers/13" + }, + { + "$ref": "#/page-headers/14" + }, + { + "$ref": "#/figures/5/captions/0" + }, + { + "$ref": "#/page-headers/15" + }, + { + "$ref": "#/figures/6/captions/0" + }, + { + "$ref": "#/page-headers/16" + }, + { + "$ref": "#/tables/0/captions/0" + }, + { + "$ref": "#/page-headers/17" + }, + { + "$ref": "#/page-headers/18" + } + ], + "model-application": { + "message": "success", + "success": true + }, + "other": [], + "page-dimensions": [ + { + "height": 782.3619995117188, + "page": 1, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 2, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 3, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 4, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 5, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 6, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 7, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 8, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 9, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 10, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 11, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 12, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 13, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 14, + "width": 595.2760009765625 + }, + { + "height": 782.3619995117188, + "page": 15, + "width": 595.2760009765625 + } + ], + "page-elements": [ + { + "bbox": [ + 44.78739929199219, + 743.57568359375, + 131.78494262695312, + 750.7937622070312 + ], + "iref": "#/page-headers/0", + "name": "page-header", + "orig-order": 15, + "page": 1, + "span": [ + 0, + 28 + ], + "sref": "#/page-elements/0", + "text-order": 0, + "type": "page-header" + }, + { + "bbox": [ + 146.3265380859375, + 744.093017578125, + 229.3131561279297, + 751.4437866210938 + ], + "iref": "#/page-headers/1", + "name": "page-header", + "orig-order": 16, + "page": 1, + "span": [ + 0, + 26 + ], + "sref": "#/page-elements/1", + "text-order": 1, + "type": "page-header" + }, + { + "bbox": [ + 243.7840576171875, + 743.953369140625, + 332.99346923828125, + 751.3480224609375 + ], + "iref": "#/page-headers/2", + "name": "page-header", + "orig-order": 17, + "page": 1, + "span": [ + 0, + 27 + ], + "sref": "#/page-elements/2", + "text-order": 2, + "type": "page-header" + }, + { + "bbox": [ + 44.6877326965332, + 730.7138671875, + 106.1191635131836, + 737.30078125 + ], + "iref": "#/page-headers/3", + "name": "page-header", + "orig-order": 18, + "page": 1, + "span": [ + 0, + 21 + ], + "sref": "#/page-elements/3", + "text-order": 3, + "type": "page-header" + }, + { + "bbox": [ + 43.95979690551758, + 702.3956298828125, + 91.94560241699219, + 712.1011962890625 + ], + "iref": "#/texts/0", + "name": "subtitle-level-1", + "orig-order": 0, + "page": 1, + "span": [ + 0, + 6 + ], + "sref": "#/page-elements/4", + "text-order": 4, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.709346771240234, + 631.2674560546875, + 520.7667236328125, + 672.0067749023438 + ], + "iref": "#/texts/1", + "name": "subtitle-level-1", + "orig-order": 1, + "page": 1, + "span": [ + 0, + 97 + ], + "sref": "#/page-elements/5", + "text-order": 5, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.78739929199219, + 593.6065673828125, + 146.4720458984375, + 606.4735717773438 + ], + "iref": "#/texts/2", + "name": "subtitle-level-1", + "orig-order": 2, + "page": 1, + "span": [ + 0, + 17 + ], + "sref": "#/page-elements/6", + "text-order": 6, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 160.10069274902344, + 593.7201538085938, + 163.59266662597656, + 605.1080322265625 + ], + "iref": "#/texts/3", + "name": "text", + "orig-order": 3, + "page": 1, + "span": [ + 0, + 1 + ], + "sref": "#/page-elements/7", + "text-order": 7, + "type": "paragraph" + }, + { + "bbox": [ + 170.39439392089844, + 593.4388427734375, + 265.1170959472656, + 607.2059326171875 + ], + "iref": "#/texts/4", + "name": "subtitle-level-1", + "orig-order": 4, + "page": 1, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/8", + "text-order": 8, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 274.5636901855469, + 593.7201538085938, + 278.0556640625, + 605.1080322265625 + ], + "iref": "#/texts/5", + "name": "text", + "orig-order": 5, + "page": 1, + "span": [ + 0, + 1 + ], + "sref": "#/page-elements/9", + "text-order": 9, + "type": "paragraph" + }, + { + "bbox": [ + 290.0411682128906, + 593.2594604492188, + 387.6253967285156, + 606.9615478515625 + ], + "iref": "#/texts/6", + "name": "text", + "orig-order": 6, + "page": 1, + "span": [ + 0, + 14 + ], + "sref": "#/page-elements/10", + "text-order": 10, + "type": "paragraph" + }, + { + "bbox": [ + 44.78739929199219, + 559.602294921875, + 182.68014526367188, + 567.3045654296875 + ], + "iref": "#/texts/7", + "name": "text", + "orig-order": 7, + "page": 1, + "span": [ + 0, + 38 + ], + "sref": "#/page-elements/11", + "text-order": 11, + "type": "paragraph" + }, + { + "bbox": [ + 44.78739929199219, + 493.4922180175781, + 164.66183471679688, + 545.3080444335938 + ], + "iref": "#/texts/8", + "name": "text", + "orig-order": 8, + "page": 1, + "span": [ + 0, + 121 + ], + "sref": "#/page-elements/12", + "text-order": 12, + "type": "paragraph" + }, + { + "bbox": [ + 209.1903839111328, + 552.2532348632812, + 249.1348114013672, + 561.7433471679688 + ], + "iref": "#/texts/9", + "name": "subtitle-level-1", + "orig-order": 9, + "page": 1, + "span": [ + 0, + 8 + ], + "sref": "#/page-elements/13", + "text-order": 13, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 208.6128387451172, + 251.58563232421875, + 543.8583984375, + 547.040771484375 + ], + "iref": "#/texts/10", + "name": "text", + "orig-order": 10, + "page": 1, + "span": [ + 0, + 1624 + ], + "sref": "#/page-elements/14", + "text-order": 14, + "type": "paragraph" + }, + { + "bbox": [ + 209.21104431152344, + 228.2025146484375, + 269.01025390625, + 237.28173828125 + ], + "iref": "#/texts/11", + "name": "subtitle-level-1", + "orig-order": 11, + "page": 1, + "span": [ + 0, + 8 + ], + "sref": "#/page-elements/15", + "text-order": 15, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 208.79600524902344, + 214.08453369140625, + 401.0297546386719, + 222.97467041015625 + ], + "iref": "#/texts/12", + "name": "text", + "orig-order": 12, + "page": 1, + "span": [ + 0, + 53 + ], + "sref": "#/page-elements/16", + "text-order": 16, + "type": "paragraph" + }, + { + "bbox": [ + 44.27853012084961, + 187.51553344726562, + 189.71961975097656, + 199.65557861328125 + ], + "iref": "#/texts/13", + "name": "subtitle-level-1", + "orig-order": 13, + "page": 1, + "span": [ + 0, + 16 + ], + "sref": "#/page-elements/17", + "text-order": 17, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.78739929199219, + 96.98406982421875, + 552.6513061523438, + 172.33074951171875 + ], + "iref": "#/texts/14", + "name": "text", + "orig-order": 14, + "page": 1, + "span": [ + 0, + 639 + ], + "sref": "#/page-elements/18", + "text-order": 18, + "type": "paragraph" + }, + { + "bbox": [ + 44.787384033203125, + 52.49696731567383, + 540.7015991210938, + 70.33258056640625 + ], + "iref": "#/footnotes/0", + "name": "footnote", + "orig-order": 19, + "page": 1, + "span": [ + 0, + 201 + ], + "sref": "#/page-elements/19", + "text-order": 19, + "type": "footnote" + }, + { + "bbox": [ + 44.787384033203125, + 42.44549560546875, + 272.1662902832031, + 50.207763671875 + ], + "iref": "#/footnotes/1", + "name": "footnote", + "orig-order": 20, + "page": 1, + "span": [ + 0, + 75 + ], + "sref": "#/page-elements/20", + "text-order": 20, + "type": "footnote" + }, + { + "bbox": [ + 44.38350296020508, + 12.301444053649902, + 135.58876037597656, + 30.8690185546875 + ], + "iref": "#/page-footers/0", + "name": "page-footer", + "orig-order": 21, + "page": 1, + "span": [ + 0, + 64 + ], + "sref": "#/page-elements/21", + "text-order": 21, + "type": "page-footer" + }, + { + "bbox": [ + 400.53094482421875, + 22.279802322387695, + 550.6204223632812, + 29.6954345703125 + ], + "iref": "#/page-footers/1", + "name": "page-footer", + "orig-order": 22, + "page": 1, + "span": [ + 0, + 42 + ], + "sref": "#/page-elements/22", + "text-order": 22, + "type": "page-footer" + }, + { + "bbox": [ + 46.48820114135742, + 751.4075317382812, + 68.55958557128906, + 758.0504760742188 + ], + "iref": "#/texts/15", + "name": "text", + "orig-order": 40, + "page": 2, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/23", + "text-order": 23, + "type": "paragraph" + }, + { + "bbox": [ + 510.634765625, + 751.4635620117188, + 550.9636840820312, + 758.332763671875 + ], + "iref": "#/page-headers/4", + "name": "page-header", + "orig-order": 41, + "page": 2, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/24", + "text-order": 24, + "type": "page-header" + }, + { + "bbox": [ + 45.97464370727539, + 604.0350952148438, + 554.3433227539062, + 732.5863037109375 + ], + "iref": "#/texts/16", + "name": "text", + "orig-order": 23, + "page": 2, + "span": [ + 0, + 1082 + ], + "sref": "#/page-elements/25", + "text-order": 25, + "type": "paragraph" + }, + { + "bbox": [ + 46.485626220703125, + 513.0453491210938, + 553.2366943359375, + 601.0419921875 + ], + "iref": "#/texts/17", + "name": "text", + "orig-order": 24, + "page": 2, + "span": [ + 0, + 836 + ], + "sref": "#/page-elements/26", + "text-order": 26, + "type": "paragraph" + }, + { + "bbox": [ + 46.48820114135742, + 500.0622253417969, + 340.59906005859375, + 509.4723205566406 + ], + "iref": "#/texts/18", + "name": "text", + "orig-order": 25, + "page": 2, + "span": [ + 0, + 69 + ], + "sref": "#/page-elements/27", + "text-order": 27, + "type": "paragraph" + }, + { + "bbox": [ + 57.86075973510742, + 487.0791015625, + 492.157958984375, + 496.63543701171875 + ], + "iref": "#/texts/19", + "name": "text", + "orig-order": 26, + "page": 2, + "span": [ + 0, + 101 + ], + "sref": "#/page-elements/28", + "text-order": 28, + "type": "paragraph" + }, + { + "bbox": [ + 46.48820114135742, + 461.0568542480469, + 262.5708312988281, + 470.5727233886719 + ], + "iref": "#/texts/20", + "name": "list-item", + "orig-order": 27, + "page": 2, + "span": [ + 0, + 49 + ], + "sref": "#/page-elements/29", + "text-order": 29, + "type": "paragraph" + }, + { + "bbox": [ + 45.779930114746094, + 448.07373046875, + 241.75213623046875, + 457.51177978515625 + ], + "iref": "#/texts/21", + "name": "list-item", + "orig-order": 28, + "page": 2, + "span": [ + 0, + 45 + ], + "sref": "#/page-elements/30", + "text-order": 30, + "type": "paragraph" + }, + { + "bbox": [ + 46.48820114135742, + 435.03460693359375, + 174.95623779296875, + 444.5535583496094 + ], + "iref": "#/texts/22", + "name": "list-item", + "orig-order": 29, + "page": 2, + "span": [ + 0, + 29 + ], + "sref": "#/page-elements/31", + "text-order": 31, + "type": "paragraph" + }, + { + "bbox": [ + 46.48820114135742, + 422.0514831542969, + 528.8121948242188, + 431.5508728027344 + ], + "iref": "#/texts/23", + "name": "list-item", + "orig-order": 30, + "page": 2, + "span": [ + 0, + 112 + ], + "sref": "#/page-elements/32", + "text-order": 32, + "type": "paragraph" + }, + { + "bbox": [ + 45.387489318847656, + 409.068359375, + 446.47918701171875, + 418.8954772949219 + ], + "iref": "#/texts/24", + "name": "list-item", + "orig-order": 31, + "page": 2, + "span": [ + 0, + 94 + ], + "sref": "#/page-elements/33", + "text-order": 33, + "type": "paragraph" + }, + { + "bbox": [ + 45.996150970458984, + 292.05224609375, + 553.0557861328125, + 392.69879150390625 + ], + "iref": "#/texts/25", + "name": "text", + "orig-order": 32, + "page": 2, + "span": [ + 0, + 869 + ], + "sref": "#/page-elements/34", + "text-order": 34, + "type": "paragraph" + }, + { + "bbox": [ + 46.48820114135742, + 265.89093017578125, + 551.4827270507812, + 288.8219299316406 + ], + "iref": "#/texts/26", + "name": "text", + "orig-order": 33, + "page": 2, + "span": [ + 0, + 140 + ], + "sref": "#/page-elements/35", + "text-order": 35, + "type": "paragraph" + }, + { + "bbox": [ + 46.371070861816406, + 240.06375122070312, + 515.491943359375, + 249.5263671875 + ], + "iref": "#/texts/27", + "name": "list-item", + "orig-order": 34, + "page": 2, + "span": [ + 0, + 111 + ], + "sref": "#/page-elements/36", + "text-order": 36, + "type": "paragraph" + }, + { + "bbox": [ + 46.48820114135742, + 214.04150390625, + 551.0504760742188, + 236.58538818359375 + ], + "iref": "#/texts/28", + "name": "list-item", + "orig-order": 35, + "page": 2, + "span": [ + 0, + 180 + ], + "sref": "#/page-elements/37", + "text-order": 37, + "type": "paragraph" + }, + { + "bbox": [ + 45.20487594604492, + 201.05838012695312, + 376.7724914550781, + 210.76416015625 + ], + "iref": "#/texts/29", + "name": "list-item", + "orig-order": 36, + "page": 2, + "span": [ + 0, + 82 + ], + "sref": "#/page-elements/38", + "text-order": 38, + "type": "paragraph" + }, + { + "bbox": [ + 46.2375373840332, + 110.07154846191406, + 553.1372680664062, + 184.7841796875 + ], + "iref": "#/texts/30", + "name": "text", + "orig-order": 37, + "page": 2, + "span": [ + 0, + 647 + ], + "sref": "#/page-elements/39", + "text-order": 39, + "type": "paragraph" + }, + { + "bbox": [ + 46.487701416015625, + 84.04928588867188, + 550.5083618164062, + 107.71282958984375 + ], + "iref": "#/texts/31", + "name": "text", + "orig-order": 38, + "page": 2, + "span": [ + 0, + 202 + ], + "sref": "#/page-elements/40", + "text-order": 40, + "type": "paragraph" + }, + { + "bbox": [ + 45.976261138916016, + 45.04500961303711, + 551.8382568359375, + 81.24627685546875 + ], + "iref": "#/texts/32", + "name": "text", + "orig-order": 39, + "page": 2, + "span": [ + 0, + 346 + ], + "sref": "#/page-elements/41", + "text-order": 41, + "type": "paragraph" + }, + { + "bbox": [ + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 + ], + "iref": "#/texts/33", + "name": "text", + "orig-order": 42, + "page": 2, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/42", + "text-order": 42, + "type": "paragraph" + }, + { + "bbox": [ + 44.50688552856445, + 751.4635620117188, + 85.01602935791016, + 758.0504760742188 + ], + "iref": "#/page-headers/5", + "name": "page-header", + "orig-order": 50, + "page": 3, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/43", + "text-order": 43, + "type": "page-header" + }, + { + "bbox": [ + 528.5497436523438, + 751.4075317382812, + 550.62109375, + 758.0504760742188 + ], + "iref": "#/texts/34", + "name": "text", + "orig-order": 51, + "page": 3, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/44", + "text-order": 44, + "type": "paragraph" + }, + { + "bbox": [ + 44.78739929199219, + 695.0468139648438, + 549.4096069335938, + 730.4614868164062 + ], + "iref": "#/texts/35", + "name": "text", + "orig-order": 43, + "page": 3, + "span": [ + 0, + 262 + ], + "sref": "#/page-elements/45", + "text-order": 45, + "type": "paragraph" + }, + { + "bbox": [ + 44.78739929199219, + 655.5153198242188, + 378.15191650390625, + 666.9031982421875 + ], + "iref": "#/texts/36", + "name": "subtitle-level-1", + "orig-order": 44, + "page": 3, + "span": [ + 0, + 37 + ], + "sref": "#/page-elements/46", + "text-order": 46, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.785400390625, + 552.0484008789062, + 549.7849731445312, + 639.5802001953125 + ], + "iref": "#/texts/37", + "name": "text", + "orig-order": 45, + "page": 3, + "span": [ + 0, + 796 + ], + "sref": "#/page-elements/47", + "text-order": 47, + "type": "paragraph" + }, + { + "bbox": [ + 44.785430908203125, + 409.068603515625, + 554.4052124023438, + 548.475341796875 + ], + "iref": "#/texts/38", + "name": "text", + "orig-order": 46, + "page": 3, + "span": [ + 0, + 1141 + ], + "sref": "#/page-elements/48", + "text-order": 48, + "type": "paragraph" + }, + { + "bbox": [ + 44.78739929199219, + 369.4996032714844, + 134.88641357421875, + 380.88751220703125 + ], + "iref": "#/texts/39", + "name": "subtitle-level-1", + "orig-order": 47, + "page": 3, + "span": [ + 0, + 14 + ], + "sref": "#/page-elements/49", + "text-order": 49, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.524391174316406, + 317.6519470214844, + 552.3914184570312, + 353.5248107910156 + ], + "iref": "#/texts/40", + "name": "text", + "orig-order": 48, + "page": 3, + "span": [ + 0, + 232 + ], + "sref": "#/page-elements/50", + "text-order": 50, + "type": "paragraph" + }, + { + "bbox": [ + 78.5494384765625, + 102.71893310546875, + 512.3916625976562, + 284.9899597167969 + ], + "iref": "#/figures/0", + "name": "picture", + "orig-order": 53, + "page": 3, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/51", + "text-order": 51, + "type": "figure" + }, + { + "bbox": [ + 44.78328323364258, + 45.39774703979492, + 545.7940673828125, + 89.4708251953125 + ], + "iref": "#/figures/0/captions/0", + "name": "caption", + "orig-order": 49, + "page": 3, + "span": [ + 0, + 498 + ], + "sref": "#/page-elements/52", + "text-order": 52, + "type": "caption" + }, + { + "bbox": [ + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 + ], + "iref": "#/texts/41", + "name": "text", + "orig-order": 52, + "page": 3, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/53", + "text-order": 53, + "type": "paragraph" + }, + { + "bbox": [ + 46.48820114135742, + 751.4075317382812, + 68.55958557128906, + 758.0504760742188 + ], + "iref": "#/texts/42", + "name": "text", + "orig-order": 63, + "page": 4, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/54", + "text-order": 54, + "type": "paragraph" + }, + { + "bbox": [ + 510.634765625, + 751.4635620117188, + 550.9420166015625, + 758.4869384765625 + ], + "iref": "#/page-headers/6", + "name": "page-header", + "orig-order": 64, + "page": 4, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/55", + "text-order": 55, + "type": "page-header" + }, + { + "bbox": [ + 45.14111328125, + 720.4854736328125, + 157.7607421875, + 732.3443603515625 + ], + "iref": "#/texts/43", + "name": "subtitle-level-1", + "orig-order": 54, + "page": 4, + "span": [ + 0, + 18 + ], + "sref": "#/page-elements/56", + "text-order": 56, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.48820114135742, + 656.0805053710938, + 553.5469360351562, + 704.7728881835938 + ], + "iref": "#/texts/44", + "name": "text", + "orig-order": 55, + "page": 4, + "span": [ + 0, + 403 + ], + "sref": "#/page-elements/57", + "text-order": 57, + "type": "paragraph" + }, + { + "bbox": [ + 45.56229019165039, + 604.0359497070312, + 553.0910034179688, + 652.8948974609375 + ], + "iref": "#/texts/45", + "name": "text", + "orig-order": 56, + "page": 4, + "span": [ + 0, + 417 + ], + "sref": "#/page-elements/58", + "text-order": 58, + "type": "paragraph" + }, + { + "bbox": [ + 45.6591796875, + 565.0864868164062, + 552.8568115234375, + 600.9397583007812 + ], + "iref": "#/texts/46", + "name": "text", + "orig-order": 57, + "page": 4, + "span": [ + 0, + 282 + ], + "sref": "#/page-elements/59", + "text-order": 59, + "type": "paragraph" + }, + { + "bbox": [ + 45.497798919677734, + 525.5185546875, + 161.91403198242188, + 536.9064331054688 + ], + "iref": "#/texts/47", + "name": "subtitle-level-1", + "orig-order": 58, + "page": 4, + "span": [ + 0, + 18 + ], + "sref": "#/page-elements/60", + "text-order": 60, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.28074645996094, + 435.03485107421875, + 552.7772827148438, + 509.80706787109375 + ], + "iref": "#/texts/48", + "name": "text", + "orig-order": 59, + "page": 4, + "span": [ + 0, + 647 + ], + "sref": "#/page-elements/61", + "text-order": 61, + "type": "paragraph" + }, + { + "bbox": [ + 45.999271392822266, + 370.0654296875, + 551.750244140625, + 431.6009521484375 + ], + "iref": "#/texts/49", + "name": "text", + "orig-order": 60, + "page": 4, + "span": [ + 0, + 542 + ], + "sref": "#/page-elements/62", + "text-order": 62, + "type": "paragraph" + }, + { + "bbox": [ + 46.37678527832031, + 304.9195251464844, + 551.427001953125, + 366.6332092285156 + ], + "iref": "#/texts/50", + "name": "text", + "orig-order": 61, + "page": 4, + "span": [ + 0, + 580 + ], + "sref": "#/page-elements/63", + "text-order": 63, + "type": "paragraph" + }, + { + "bbox": [ + 46.48663330078125, + 45.39759826660156, + 540.3204956054688, + 67.21272277832031 + ], + "iref": "#/texts/51", + "name": "text", + "orig-order": 62, + "page": 4, + "span": [ + 0, + 220 + ], + "sref": "#/page-elements/64", + "text-order": 64, + "type": "paragraph" + }, + { + "bbox": [ + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 + ], + "iref": "#/texts/52", + "name": "text", + "orig-order": 65, + "page": 4, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/65", + "text-order": 65, + "type": "paragraph" + }, + { + "bbox": [ + 44.041500091552734, + 751.3096313476562, + 85.72028350830078, + 759.7291870117188 + ], + "iref": "#/page-headers/7", + "name": "page-header", + "orig-order": 72, + "page": 5, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/66", + "text-order": 66, + "type": "page-header" + }, + { + "bbox": [ + 454.1357421875, + 745.7154541015625, + 550.62109375, + 761.0070190429688 + ], + "iref": "#/figures/1", + "name": "picture", + "orig-order": 73, + "page": 5, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/67", + "text-order": 67, + "type": "figure" + }, + { + "bbox": [ + 44.78594970703125, + 483.39947509765625, + 548.2582397460938, + 529.3165283203125 + ], + "iref": "#/texts/53", + "name": "text", + "orig-order": 71, + "page": 5, + "span": [ + 0, + 421 + ], + "sref": "#/page-elements/68", + "text-order": 68, + "type": "paragraph" + }, + { + "bbox": [ + 44.78684997558594, + 370.0640563964844, + 549.865478515625, + 444.5719299316406 + ], + "iref": "#/texts/54", + "name": "text", + "orig-order": 66, + "page": 5, + "span": [ + 0, + 687 + ], + "sref": "#/page-elements/69", + "text-order": 69, + "type": "paragraph" + }, + { + "bbox": [ + 44.206939697265625, + 330.4949035644531, + 223.93128967285156, + 341.8828125 + ], + "iref": "#/texts/55", + "name": "subtitle-level-1", + "orig-order": 67, + "page": 5, + "span": [ + 0, + 31 + ], + "sref": "#/page-elements/70", + "text-order": 70, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.78684616088867, + 149.07435607910156, + 549.819091796875, + 314.53570556640625 + ], + "iref": "#/texts/56", + "name": "text", + "orig-order": 68, + "page": 5, + "span": [ + 0, + 1517 + ], + "sref": "#/page-elements/71", + "text-order": 71, + "type": "paragraph" + }, + { + "bbox": [ + 43.94790267944336, + 109.50601959228516, + 254.47779846191406, + 120.89392852783203 + ], + "iref": "#/texts/57", + "name": "subtitle-level-1", + "orig-order": 69, + "page": 5, + "span": [ + 0, + 36 + ], + "sref": "#/page-elements/72", + "text-order": 72, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.78739929199219, + 45.00958251953125, + 549.1444091796875, + 93.61456298828125 + ], + "iref": "#/texts/58", + "name": "text", + "orig-order": 70, + "page": 5, + "span": [ + 0, + 384 + ], + "sref": "#/page-elements/73", + "text-order": 73, + "type": "paragraph" + }, + { + "bbox": [ + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 + ], + "iref": "#/texts/59", + "name": "text", + "orig-order": 74, + "page": 5, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/74", + "text-order": 74, + "type": "paragraph" + }, + { + "bbox": [ + 46.48820114135742, + 751.4075317382812, + 68.55958557128906, + 758.0504760742188 + ], + "iref": "#/texts/60", + "name": "text", + "orig-order": 89, + "page": 6, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/75", + "text-order": 75, + "type": "paragraph" + }, + { + "bbox": [ + 510.634765625, + 751.4635620117188, + 550.9879150390625, + 758.9756469726562 + ], + "iref": "#/page-headers/8", + "name": "page-header", + "orig-order": 90, + "page": 6, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/76", + "text-order": 76, + "type": "page-header" + }, + { + "bbox": [ + 45.78483581542969, + 669.0628051757812, + 554.4027709960938, + 730.823486328125 + ], + "iref": "#/texts/61", + "name": "text", + "orig-order": 75, + "page": 6, + "span": [ + 0, + 564 + ], + "sref": "#/page-elements/77", + "text-order": 77, + "type": "paragraph" + }, + { + "bbox": [ + 45.753639221191406, + 629.4933471679688, + 148.00445556640625, + 641.5734252929688 + ], + "iref": "#/texts/62", + "name": "subtitle-level-1", + "orig-order": 76, + "page": 6, + "span": [ + 0, + 16 + ], + "sref": "#/page-elements/78", + "text-order": 78, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.48820114135742, + 591.0541381835938, + 552.9049682617188, + 613.8143310546875 + ], + "iref": "#/texts/63", + "name": "text", + "orig-order": 77, + "page": 6, + "span": [ + 0, + 225 + ], + "sref": "#/page-elements/79", + "text-order": 79, + "type": "paragraph" + }, + { + "bbox": [ + 46.445133209228516, + 552.0497436523438, + 553.362548828125, + 575.2869873046875 + ], + "iref": "#/texts/64", + "name": "list-item", + "orig-order": 78, + "page": 6, + "span": [ + 0, + 179 + ], + "sref": "#/page-elements/80", + "text-order": 80, + "type": "paragraph" + }, + { + "bbox": [ + 45.744380950927734, + 526.0834350585938, + 553.5414428710938, + 548.8994140625 + ], + "iref": "#/texts/65", + "name": "list-item", + "orig-order": 79, + "page": 6, + "span": [ + 0, + 133 + ], + "sref": "#/page-elements/81", + "text-order": 81, + "type": "paragraph" + }, + { + "bbox": [ + 44.8809700012207, + 513.0443115234375, + 481.36083984375, + 523.5081787109375 + ], + "iref": "#/texts/66", + "name": "list-item", + "orig-order": 80, + "page": 6, + "span": [ + 0, + 101 + ], + "sref": "#/page-elements/82", + "text-order": 82, + "type": "paragraph" + }, + { + "bbox": [ + 46.38796615600586, + 435.0345458984375, + 553.393310546875, + 497.0226135253906 + ], + "iref": "#/texts/67", + "name": "text", + "orig-order": 81, + "page": 6, + "span": [ + 0, + 525 + ], + "sref": "#/page-elements/83", + "text-order": 83, + "type": "paragraph" + }, + { + "bbox": [ + 45.54835891723633, + 344.0406799316406, + 555.0050048828125, + 432.1236877441406 + ], + "iref": "#/texts/68", + "name": "text", + "orig-order": 82, + "page": 6, + "span": [ + 0, + 693 + ], + "sref": "#/page-elements/84", + "text-order": 84, + "type": "paragraph" + }, + { + "bbox": [ + 46.25617980957031, + 304.472900390625, + 469.55108642578125, + 315.8608093261719 + ], + "iref": "#/texts/69", + "name": "subtitle-level-1", + "orig-order": 83, + "page": 6, + "span": [ + 0, + 48 + ], + "sref": "#/page-elements/85", + "text-order": 85, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.48820114135742, + 265.92974853515625, + 552.6448364257812, + 288.6134338378906 + ], + "iref": "#/texts/70", + "name": "text", + "orig-order": 84, + "page": 6, + "span": [ + 0, + 166 + ], + "sref": "#/page-elements/86", + "text-order": 86, + "type": "paragraph" + }, + { + "bbox": [ + 46.377140045166016, + 240.049560546875, + 429.5157165527344, + 249.76214599609375 + ], + "iref": "#/texts/71", + "name": "list-item", + "orig-order": 85, + "page": 6, + "span": [ + 0, + 92 + ], + "sref": "#/page-elements/87", + "text-order": 87, + "type": "paragraph" + }, + { + "bbox": [ + 45.62164306640625, + 227.0850830078125, + 346.3638916015625, + 237.665283203125 + ], + "iref": "#/texts/72", + "name": "list-item", + "orig-order": 86, + "page": 6, + "span": [ + 0, + 73 + ], + "sref": "#/page-elements/88", + "text-order": 88, + "type": "paragraph" + }, + { + "bbox": [ + 45.322208404541016, + 162.0574493408203, + 553.8873901367188, + 210.65191650390625 + ], + "iref": "#/texts/73", + "name": "text", + "orig-order": 87, + "page": 6, + "span": [ + 0, + 472 + ], + "sref": "#/page-elements/89", + "text-order": 89, + "type": "paragraph" + }, + { + "bbox": [ + 45.762847900390625, + 71.06684875488281, + 554.2275390625, + 158.80230712890625 + ], + "iref": "#/texts/74", + "name": "text", + "orig-order": 88, + "page": 6, + "span": [ + 0, + 761 + ], + "sref": "#/page-elements/90", + "text-order": 90, + "type": "paragraph" + }, + { + "bbox": [ + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 + ], + "iref": "#/texts/75", + "name": "text", + "orig-order": 91, + "page": 6, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/91", + "text-order": 91, + "type": "paragraph" + }, + { + "bbox": [ + 44.35243225097656, + 751.4635620117188, + 85.42164611816406, + 758.9300537109375 + ], + "iref": "#/page-headers/9", + "name": "page-header", + "orig-order": 103, + "page": 7, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/92", + "text-order": 92, + "type": "page-header" + }, + { + "bbox": [ + 528.5497436523438, + 751.4075317382812, + 550.62109375, + 758.0504760742188 + ], + "iref": "#/texts/76", + "name": "text", + "orig-order": 104, + "page": 7, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/93", + "text-order": 93, + "type": "paragraph" + }, + { + "bbox": [ + 44.78684997558594, + 695.0850830078125, + 549.5508422851562, + 730.6725463867188 + ], + "iref": "#/texts/77", + "name": "text", + "orig-order": 92, + "page": 7, + "span": [ + 0, + 324 + ], + "sref": "#/page-elements/94", + "text-order": 94, + "type": "paragraph" + }, + { + "bbox": [ + 44.71910095214844, + 655.5153198242188, + 236.7943572998047, + 666.9031982421875 + ], + "iref": "#/texts/78", + "name": "subtitle-level-1", + "orig-order": 93, + "page": 7, + "span": [ + 0, + 32 + ], + "sref": "#/page-elements/95", + "text-order": 95, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.78636169433594, + 578.0709838867188, + 549.254638671875, + 640.1705932617188 + ], + "iref": "#/texts/79", + "name": "text", + "orig-order": 94, + "page": 7, + "span": [ + 0, + 502 + ], + "sref": "#/page-elements/96", + "text-order": 96, + "type": "paragraph" + }, + { + "bbox": [ + 44.733577728271484, + 539.0667114257812, + 548.8603515625, + 576.5675048828125 + ], + "iref": "#/texts/80", + "name": "text", + "orig-order": 95, + "page": 7, + "span": [ + 0, + 324 + ], + "sref": "#/page-elements/97", + "text-order": 97, + "type": "paragraph" + }, + { + "bbox": [ + 214.75270080566406, + 498.5877685546875, + 548.7813110351562, + 529.3681030273438 + ], + "iref": "#/texts/81", + "name": "formula", + "orig-order": 96, + "page": 7, + "span": [ + 0, + 92 + ], + "sref": "#/page-elements/98", + "text-order": 98, + "type": "equation" + }, + { + "bbox": [ + 44.784271240234375, + 435.0351257324219, + 548.7523193359375, + 470.5306396484375 + ], + "iref": "#/texts/82", + "name": "text", + "orig-order": 97, + "page": 7, + "span": [ + 0, + 327 + ], + "sref": "#/page-elements/99", + "text-order": 99, + "type": "paragraph" + }, + { + "bbox": [ + 234.89254760742188, + 399.494873046875, + 549.147216796875, + 425.90399169921875 + ], + "iref": "#/texts/83", + "name": "formula", + "orig-order": 98, + "page": 7, + "span": [ + 0, + 114 + ], + "sref": "#/page-elements/100", + "text-order": 100, + "type": "equation" + }, + { + "bbox": [ + 44.786224365234375, + 279.0730285644531, + 549.0149536132812, + 379.8307189941406 + ], + "iref": "#/texts/84", + "name": "text", + "orig-order": 99, + "page": 7, + "span": [ + 0, + 960 + ], + "sref": "#/page-elements/101", + "text-order": 101, + "type": "paragraph" + }, + { + "bbox": [ + 44.786224365234375, + 253.05079650878906, + 549.2977294921875, + 275.7553405761719 + ], + "iref": "#/texts/85", + "name": "text", + "orig-order": 100, + "page": 7, + "span": [ + 0, + 204 + ], + "sref": "#/page-elements/102", + "text-order": 102, + "type": "paragraph" + }, + { + "bbox": [ + 43.776466369628906, + 213.4808349609375, + 380.18682861328125, + 224.8687286376953 + ], + "iref": "#/texts/86", + "name": "subtitle-level-1", + "orig-order": 101, + "page": 7, + "span": [ + 0, + 54 + ], + "sref": "#/page-elements/103", + "text-order": 103, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.78739929199219, + 58.08219528198242, + 550.3234252929688, + 197.4915771484375 + ], + "iref": "#/texts/87", + "name": "text", + "orig-order": 102, + "page": 7, + "span": [ 0, - 4, + 1216 + ], + "sref": "#/page-elements/104", + "text-order": 104, + "type": "paragraph" + }, + { + "bbox": [ + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 + ], + "iref": "#/texts/88", + "name": "text", + "orig-order": 105, + "page": 7, + "span": [ 0, - 4, - true, - "0.96", - "0.96" - ] - ], - "headers": [ - "type", - "subtype", - "subj_hash", - "subj_name", - "subj_path", - "conf", - "hash", - "ihash", - "coor_i", - "coor_j", - "char_i", - "char_j", - "ctok_i", - "ctok_j", - "wtok_i", - "wtok_j", - "wtok-match", - "name", - "original" - ] - }, - "meta": [ + 320 + ], + "sref": "#/page-elements/105", + "text-order": 105, + "type": "paragraph" + }, { - "$ref": "#/page-headers/0" + "bbox": [ + 45.74378967285156, + 751.4075317382812, + 68.55958557128906, + 758.9868774414062 + ], + "iref": "#/page-headers/10", + "name": "page-header", + "orig-order": 113, + "page": 8, + "span": [ + 0, + 6 + ], + "sref": "#/page-elements/106", + "text-order": 106, + "type": "page-header" }, { - "$ref": "#/page-headers/1" + "bbox": [ + 510.634765625, + 751.4635620117188, + 550.921142578125, + 758.3907470703125 + ], + "iref": "#/page-headers/11", + "name": "page-header", + "orig-order": 114, + "page": 8, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/107", + "text-order": 107, + "type": "page-header" }, { - "$ref": "#/page-headers/2" + "bbox": [ + 96.34707641601562, + 537.8071899414062, + 496.8702697753906, + 731.7752075195312 + ], + "iref": "#/figures/2", + "name": "picture", + "orig-order": 116, + "page": 8, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/108", + "text-order": 108, + "type": "figure" }, { - "$ref": "#/page-headers/3" + "bbox": [ + 46.00423812866211, + 491.7976379394531, + 543.2025756835938, + 523.7771606445312 + ], + "iref": "#/figures/2/captions/0", + "name": "caption", + "orig-order": 112, + "page": 8, + "span": [ + 0, + 268 + ], + "sref": "#/page-elements/109", + "text-order": 109, + "type": "caption" }, { - "$ref": "#/footnotes/0" + "bbox": [ + 46.486663818359375, + 370.0644836425781, + 551.9771728515625, + 457.6360168457031 + ], + "iref": "#/texts/89", + "name": "text", + "orig-order": 106, + "page": 8, + "span": [ + 0, + 745 + ], + "sref": "#/page-elements/110", + "text-order": 110, + "type": "paragraph" }, { - "$ref": "#/footnotes/1" + "bbox": [ + 46.486663818359375, + 239.97216796875, + 551.4871215820312, + 366.491455078125 + ], + "iref": "#/texts/90", + "name": "text", + "orig-order": 107, + "page": 8, + "span": [ + 0, + 1027 + ], + "sref": "#/page-elements/111", + "text-order": 111, + "type": "paragraph" }, { - "$ref": "#/page-footers/0" + "bbox": [ + 45.14011764526367, + 200.4981231689453, + 333.7398986816406, + 211.88601684570312 + ], + "iref": "#/texts/91", + "name": "subtitle-level-1", + "orig-order": 108, + "page": 8, + "span": [ + 0, + 48 + ], + "sref": "#/page-elements/112", + "text-order": 112, + "type": "subtitle-level-1" }, { - "$ref": "#/page-footers/1" + "bbox": [ + 45.9116325378418, + 162.0589599609375, + 551.3727416992188, + 184.45217895507812 + ], + "iref": "#/texts/92", + "name": "text", + "orig-order": 109, + "page": 8, + "span": [ + 0, + 179 + ], + "sref": "#/page-elements/113", + "text-order": 113, + "type": "paragraph" }, { - "$ref": "#/page-headers/4" + "bbox": [ + 46.21662902832031, + 84.04818725585938, + 550.9126586914062, + 158.48593139648438 + ], + "iref": "#/texts/93", + "name": "text", + "orig-order": 110, + "page": 8, + "span": [ + 0, + 643 + ], + "sref": "#/page-elements/114", + "text-order": 114, + "type": "paragraph" + }, + { + "bbox": [ + 44.992271423339844, + 45.01641845703125, + 552.1865844726562, + 80.5264892578125 + ], + "iref": "#/texts/94", + "name": "text", + "orig-order": 111, + "page": 8, + "span": [ + 0, + 262 + ], + "sref": "#/page-elements/115", + "text-order": 115, + "type": "paragraph" + }, + { + "bbox": [ + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 + ], + "iref": "#/texts/95", + "name": "text", + "orig-order": 115, + "page": 8, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/116", + "text-order": 116, + "type": "paragraph" + }, + { + "bbox": [ + 44.34560012817383, + 751.4635620117188, + 84.67137145996094, + 758.0504760742188 + ], + "iref": "#/page-headers/12", + "name": "page-header", + "orig-order": 126, + "page": 9, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/117", + "text-order": 117, + "type": "page-header" + }, + { + "bbox": [ + 528.5497436523438, + 751.4075317382812, + 550.62109375, + 758.0504760742188 + ], + "iref": "#/texts/96", + "name": "text", + "orig-order": 127, + "page": 9, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/118", + "text-order": 118, + "type": "paragraph" + }, + { + "bbox": [ + 116.26325988769531, + 507.8388977050781, + 473.644775390625, + 731.2719116210938 + ], + "iref": "#/figures/3", + "name": "picture", + "orig-order": 129, + "page": 9, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/119", + "text-order": 119, + "type": "figure" + }, + { + "bbox": [ + 44.78739929199219, + 447.43023681640625, + 541.6075439453125, + 491.6891174316406 + ], + "iref": "#/figures/3/captions/0", + "name": "caption", + "orig-order": 125, + "page": 9, + "span": [ + 0, + 473 + ], + "sref": "#/page-elements/120", + "text-order": 120, + "type": "caption" + }, + { + "bbox": [ + 44.418067932128906, + 395.521728515625, + 176.333251953125, + 406.9096374511719 + ], + "iref": "#/texts/97", + "name": "subtitle-level-1", + "orig-order": 117, + "page": 9, + "span": [ + 0, + 22 + ], + "sref": "#/page-elements/121", + "text-order": 121, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.78739929199219, + 343.8106384277344, + 548.7684326171875, + 379.5713806152344 + ], + "iref": "#/texts/98", + "name": "text", + "orig-order": 118, + "page": 9, + "span": [ + 0, + 270 + ], + "sref": "#/page-elements/122", + "text-order": 122, + "type": "paragraph" + }, + { + "bbox": [ + 245.61886596679688, + 303.5643005371094, + 549.354736328125, + 334.3446350097656 + ], + "iref": "#/texts/99", + "name": "formula", + "orig-order": 119, + "page": 9, + "span": [ + 0, + 72 + ], + "sref": "#/page-elements/123", + "text-order": 123, + "type": "equation" + }, + { + "bbox": [ + 44.27131652832031, + 266.0909118652344, + 323.5520935058594, + 275.5295104980469 + ], + "iref": "#/texts/100", + "name": "text", + "orig-order": 120, + "page": 9, + "span": [ + 0, + 69 + ], + "sref": "#/page-elements/124", + "text-order": 124, + "type": "paragraph" + }, + { + "bbox": [ + 44.087921142578125, + 226.52023315429688, + 183.25424194335938, + 237.9081268310547 + ], + "iref": "#/texts/101", + "name": "subtitle-level-1", + "orig-order": 121, + "page": 9, + "span": [ + 0, + 23 + ], + "sref": "#/page-elements/125", + "text-order": 125, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.12942886352539, + 149.07611083984375, + 549.1555786132812, + 210.865478515625 + ], + "iref": "#/texts/102", + "name": "text", + "orig-order": 122, + "page": 9, + "span": [ + 0, + 580 + ], + "sref": "#/page-elements/126", + "text-order": 126, + "type": "paragraph" + }, + { + "bbox": [ + 213.45111083984375, + 107.99786376953125, + 548.7833251953125, + 139.26446533203125 + ], + "iref": "#/texts/103", + "name": "formula", + "orig-order": 123, + "page": 9, + "span": [ + 0, + 147 + ], + "sref": "#/page-elements/127", + "text-order": 127, + "type": "equation" + }, + { + "bbox": [ + 44.78630447387695, + 45.0455436706543, + 548.7993774414062, + 80.76483154296875 + ], + "iref": "#/texts/104", + "name": "text", + "orig-order": 124, + "page": 9, + "span": [ + 0, + 307 + ], + "sref": "#/page-elements/128", + "text-order": 128, + "type": "paragraph" }, { - "$ref": "#/page-headers/5" + "bbox": [ + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 + ], + "iref": "#/texts/105", + "name": "text", + "orig-order": 128, + "page": 9, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/129", + "text-order": 129, + "type": "paragraph" }, { - "$ref": "#/figures/0/captions/0" + "bbox": [ + 45.890689849853516, + 743.98095703125, + 143.1890869140625, + 761.30615234375 + ], + "iref": "#/figures/4", + "name": "picture", + "orig-order": 142, + "page": 10, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/130", + "text-order": 130, + "type": "figure" }, { - "$ref": "#/page-headers/6" + "bbox": [ + 510.634765625, + 751.4635620117188, + 550.8926391601562, + 758.5383911132812 + ], + "iref": "#/page-headers/13", + "name": "page-header", + "orig-order": 143, + "page": 10, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/131", + "text-order": 131, + "type": "page-header" }, { - "$ref": "#/page-headers/7" + "bbox": [ + 44.981788635253906, + 720.4783935546875, + 201.29905700683594, + 731.9963989257812 + ], + "iref": "#/texts/106", + "name": "subtitle-level-1", + "orig-order": 130, + "page": 10, + "span": [ + 0, + 26 + ], + "sref": "#/page-elements/132", + "text-order": 132, + "type": "subtitle-level-1" }, { - "$ref": "#/page-headers/8" + "bbox": [ + 46.0963020324707, + 656.0805053710938, + 554.1248779296875, + 705.2210693359375 + ], + "iref": "#/texts/107", + "name": "text", + "orig-order": 131, + "page": 10, + "span": [ + 0, + 390 + ], + "sref": "#/page-elements/133", + "text-order": 133, + "type": "paragraph" }, { - "$ref": "#/page-headers/9" + "bbox": [ + 45.49040985107422, + 616.5106201171875, + 214.94256591796875, + 627.93359375 + ], + "iref": "#/texts/108", + "name": "subtitle-level-1", + "orig-order": 132, + "page": 10, + "span": [ + 0, + 27 + ], + "sref": "#/page-elements/134", + "text-order": 134, + "type": "subtitle-level-1" }, { - "$ref": "#/page-headers/10" + "bbox": [ + 45.356536865234375, + 578.0712890625, + 552.450927734375, + 600.5599365234375 + ], + "iref": "#/texts/109", + "name": "text", + "orig-order": 133, + "page": 10, + "span": [ + 0, + 172 + ], + "sref": "#/page-elements/135", + "text-order": 135, + "type": "paragraph" }, { - "$ref": "#/page-headers/11" + "bbox": [ + 46.00928497314453, + 500.0617370605469, + 551.898193359375, + 574.4982299804688 + ], + "iref": "#/texts/110", + "name": "text", + "orig-order": 134, + "page": 10, + "span": [ + 0, + 691 + ], + "sref": "#/page-elements/136", + "text-order": 136, + "type": "paragraph" }, { - "$ref": "#/figures/2/captions/0" + "bbox": [ + 45.801177978515625, + 448.0732421875, + 552.126953125, + 496.556396484375 + ], + "iref": "#/texts/111", + "name": "text", + "orig-order": 135, + "page": 10, + "span": [ + 0, + 420 + ], + "sref": "#/page-elements/137", + "text-order": 137, + "type": "paragraph" }, { - "$ref": "#/page-headers/12" + "bbox": [ + 46.02473449707031, + 408.5044250488281, + 321.5076904296875, + 419.892333984375 + ], + "iref": "#/texts/112", + "name": "subtitle-level-1", + "orig-order": 136, + "page": 10, + "span": [ + 0, + 31 + ], + "sref": "#/page-elements/138", + "text-order": 138, + "type": "subtitle-level-1" }, { - "$ref": "#/figures/3/captions/0" + "bbox": [ + 46.301429748535156, + 357.0820007324219, + 550.6118774414062, + 392.4583435058594 + ], + "iref": "#/texts/113", + "name": "text", + "orig-order": 137, + "page": 10, + "span": [ + 0, + 334 + ], + "sref": "#/page-elements/139", + "text-order": 139, + "type": "paragraph" }, { - "$ref": "#/page-headers/13" + "bbox": [ + 46.488189697265625, + 253.0490264892578, + 551.0360107421875, + 353.4529724121094 + ], + "iref": "#/texts/114", + "name": "text", + "orig-order": 138, + "page": 10, + "span": [ + 0, + 847 + ], + "sref": "#/page-elements/140", + "text-order": 140, + "type": "paragraph" }, { - "$ref": "#/page-headers/14" + "bbox": [ + 46.440311431884766, + 188.080810546875, + 551.396484375, + 249.4759979248047 + ], + "iref": "#/texts/115", + "name": "text", + "orig-order": 139, + "page": 10, + "span": [ + 0, + 477 + ], + "sref": "#/page-elements/141", + "text-order": 141, + "type": "paragraph" }, { - "$ref": "#/figures/5/captions/0" + "bbox": [ + 46.27632141113281, + 136.03631591796875, + 550.9563598632812, + 184.4517822265625 + ], + "iref": "#/texts/116", + "name": "text", + "orig-order": 140, + "page": 10, + "span": [ + 0, + 404 + ], + "sref": "#/page-elements/142", + "text-order": 142, + "type": "paragraph" }, { - "$ref": "#/page-headers/15" + "bbox": [ + 46.42215347290039, + 58.08152389526367, + 551.0359497070312, + 132.46327209472656 + ], + "iref": "#/texts/117", + "name": "text", + "orig-order": 141, + "page": 10, + "span": [ + 0, + 572 + ], + "sref": "#/page-elements/143", + "text-order": 143, + "type": "paragraph" }, { - "$ref": "#/figures/6/captions/0" + "bbox": [ + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 + ], + "iref": "#/texts/118", + "name": "text", + "orig-order": 144, + "page": 10, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/144", + "text-order": 144, + "type": "paragraph" }, { - "$ref": "#/page-headers/16" + "bbox": [ + 43.98883056640625, + 751.4635620117188, + 84.67137145996094, + 758.0504760742188 + ], + "iref": "#/page-headers/14", + "name": "page-header", + "orig-order": 150, + "page": 11, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/145", + "text-order": 145, + "type": "page-header" }, { - "$ref": "#/tables/0/captions/0" + "bbox": [ + 525.1477661132812, + 751.4075317382812, + 548.775146484375, + 758.0504760742188 + ], + "iref": "#/texts/119", + "name": "text", + "orig-order": 151, + "page": 11, + "span": [ + 0, + 6 + ], + "sref": "#/page-elements/146", + "text-order": 146, + "type": "paragraph" }, { - "$ref": "#/page-headers/17" + "bbox": [ + 48.36570739746094, + 477.8360900878906, + 548.3624267578125, + 732.3331298828125 + ], + "iref": "#/figures/5", + "name": "picture", + "orig-order": 153, + "page": 11, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/147", + "text-order": 147, + "type": "figure" }, { - "$ref": "#/page-headers/18" - } - ], - "model-application": { - "message": "success", - "success": true - }, - "other": [], - "page-dimensions": [ - { - "height": 782.3619995117188, - "page": 1, - "width": 595.2760009765625 + "bbox": [ + 44.78739929199219, + 428.34173583984375, + 541.0477905273438, + 460.564697265625 + ], + "iref": "#/figures/5/captions/0", + "name": "caption", + "orig-order": 149, + "page": 11, + "span": [ + 0, + 275 + ], + "sref": "#/page-elements/148", + "text-order": 148, + "type": "caption" }, { - "height": 782.3619995117188, - "page": 2, - "width": 595.2760009765625 + "bbox": [ + 44.78684997558594, + 331.06005859375, + 550.6510620117188, + 405.4977722167969 + ], + "iref": "#/texts/120", + "name": "text", + "orig-order": 145, + "page": 11, + "span": [ + 0, + 596 + ], + "sref": "#/page-elements/149", + "text-order": 149, + "type": "paragraph" }, { - "height": 782.3619995117188, - "page": 3, - "width": 595.2760009765625 + "bbox": [ + 44.489322662353516, + 291.4902038574219, + 365.9893798828125, + 302.87811279296875 + ], + "iref": "#/texts/121", + "name": "subtitle-level-1", + "orig-order": 146, + "page": 11, + "span": [ + 0, + 39 + ], + "sref": "#/page-elements/150", + "text-order": 150, + "type": "subtitle-level-1" }, { - "height": 782.3619995117188, - "page": 4, - "width": 595.2760009765625 + "bbox": [ + 44.785736083984375, + 175.04168701171875, + 549.7868041992188, + 275.5009460449219 + ], + "iref": "#/texts/122", + "name": "text", + "orig-order": 147, + "page": 11, + "span": [ + 0, + 861 + ], + "sref": "#/page-elements/151", + "text-order": 151, + "type": "paragraph" }, { - "height": 782.3619995117188, - "page": 5, - "width": 595.2760009765625 + "bbox": [ + 44.785736083984375, + 45.043888092041016, + 549.4429931640625, + 171.5908203125 + ], + "iref": "#/texts/123", + "name": "text", + "orig-order": 148, + "page": 11, + "span": [ + 0, + 1189 + ], + "sref": "#/page-elements/152", + "text-order": 152, + "type": "paragraph" }, { - "height": 782.3619995117188, - "page": 6, - "width": 595.2760009765625 + "bbox": [ + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 + ], + "iref": "#/texts/124", + "name": "text", + "orig-order": 152, + "page": 11, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/153", + "text-order": 153, + "type": "paragraph" }, { - "height": 782.3619995117188, - "page": 7, - "width": 595.2760009765625 + "bbox": [ + 46.48820114135742, + 751.4075317382812, + 51.251686096191406, + 758.0504760742188 + ], + "iref": "#/texts/125", + "name": "text", + "orig-order": 166, + "page": 12, + "span": [ + 0, + 2 + ], + "sref": "#/page-elements/154", + "text-order": 154, + "type": "paragraph" }, { - "height": 782.3619995117188, - "page": 8, - "width": 595.2760009765625 + "bbox": [ + 56.12232208251953, + 751.4075317382812, + 70.11566162109375, + 758.0504760742188 + ], + "iref": "#/texts/126", + "name": "text", + "orig-order": 167, + "page": 12, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/155", + "text-order": 155, + "type": "paragraph" }, { - "height": 782.3619995117188, - "page": 9, - "width": 595.2760009765625 + "bbox": [ + 510.634765625, + 751.4635620117188, + 550.7427368164062, + 758.252197265625 + ], + "iref": "#/page-headers/15", + "name": "page-header", + "orig-order": 168, + "page": 12, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/156", + "text-order": 156, + "type": "page-header" }, { - "height": 782.3619995117188, - "page": 10, - "width": 595.2760009765625 + "bbox": [ + 55.876461029052734, + 606.848876953125, + 541.853759765625, + 729.6771850585938 + ], + "iref": "#/figures/6", + "name": "picture", + "orig-order": 164, + "page": 12, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/157", + "text-order": 157, + "type": "figure" }, { - "height": 782.3619995117188, - "page": 11, - "width": 595.2760009765625 + "bbox": [ + 44.766658782958984, + 585.4602661132812, + 387.12310791015625, + 593.5936279296875 + ], + "iref": "#/figures/6/captions/0", + "name": "caption", + "orig-order": 165, + "page": 12, + "span": [ + 0, + 88 + ], + "sref": "#/page-elements/158", + "text-order": 158, + "type": "caption" }, { - "height": 782.3619995117188, + "bbox": [ + 45.36357116699219, + 526.083984375, + 552.5618286132812, + 548.4772338867188 + ], + "iref": "#/texts/127", + "name": "text", + "orig-order": 154, "page": 12, - "width": 595.2760009765625 + "span": [ + 0, + 171 + ], + "sref": "#/page-elements/159", + "text-order": 159, + "type": "paragraph" }, { - "height": 782.3619995117188, - "page": 13, - "width": 595.2760009765625 + "bbox": [ + 46.48820114135742, + 448.0732421875, + 552.16748046875, + 522.4549560546875 + ], + "iref": "#/texts/128", + "name": "text", + "orig-order": 155, + "page": 12, + "span": [ + 0, + 596 + ], + "sref": "#/page-elements/160", + "text-order": 160, + "type": "paragraph" }, { - "height": 782.3619995117188, - "page": 14, - "width": 595.2760009765625 + "bbox": [ + 46.228458404541016, + 382.8196716308594, + 552.1286010742188, + 444.5987854003906 + ], + "iref": "#/texts/129", + "name": "text", + "orig-order": 156, + "page": 12, + "span": [ + 0, + 460 + ], + "sref": "#/page-elements/161", + "text-order": 161, + "type": "paragraph" }, - { - "height": 782.3619995117188, - "page": 15, - "width": 595.2760009765625 - } - ], - "page-elements": [ { "bbox": [ - 44.78739929199219, - 743.57568359375, - 131.78494262695312, - 750.7937622070312 + 46.48820114135742, + 357.0803527832031, + 309.6529846191406, + 366.4904479980469 ], - "iref": "#/page-headers/0", - "name": "page-header", - "orig-order": 15, - "page": 1, + "iref": "#/texts/130", + "name": "list-item", + "orig-order": 157, + "page": 12, "span": [ 0, - 28 + 57 ], - "sref": "#/page-elements/0", - "text-order": 0, - "type": "page-header" + "sref": "#/page-elements/162", + "text-order": 162, + "type": "paragraph" }, { "bbox": [ - 146.3265380859375, - 744.093017578125, - 229.3131561279297, - 751.4437866210938 + 46.48820114135742, + 344.0412292480469, + 336.8304748535156, + 353.6436767578125 ], - "iref": "#/page-headers/1", - "name": "page-header", - "orig-order": 16, - "page": 1, + "iref": "#/texts/131", + "name": "list-item", + "orig-order": 158, + "page": 12, "span": [ 0, - 26 + 65 ], - "sref": "#/page-elements/1", - "text-order": 1, - "type": "page-header" + "sref": "#/page-elements/163", + "text-order": 163, + "type": "paragraph" }, { "bbox": [ - 243.7840576171875, - 743.953369140625, - 332.99346923828125, - 751.3480224609375 + 45.47064971923828, + 331.05810546875, + 478.3088684082031, + 340.54962158203125 ], - "iref": "#/page-headers/2", - "name": "page-header", - "orig-order": 17, - "page": 1, + "iref": "#/texts/132", + "name": "list-item", + "orig-order": 159, + "page": 12, "span": [ 0, - 27 + 101 ], - "sref": "#/page-elements/2", - "text-order": 2, - "type": "page-header" + "sref": "#/page-elements/164", + "text-order": 164, + "type": "paragraph" }, { "bbox": [ - 44.6877326965332, - 730.7138671875, - 106.1191635131836, - 737.30078125 + 46.16604232788086, + 214.04542541503906, + 551.7832641601562, + 314.4459533691406 ], - "iref": "#/page-headers/3", - "name": "page-header", - "orig-order": 18, - "page": 1, + "iref": "#/texts/133", + "name": "text", + "orig-order": 160, + "page": 12, "span": [ 0, - 21 + 923 ], - "sref": "#/page-elements/3", - "text-order": 3, - "type": "page-header" + "sref": "#/page-elements/165", + "text-order": 165, + "type": "paragraph" }, { "bbox": [ - 43.95979690551758, - 702.3956298828125, - 91.94560241699219, - 712.1011962890625 + 46.26358413696289, + 149.0762481689453, + 551.3743896484375, + 210.68536376953125 ], - "iref": "#/texts/0", - "name": "subtitle-level-1", - "orig-order": 0, - "page": 1, + "iref": "#/texts/134", + "name": "text", + "orig-order": 161, + "page": 12, "span": [ 0, - 6 + 569 ], - "sref": "#/page-elements/4", - "text-order": 4, - "type": "subtitle-level-1" + "sref": "#/page-elements/166", + "text-order": 166, + "type": "paragraph" }, { "bbox": [ - 44.709346771240234, - 631.2674560546875, - 520.7667236328125, - 672.0067749023438 + 45.70681381225586, + 71.06546783447266, + 551.875732421875, + 145.5064697265625 ], - "iref": "#/texts/1", - "name": "subtitle-level-1", - "orig-order": 1, - "page": 1, + "iref": "#/texts/135", + "name": "text", + "orig-order": 162, + "page": 12, "span": [ 0, - 97 + 698 ], - "sref": "#/page-elements/5", - "text-order": 5, - "type": "subtitle-level-1" + "sref": "#/page-elements/167", + "text-order": 167, + "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 593.6065673828125, - 146.4720458984375, - 606.4735717773438 + 46.488380432128906, + 45.0432014465332, + 551.8381958007812, + 67.6728515625 ], - "iref": "#/texts/2", - "name": "subtitle-level-1", - "orig-order": 2, - "page": 1, + "iref": "#/texts/136", + "name": "text", + "orig-order": 163, + "page": 12, "span": [ 0, - 17 + 218 ], - "sref": "#/page-elements/6", - "text-order": 6, - "type": "subtitle-level-1" + "sref": "#/page-elements/168", + "text-order": 168, + "type": "paragraph" }, { "bbox": [ - 160.10069274902344, - 593.7201538085938, - 163.59266662597656, - 605.1080322265625 + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 ], - "iref": "#/texts/3", + "iref": "#/texts/137", "name": "text", - "orig-order": 3, - "page": 1, + "orig-order": 169, + "page": 12, "span": [ 0, - 1 + 320 ], - "sref": "#/page-elements/7", - "text-order": 7, + "sref": "#/page-elements/169", + "text-order": 169, "type": "paragraph" }, { "bbox": [ - 170.39439392089844, - 593.4388427734375, - 265.1170959472656, - 607.2059326171875 + 44.31840515136719, + 751.4635620117188, + 84.67137145996094, + 758.0541381835938 ], - "iref": "#/texts/4", - "name": "subtitle-level-1", - "orig-order": 4, - "page": 1, + "iref": "#/page-headers/16", + "name": "page-header", + "orig-order": 177, + "page": 13, "span": [ 0, 13 ], - "sref": "#/page-elements/8", - "text-order": 8, - "type": "subtitle-level-1" + "sref": "#/page-elements/170", + "text-order": 170, + "type": "page-header" }, { "bbox": [ - 274.5636901855469, - 593.7201538085938, - 278.0556640625, - 605.1080322265625 + 525.1477661132812, + 751.4075317382812, + 529.9112548828125, + 758.0504760742188 ], - "iref": "#/texts/5", + "iref": "#/texts/138", "name": "text", - "orig-order": 5, - "page": 1, + "orig-order": 178, + "page": 13, "span": [ 0, - 1 + 2 ], - "sref": "#/page-elements/9", - "text-order": 9, + "sref": "#/page-elements/171", + "text-order": 171, "type": "paragraph" }, { "bbox": [ - 290.0411682128906, - 593.2594604492188, - 387.6253967285156, - 606.9615478515625 + 534.7818603515625, + 751.4075317382812, + 548.775146484375, + 758.0504760742188 ], - "iref": "#/texts/6", + "iref": "#/texts/139", "name": "text", - "orig-order": 6, - "page": 1, + "orig-order": 179, + "page": 13, "span": [ 0, - 14 + 5 ], - "sref": "#/page-elements/10", - "text-order": 10, + "sref": "#/page-elements/172", + "text-order": 172, "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 559.602294921875, - 182.68014526367188, - 567.3045654296875 + 45.15538024902344, + 607.3761596679688, + 548.95361328125, + 731.4898681640625 ], - "iref": "#/texts/7", - "name": "text", - "orig-order": 7, - "page": 1, + "iref": "#/figures/7", + "name": "picture", + "orig-order": 181, + "page": 13, "span": [ 0, - 38 + 0 ], - "sref": "#/page-elements/11", - "text-order": 11, - "type": "paragraph" + "sref": "#/page-elements/173", + "text-order": 173, + "type": "figure" }, { "bbox": [ - 44.78739929199219, - 493.4922180175781, - 164.66183471679688, - 545.3080444335938 + 44.35472869873047, + 537.0355224609375, + 539.2632446289062, + 593.7362670898438 ], - "iref": "#/texts/8", + "iref": "#/figures/7/captions/0", "name": "text", - "orig-order": 8, - "page": 1, + "orig-order": 174, + "page": 13, "span": [ 0, - 121 + 608 ], - "sref": "#/page-elements/12", - "text-order": 12, + "sref": "#/page-elements/174", + "text-order": 174, "type": "paragraph" }, { "bbox": [ - 209.1903839111328, - 552.2532348632812, - 249.1348114013672, - 561.7433471679688 + 44.49153518676758, + 441.90771484375, + 181.1155242919922, + 498.2774658203125 ], - "iref": "#/texts/9", - "name": "subtitle-level-1", - "orig-order": 9, - "page": 1, + "iref": "#/tables/0/captions/0", + "name": "caption", + "orig-order": 175, + "page": 13, "span": [ 0, - 8 + 160 ], - "sref": "#/page-elements/13", - "text-order": 13, - "type": "subtitle-level-1" + "sref": "#/page-elements/175", + "text-order": 175, + "type": "caption" }, { "bbox": [ - 208.6128387451172, - 251.58563232421875, - 543.8583984375, - 547.040771484375 + 210.0027313232422, + 346.577880859375, + 549.0220336914062, + 499.1263427734375 ], - "iref": "#/texts/10", + "iref": "#/tables/0", + "name": "table", + "orig-order": 176, + "page": 13, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/176", + "text-order": 176, + "type": "table" + }, + { + "bbox": [ + 44.78739929199219, + 292.05572509765625, + 549.0201416015625, + 314.4489440917969 + ], + "iref": "#/texts/140", "name": "text", - "orig-order": 10, - "page": 1, + "orig-order": 170, + "page": 13, "span": [ 0, - 1624 + 191 ], - "sref": "#/page-elements/14", - "text-order": 14, + "sref": "#/page-elements/177", + "text-order": 177, "type": "paragraph" }, { "bbox": [ - 209.21104431152344, - 228.2025146484375, - 269.01025390625, - 237.28173828125 + 44.786376953125, + 188.07875061035156, + 550.8748779296875, + 288.5342712402344 + ], + "iref": "#/texts/141", + "name": "text", + "orig-order": 171, + "page": 13, + "span": [ + 0, + 834 + ], + "sref": "#/page-elements/178", + "text-order": 178, + "type": "paragraph" + }, + { + "bbox": [ + 44.73537826538086, + 148.51072692871094, + 178.22747802734375, + 159.89862060546875 ], - "iref": "#/texts/11", + "iref": "#/texts/142", "name": "subtitle-level-1", - "orig-order": 11, - "page": 1, + "orig-order": 172, + "page": 13, "span": [ 0, - 8 + 15 ], - "sref": "#/page-elements/15", - "text-order": 15, + "sref": "#/page-elements/179", + "text-order": 179, "type": "subtitle-level-1" }, { "bbox": [ - 208.79600524902344, - 214.08453369140625, - 401.0297546386719, - 222.97467041015625 + 44.78739929199219, + 58.0830192565918, + 549.515625, + 132.5465087890625 ], - "iref": "#/texts/12", + "iref": "#/texts/143", "name": "text", - "orig-order": 12, - "page": 1, + "orig-order": 173, + "page": 13, "span": [ 0, - 53 + 699 ], - "sref": "#/page-elements/16", - "text-order": 16, + "sref": "#/page-elements/180", + "text-order": 180, "type": "paragraph" }, { "bbox": [ - 44.27853012084961, - 187.51553344726562, - 189.71961975097656, - 199.65557861328125 + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 ], - "iref": "#/texts/13", - "name": "subtitle-level-1", - "orig-order": 13, - "page": 1, + "iref": "#/texts/144", + "name": "text", + "orig-order": 180, + "page": 13, "span": [ 0, - 16 + 320 ], - "sref": "#/page-elements/17", - "text-order": 17, - "type": "subtitle-level-1" + "sref": "#/page-elements/181", + "text-order": 181, + "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 96.98406982421875, - 552.6513061523438, - 172.33074951171875 + 46.48820114135742, + 751.4075317382812, + 70.11566162109375, + 758.0504760742188 ], - "iref": "#/texts/14", + "iref": "#/texts/145", "name": "text", - "orig-order": 14, - "page": 1, + "orig-order": 213, + "page": 14, "span": [ 0, - 639 + 6 ], - "sref": "#/page-elements/18", - "text-order": 18, + "sref": "#/page-elements/182", + "text-order": 182, "type": "paragraph" }, { "bbox": [ - 44.787384033203125, - 52.49696731567383, - 540.7015991210938, - 70.33258056640625 + 510.634765625, + 751.3934326171875, + 551.0859985351562, + 759.209228515625 ], - "iref": "#/footnotes/0", - "name": "footnote", - "orig-order": 19, - "page": 1, + "iref": "#/page-headers/17", + "name": "page-header", + "orig-order": 214, + "page": 14, "span": [ 0, - 201 + 13 ], - "sref": "#/page-elements/19", - "text-order": 19, - "type": "footnote" + "sref": "#/page-elements/183", + "text-order": 183, + "type": "page-header" }, { "bbox": [ - 44.787384033203125, - 42.44549560546875, - 272.1662902832031, - 50.207763671875 + 46.38566589355469, + 708.0682373046875, + 552.190673828125, + 731.0924072265625 ], - "iref": "#/footnotes/1", - "name": "footnote", - "orig-order": 20, - "page": 1, + "iref": "#/texts/146", + "name": "text", + "orig-order": 182, + "page": 14, "span": [ 0, - 75 + 119 ], - "sref": "#/page-elements/20", - "text-order": 20, - "type": "footnote" + "sref": "#/page-elements/184", + "text-order": 184, + "type": "paragraph" }, { "bbox": [ - 44.38350296020508, - 12.301444053649902, - 135.58876037597656, - 30.8690185546875 + 45.289154052734375, + 669.0628051757812, + 553.278076171875, + 705.6804809570312 ], - "iref": "#/page-footers/0", - "name": "page-footer", - "orig-order": 21, - "page": 1, + "iref": "#/texts/147", + "name": "text", + "orig-order": 183, + "page": 14, "span": [ 0, - 64 + 322 ], - "sref": "#/page-elements/21", - "text-order": 21, - "type": "page-footer" + "sref": "#/page-elements/185", + "text-order": 185, + "type": "paragraph" }, { "bbox": [ - 400.53094482421875, - 22.279802322387695, - 550.6204223632812, - 29.6954345703125 + 44.96582794189453, + 643.04052734375, + 553.867431640625, + 666.6377563476562 ], - "iref": "#/page-footers/1", - "name": "page-footer", - "orig-order": 22, - "page": 1, + "iref": "#/texts/148", + "name": "text", + "orig-order": 184, + "page": 14, "span": [ 0, - 42 + 172 ], - "sref": "#/page-elements/22", - "text-order": 22, - "type": "page-footer" + "sref": "#/page-elements/186", + "text-order": 186, + "type": "paragraph" }, { "bbox": [ 46.48820114135742, - 751.4075317382812, - 68.55958557128906, - 758.0504760742188 + 616.512939453125, + 242.9811553955078, + 628.0685424804688 ], - "iref": "#/texts/15", + "iref": "#/texts/149", + "name": "subtitle-level-1", + "orig-order": 185, + "page": 14, + "span": [ + 0, + 27 + ], + "sref": "#/page-elements/187", + "text-order": 187, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.48820114135742, + 603.7968139648438, + 209.16476440429688, + 615.1295166015625 + ], + "iref": "#/texts/150", "name": "text", - "orig-order": 40, - "page": 2, + "orig-order": 186, + "page": 14, "span": [ 0, - 5 + 41 ], - "sref": "#/page-elements/23", - "text-order": 23, + "sref": "#/page-elements/188", + "text-order": 188, "type": "paragraph" }, { "bbox": [ - 510.634765625, - 751.4635620117188, - 550.9636840820312, - 758.332763671875 + 45.64805603027344, + 577.8392333984375, + 84.40357971191406, + 589.0214233398438 ], - "iref": "#/page-headers/4", - "name": "page-header", - "orig-order": 41, - "page": 2, + "iref": "#/texts/151", + "name": "subtitle-level-1", + "orig-order": 187, + "page": 14, "span": [ 0, - 13 + 5 ], - "sref": "#/page-elements/24", - "text-order": 24, - "type": "page-header" + "sref": "#/page-elements/189", + "text-order": 189, + "type": "subtitle-level-1" }, { "bbox": [ - 45.97464370727539, - 604.0350952148438, - 554.3433227539062, - 732.5863037109375 + 45.716941833496094, + 539.067138671875, + 288.83966064453125, + 575.9967041015625 ], - "iref": "#/texts/16", + "iref": "#/texts/152", "name": "text", - "orig-order": 23, - "page": 2, + "orig-order": 188, + "page": 14, "span": [ 0, - 1082 + 160 ], - "sref": "#/page-elements/25", - "text-order": 25, + "sref": "#/page-elements/190", + "text-order": 190, "type": "paragraph" }, { "bbox": [ - 46.485626220703125, - 513.0453491210938, - 553.2366943359375, - 601.0419921875 + 45.982421875, + 512.6180419921875, + 110.57768249511719, + 524.0657958984375 ], - "iref": "#/texts/17", - "name": "text", - "orig-order": 24, - "page": 2, + "iref": "#/texts/153", + "name": "subtitle-level-1", + "orig-order": 189, + "page": 14, "span": [ 0, - 836 + 8 ], - "sref": "#/page-elements/26", - "text-order": 26, - "type": "paragraph" + "sref": "#/page-elements/191", + "text-order": 191, + "type": "subtitle-level-1" }, { "bbox": [ 46.48820114135742, - 500.0622253417969, - 340.59906005859375, - 509.4723205566406 + 498.1862487792969, + 411.1214904785156, + 507.86468505859375 ], - "iref": "#/texts/18", - "name": "text", - "orig-order": 25, - "page": 2, + "iref": "#/texts/154", + "name": "list-item", + "orig-order": 190, + "page": 14, "span": [ 0, - 69 + 99 ], - "sref": "#/page-elements/27", - "text-order": 27, + "sref": "#/page-elements/192", + "text-order": 192, "type": "paragraph" }, { "bbox": [ - 57.86075973510742, - 487.0791015625, - 492.157958984375, - 496.63543701171875 + 46.17177200317383, + 472.4082946777344, + 552.9000854492188, + 493.8719482421875 ], - "iref": "#/texts/19", - "name": "text", - "orig-order": 26, - "page": 2, + "iref": "#/texts/155", + "name": "list-item", + "orig-order": 191, + "page": 14, "span": [ 0, - 101 + 285 ], - "sref": "#/page-elements/28", - "text-order": 28, + "sref": "#/page-elements/193", + "text-order": 193, "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 461.0568542480469, - 262.5708312988281, - 470.5727233886719 + 46.39039993286133, + 457.71929931640625, + 129.30548095703125, + 468.0890197753906 ], - "iref": "#/texts/20", + "iref": "#/texts/156", "name": "list-item", - "orig-order": 27, - "page": 2, + "orig-order": 192, + "page": 14, "span": [ 0, - 49 + 24 ], - "sref": "#/page-elements/29", - "text-order": 29, + "sref": "#/page-elements/194", + "text-order": 194, "type": "paragraph" }, { "bbox": [ - 45.779930114746094, - 448.07373046875, - 241.75213623046875, - 457.51177978515625 + 45.71389389038086, + 443.1494140625, + 242.0704345703125, + 453.0476989746094 ], - "iref": "#/texts/21", + "iref": "#/texts/157", "name": "list-item", - "orig-order": 28, - "page": 2, + "orig-order": 193, + "page": 14, "span": [ 0, - 45 + 53 ], - "sref": "#/page-elements/30", - "text-order": 30, + "sref": "#/page-elements/195", + "text-order": 195, "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 435.03460693359375, - 174.95623779296875, - 444.5535583496094 + 46.020606994628906, + 417.41619873046875, + 554.6400756835938, + 438.90777587890625 ], - "iref": "#/texts/22", + "iref": "#/texts/158", "name": "list-item", - "orig-order": 29, - "page": 2, + "orig-order": 194, + "page": 14, "span": [ 0, - 29 + 248 ], - "sref": "#/page-elements/31", - "text-order": 31, + "sref": "#/page-elements/196", + "text-order": 196, "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 422.0514831542969, - 528.8121948242188, - 431.5508728027344 + 46.48814010620117, + 402.9024353027344, + 321.26422119140625, + 412.63861083984375 ], - "iref": "#/texts/23", + "iref": "#/texts/159", "name": "list-item", - "orig-order": 30, - "page": 2, + "orig-order": 195, + "page": 14, "span": [ 0, - 112 + 70 ], - "sref": "#/page-elements/32", - "text-order": 32, + "sref": "#/page-elements/197", + "text-order": 197, "type": "paragraph" }, { "bbox": [ - 45.387489318847656, - 409.068359375, - 446.47918701171875, - 418.8954772949219 + 46.00100326538086, + 376.937744140625, + 554.378662109375, + 398.0555114746094 ], - "iref": "#/texts/24", + "iref": "#/texts/160", "name": "list-item", - "orig-order": 31, - "page": 2, + "orig-order": 196, + "page": 14, "span": [ 0, - 94 + 211 ], - "sref": "#/page-elements/33", - "text-order": 33, + "sref": "#/page-elements/198", + "text-order": 198, "type": "paragraph" }, { "bbox": [ - 45.996150970458984, - 292.05224609375, - 553.0557861328125, - 392.69879150390625 + 46.0579719543457, + 350.9154052734375, + 553.2630004882812, + 372.03350830078125 ], - "iref": "#/texts/25", - "name": "text", - "orig-order": 32, - "page": 2, + "iref": "#/texts/161", + "name": "list-item", + "orig-order": 197, + "page": 14, "span": [ 0, - 869 + 156 ], - "sref": "#/page-elements/34", - "text-order": 34, + "sref": "#/page-elements/199", + "text-order": 199, "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 265.89093017578125, - 551.4827270507812, - 288.8219299316406 + 45.94832229614258, + 335.78765869140625, + 129.86572265625, + 346.3191833496094 ], - "iref": "#/texts/26", - "name": "text", - "orig-order": 33, - "page": 2, + "iref": "#/texts/162", + "name": "list-item", + "orig-order": 198, + "page": 14, "span": [ 0, - 140 + 25 ], - "sref": "#/page-elements/35", - "text-order": 35, + "sref": "#/page-elements/200", + "text-order": 200, "type": "paragraph" }, { "bbox": [ - 46.371070861816406, - 240.06375122070312, - 515.491943359375, - 249.5263671875 + 45.82542419433594, + 321.9457092285156, + 234.11181640625, + 331.8630065917969 ], - "iref": "#/texts/27", + "iref": "#/texts/163", "name": "list-item", - "orig-order": 34, - "page": 2, + "orig-order": 199, + "page": 14, "span": [ 0, - 111 + 54 ], - "sref": "#/page-elements/36", - "text-order": 36, + "sref": "#/page-elements/201", + "text-order": 201, "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 214.04150390625, - 551.0504760742188, - 236.58538818359375 + 46.478782653808594, + 307.19293212890625, + 269.6688537597656, + 316.9698486328125 ], - "iref": "#/texts/28", + "iref": "#/texts/164", "name": "list-item", - "orig-order": 35, - "page": 2, + "orig-order": 200, + "page": 14, "span": [ 0, - 180 + 61 ], - "sref": "#/page-elements/37", - "text-order": 37, + "sref": "#/page-elements/202", + "text-order": 202, "type": "paragraph" }, { "bbox": [ - 45.20487594604492, - 201.05838012695312, - 376.7724914550781, - 210.76416015625 + 46.01924514770508, + 292.9189147949219, + 301.0096130371094, + 302.8531799316406 ], - "iref": "#/texts/29", + "iref": "#/texts/165", "name": "list-item", - "orig-order": 36, - "page": 2, + "orig-order": 201, + "page": 14, "span": [ 0, - 82 + 75 ], - "sref": "#/page-elements/38", - "text-order": 38, + "sref": "#/page-elements/203", + "text-order": 203, "type": "paragraph" }, { "bbox": [ - 46.2375373840332, - 110.07154846191406, - 553.1372680664062, - 184.7841796875 + 46.444217681884766, + 278.1666564941406, + 187.92904663085938, + 288.1064453125 ], - "iref": "#/texts/30", - "name": "text", - "orig-order": 37, - "page": 2, + "iref": "#/texts/166", + "name": "list-item", + "orig-order": 202, + "page": 14, "span": [ 0, - 647 + 43 ], - "sref": "#/page-elements/39", - "text-order": 39, + "sref": "#/page-elements/204", + "text-order": 204, "type": "paragraph" }, { "bbox": [ - 46.487701416015625, - 84.04928588867188, - 550.5083618164062, - 107.71282958984375 + 46.00947952270508, + 263.8026123046875, + 169.3743896484375, + 274.1329345703125 ], - "iref": "#/texts/31", - "name": "text", - "orig-order": 38, - "page": 2, + "iref": "#/texts/167", + "name": "list-item", + "orig-order": 203, + "page": 14, "span": [ 0, - 202 + 36 ], - "sref": "#/page-elements/40", - "text-order": 40, + "sref": "#/page-elements/205", + "text-order": 205, "type": "paragraph" }, { "bbox": [ - 45.976261138916016, - 45.04500961303711, - 551.8382568359375, - 81.24627685546875 + 46.049869537353516, + 231.931396484375, + 123.2709732055664, + 244.548095703125 ], - "iref": "#/texts/32", - "name": "text", - "orig-order": 39, - "page": 2, + "iref": "#/texts/168", + "name": "subtitle-level-1", + "orig-order": 204, + "page": 14, "span": [ 0, - 346 + 10 ], - "sref": "#/page-elements/41", - "text-order": 41, - "type": "paragraph" + "sref": "#/page-elements/206", + "text-order": 206, + "type": "subtitle-level-1" }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 50.6671142578125, + 207.4257049560547, + 552.3800659179688, + 228.917724609375 ], - "iref": "#/texts/33", - "name": "text", - "orig-order": 42, - "page": 2, + "iref": "#/texts/169", + "name": "list-item", + "orig-order": 205, + "page": 14, "span": [ 0, - 320 + 179 ], - "sref": "#/page-elements/42", - "text-order": 42, + "sref": "#/page-elements/207", + "text-order": 207, "type": "paragraph" }, { "bbox": [ - 44.50688552856445, - 751.4635620117188, - 85.01602935791016, - 758.0504760742188 + 50.74010467529297, + 184.40769958496094, + 552.61669921875, + 205.76568603515625 ], - "iref": "#/page-headers/5", - "name": "page-header", - "orig-order": 50, - "page": 3, + "iref": "#/texts/170", + "name": "list-item", + "orig-order": 206, + "page": 14, "span": [ 0, - 13 + 163 ], - "sref": "#/page-elements/43", - "text-order": 43, - "type": "page-header" + "sref": "#/page-elements/208", + "text-order": 208, + "type": "paragraph" }, { "bbox": [ - 528.5497436523438, - 751.4075317382812, - 550.62109375, - 758.0504760742188 + 50.74015808105469, + 161.3896942138672, + 552.6810302734375, + 182.65234375 ], - "iref": "#/texts/34", - "name": "text", - "orig-order": 51, - "page": 3, + "iref": "#/texts/171", + "name": "list-item", + "orig-order": 207, + "page": 14, "span": [ 0, - 5 + 168 ], - "sref": "#/page-elements/44", - "text-order": 44, + "sref": "#/page-elements/209", + "text-order": 209, "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 695.0468139648438, - 549.4096069335938, - 730.4614868164062 + 50.16819763183594, + 126.91963195800781, + 552.5728759765625, + 159.62261962890625 ], - "iref": "#/texts/35", - "name": "text", - "orig-order": 43, - "page": 3, + "iref": "#/texts/172", + "name": "list-item", + "orig-order": 208, + "page": 14, "span": [ 0, - 262 + 292 ], - "sref": "#/page-elements/45", - "text-order": 45, + "sref": "#/page-elements/210", + "text-order": 210, "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 655.5153198242188, - 378.15191650390625, - 666.9031982421875 + 50.49177551269531, + 103.90162658691406, + 553.5820922851562, + 124.90191650390625 ], - "iref": "#/texts/36", - "name": "subtitle-level-1", - "orig-order": 44, - "page": 3, + "iref": "#/texts/173", + "name": "list-item", + "orig-order": 209, + "page": 14, "span": [ 0, - 37 + 171 ], - "sref": "#/page-elements/46", - "text-order": 46, - "type": "subtitle-level-1" + "sref": "#/page-elements/211", + "text-order": 211, + "type": "paragraph" }, { "bbox": [ - 44.785400390625, - 552.0484008789062, - 549.7849731445312, - 639.5802001953125 + 50.74018859863281, + 92.39262390136719, + 436.9924011230469, + 101.68670654296875 ], - "iref": "#/texts/37", - "name": "text", - "orig-order": 45, - "page": 3, + "iref": "#/texts/174", + "name": "list-item", + "orig-order": 210, + "page": 14, "span": [ 0, - 796 + 102 ], - "sref": "#/page-elements/47", - "text-order": 47, + "sref": "#/page-elements/212", + "text-order": 212, "type": "paragraph" }, { "bbox": [ - 44.785430908203125, - 409.068603515625, - 554.4052124023438, - 548.475341796875 + 50.74017333984375, + 69.43157196044922, + 552.4933471679688, + 90.58172607421875 ], - "iref": "#/texts/38", - "name": "text", - "orig-order": 46, - "page": 3, + "iref": "#/texts/175", + "name": "list-item", + "orig-order": 211, + "page": 14, "span": [ 0, - 1141 + 156 ], - "sref": "#/page-elements/48", - "text-order": 48, + "sref": "#/page-elements/213", + "text-order": 213, "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 369.4996032714844, - 134.88641357421875, - 380.88751220703125 + 50.37576675415039, + 46.413570404052734, + 553.1749267578125, + 67.59844970703125 ], - "iref": "#/texts/39", - "name": "subtitle-level-1", - "orig-order": 47, - "page": 3, + "iref": "#/texts/176", + "name": "list-item", + "orig-order": 212, + "page": 14, "span": [ 0, - 14 + 184 ], - "sref": "#/page-elements/49", - "text-order": 49, - "type": "subtitle-level-1" + "sref": "#/page-elements/214", + "text-order": 214, + "type": "paragraph" }, { "bbox": [ - 44.524391174316406, - 317.6519470214844, - 552.3914184570312, - 353.5248107910156 + 578.368896484375, + 15.450490951538086, + 583.4779663085938, + 766.7100219726562 ], - "iref": "#/texts/40", + "iref": "#/texts/177", "name": "text", - "orig-order": 48, - "page": 3, + "orig-order": 215, + "page": 14, "span": [ 0, - 232 + 320 ], - "sref": "#/page-elements/50", - "text-order": 50, + "sref": "#/page-elements/215", + "text-order": 215, "type": "paragraph" }, { "bbox": [ - 78.5494384765625, - 102.71893310546875, - 512.3916625976562, - 284.9899597167969 + 44.473201751708984, + 751.4635620117188, + 84.89160919189453, + 758.80615234375 ], - "iref": "#/figures/0", - "name": "picture", - "orig-order": 53, - "page": 3, + "iref": "#/page-headers/18", + "name": "page-header", + "orig-order": 228, + "page": 15, "span": [ 0, - 0 + 13 ], - "sref": "#/page-elements/51", - "text-order": 51, - "type": "figure" + "sref": "#/page-elements/216", + "text-order": 216, + "type": "page-header" }, { "bbox": [ - 44.78328323364258, - 45.39774703979492, - 545.7940673828125, - 89.4708251953125 + 454.5641784667969, + 745.4571533203125, + 549.099365234375, + 761.863037109375 ], - "iref": "#/figures/0/captions/0", - "name": "caption", - "orig-order": 49, - "page": 3, + "iref": "#/figures/8", + "name": "picture", + "orig-order": 229, + "page": 15, "span": [ 0, - 498 + 0 ], - "sref": "#/page-elements/52", - "text-order": 52, - "type": "caption" + "sref": "#/page-elements/217", + "text-order": 217, + "type": "figure" }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 46.63217544555664, + 722.4282836914062, + 362.7469787597656, + 731.7239990234375 ], - "iref": "#/texts/41", - "name": "text", - "orig-order": 52, - "page": 3, + "iref": "#/texts/178", + "name": "list-item", + "orig-order": 216, + "page": 15, "span": [ 0, - 320 + 85 ], - "sref": "#/page-elements/53", - "text-order": 53, + "sref": "#/page-elements/218", + "text-order": 218, "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 751.4075317382812, - 68.55958557128906, - 758.0504760742188 + 44.78684997558594, + 699.5198364257812, + 549.7481689453125, + 720.4119262695312 ], - "iref": "#/texts/42", - "name": "text", - "orig-order": 63, - "page": 4, + "iref": "#/texts/179", + "name": "list-item", + "orig-order": 217, + "page": 15, "span": [ 0, - 5 + 168 ], - "sref": "#/page-elements/54", - "text-order": 54, + "sref": "#/page-elements/219", + "text-order": 219, "type": "paragraph" }, { "bbox": [ - 510.634765625, - 751.4635620117188, - 550.9420166015625, - 758.4869384765625 + 44.7877197265625, + 688.0108642578125, + 238.66644287109375, + 697.144287109375 ], - "iref": "#/page-headers/6", - "name": "page-header", - "orig-order": 64, - "page": 4, + "iref": "#/texts/180", + "name": "list-item", + "orig-order": 218, + "page": 15, "span": [ 0, - 13 + 50 ], - "sref": "#/page-elements/55", - "text-order": 55, - "type": "page-header" + "sref": "#/page-elements/220", + "text-order": 220, + "type": "paragraph" }, { "bbox": [ - 45.14111328125, - 720.4854736328125, - 157.7607421875, - 732.3443603515625 + 44.54977798461914, + 676.5018920898438, + 243.0414581298828, + 685.6976318359375 ], - "iref": "#/texts/43", - "name": "subtitle-level-1", - "orig-order": 54, - "page": 4, + "iref": "#/texts/181", + "name": "list-item", + "orig-order": 219, + "page": 15, "span": [ 0, - 18 + 52 ], - "sref": "#/page-elements/56", - "text-order": 56, - "type": "subtitle-level-1" + "sref": "#/page-elements/221", + "text-order": 221, + "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 656.0805053710938, - 553.5469360351562, - 704.7728881835938 + 44.7877197265625, + 653.5408935546875, + 548.7638549804688, + 674.378662109375 ], - "iref": "#/texts/44", - "name": "text", - "orig-order": 55, - "page": 4, + "iref": "#/texts/182", + "name": "list-item", + "orig-order": 220, + "page": 15, "span": [ 0, - 403 + 145 ], - "sref": "#/page-elements/57", - "text-order": 57, + "sref": "#/page-elements/222", + "text-order": 222, "type": "paragraph" }, { "bbox": [ - 45.56229019165039, - 604.0359497070312, - 553.0910034179688, - 652.8948974609375 + 44.7877197265625, + 630.52294921875, + 548.82861328125, + 651.5768432617188 ], - "iref": "#/texts/45", - "name": "text", - "orig-order": 56, - "page": 4, + "iref": "#/texts/183", + "name": "list-item", + "orig-order": 221, + "page": 15, "span": [ 0, - 417 + 252 ], - "sref": "#/page-elements/58", - "text-order": 58, + "sref": "#/page-elements/223", + "text-order": 223, "type": "paragraph" }, { "bbox": [ - 45.6591796875, - 565.0864868164062, - 552.8568115234375, - 600.9397583007812 + 44.787750244140625, + 607.5050048828125, + 550.8438720703125, + 628.0836181640625 ], - "iref": "#/texts/46", - "name": "text", - "orig-order": 57, - "page": 4, + "iref": "#/texts/184", + "name": "list-item", + "orig-order": 222, + "page": 15, "span": [ 0, - 282 + 147 ], - "sref": "#/page-elements/59", - "text-order": 59, + "sref": "#/page-elements/224", + "text-order": 224, "type": "paragraph" }, { "bbox": [ - 45.497798919677734, - 525.5185546875, - 161.91403198242188, - 536.9064331054688 + 44.787750244140625, + 595.9960327148438, + 474.9829406738281, + 604.6593627929688 ], - "iref": "#/texts/47", - "name": "subtitle-level-1", - "orig-order": 58, - "page": 4, + "iref": "#/texts/185", + "name": "list-item", + "orig-order": 223, + "page": 15, "span": [ 0, - 18 - ], - "sref": "#/page-elements/60", - "text-order": 60, - "type": "subtitle-level-1" + 114 + ], + "sref": "#/page-elements/225", + "text-order": 225, + "type": "paragraph" }, { "bbox": [ - 46.28074645996094, - 435.03485107421875, - 552.7772827148438, - 509.80706787109375 + 44.786895751953125, + 573.0350341796875, + 548.8020629882812, + 592.54248046875 ], - "iref": "#/texts/48", - "name": "text", - "orig-order": 59, - "page": 4, + "iref": "#/texts/186", + "name": "list-item", + "orig-order": 224, + "page": 15, "span": [ 0, - 647 + 197 ], - "sref": "#/page-elements/61", - "text-order": 61, + "sref": "#/page-elements/226", + "text-order": 226, "type": "paragraph" }, { "bbox": [ - 45.999271392822266, - 370.0654296875, - 551.750244140625, - 431.6009521484375 + 44.786865234375, + 550.01708984375, + 548.7230834960938, + 569.8275146484375 ], - "iref": "#/texts/49", - "name": "text", - "orig-order": 60, - "page": 4, + "iref": "#/texts/187", + "name": "list-item", + "orig-order": 225, + "page": 15, "span": [ 0, - 542 + 142 ], - "sref": "#/page-elements/62", - "text-order": 62, + "sref": "#/page-elements/227", + "text-order": 227, "type": "paragraph" }, { "bbox": [ - 46.37678527832031, - 304.9195251464844, - 551.427001953125, - 366.6332092285156 + 44.78601837158203, + 526.9991455078125, + 550.565185546875, + 546.7464599609375 ], - "iref": "#/texts/50", - "name": "text", - "orig-order": 61, - "page": 4, + "iref": "#/texts/188", + "name": "list-item", + "orig-order": 226, + "page": 15, "span": [ 0, - 580 + 176 ], - "sref": "#/page-elements/63", - "text-order": 63, + "sref": "#/page-elements/228", + "text-order": 228, "type": "paragraph" }, { "bbox": [ - 46.48663330078125, - 45.39759826660156, - 540.3204956054688, - 67.21272277832031 + 57.16337966918945, + 468.5407409667969, + 529.73583984375, + 491.138916015625 ], - "iref": "#/texts/51", + "iref": "#/texts/189", "name": "text", - "orig-order": 62, - "page": 4, + "orig-order": 227, + "page": 15, "span": [ 0, - 220 + 216 ], - "sref": "#/page-elements/64", - "text-order": 64, + "sref": "#/page-elements/229", + "text-order": 229, "type": "paragraph" }, { @@ -14870,3445 +75196,3394 @@ 583.4779663085938, 766.7100219726562 ], - "iref": "#/texts/52", + "iref": "#/texts/190", "name": "text", - "orig-order": 65, - "page": 4, + "orig-order": 230, + "page": 15, "span": [ 0, 320 ], - "sref": "#/page-elements/65", - "text-order": 65, + "sref": "#/page-elements/230", + "text-order": 230, "type": "paragraph" + } + ], + "page-footers": [ + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-footers/0", + "hash": 12400883656433726216, + "orig": "Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", + "prov": [ + { + "$ref": "#/page-elements/21" + } + ], + "sref": "#/page-footers/0", + "text": "Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", + "text-hash": 8372141692634509619, + "type": "page-footer" }, { - "bbox": [ - 44.041500091552734, - 751.3096313476562, - 85.72028350830078, - 759.7291870117188 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-footers/1", + "hash": 10244115652970867690, + "orig": "wileyonlinelibrary.com/journal/ail2 1of15", + "prov": [ + { + "$ref": "#/page-elements/22" + } ], - "iref": "#/page-headers/7", - "name": "page-header", - "orig-order": 72, - "page": 5, - "span": [ - 0, - 13 + "sref": "#/page-footers/1", + "text": "wileyonlinelibrary.com/journal/ail2 1of15", + "text-hash": 6196517219334265105, + "type": "page-footer" + } + ], + "page-headers": [ + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/0", + "hash": 1841431076736563689, + "orig": "Received: 15 September 2020", + "prov": [ + { + "$ref": "#/page-elements/0" + } ], - "sref": "#/page-elements/66", - "text-order": 66, + "sref": "#/page-headers/0", + "text": "Received: 15 September 2020", + "text-hash": 16688788223092401940, "type": "page-header" }, { - "bbox": [ - 454.1357421875, - 745.7154541015625, - 550.62109375, - 761.0070190429688 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/1", + "hash": 3915126318503464014, + "orig": "Revised: 23 November 2020", + "prov": [ + { + "$ref": "#/page-elements/1" + } ], - "iref": "#/figures/1", - "name": "picture", - "orig-order": 73, - "page": 5, - "span": [ - 0, - 0 + "sref": "#/page-headers/1", + "text": "Revised: 23 November 2020", + "text-hash": 1000711515083668085, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/2", + "hash": 1727876228376027809, + "orig": "Accepted: 25 November 2020", + "prov": [ + { + "$ref": "#/page-elements/2" + } ], - "sref": "#/page-elements/67", - "text-order": 67, - "type": "figure" + "sref": "#/page-headers/2", + "text": "Accepted: 25 November 2020", + "text-hash": 17099649843681009628, + "type": "page-header" }, { - "bbox": [ - 44.78594970703125, - 483.39947509765625, - 548.2582397460938, - 529.3165283203125 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/3", + "hash": 4558221577189246496, + "orig": "DOI: 10.1002/ail2.20", + "prov": [ + { + "$ref": "#/page-elements/3" + } ], - "iref": "#/texts/53", - "name": "text", - "orig-order": 71, - "page": 5, - "span": [ - 0, - 421 + "sref": "#/page-headers/3", + "text": "DOI: 10.1002/ail2.20", + "text-hash": 348625343742526555, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/4", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/24" + } ], - "sref": "#/page-elements/68", - "text-order": 68, - "type": "paragraph" + "sref": "#/page-headers/4", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" }, { - "bbox": [ - 44.78684997558594, - 370.0640563964844, - 549.865478515625, - 444.5719299316406 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/5", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/43" + } + ], + "sref": "#/page-headers/5", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/6", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/55" + } + ], + "sref": "#/page-headers/6", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/7", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/66" + } + ], + "sref": "#/page-headers/7", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/8", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/76" + } + ], + "sref": "#/page-headers/8", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/9", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/92" + } + ], + "sref": "#/page-headers/9", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/10", + "hash": 4361549266732238272, + "orig": "8of15", + "prov": [ + { + "$ref": "#/page-elements/106" + } + ], + "sref": "#/page-headers/10", + "text": "8of15", + "text-hash": 329104147727696635, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/11", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/107" + } + ], + "sref": "#/page-headers/11", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/12", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/117" + } + ], + "sref": "#/page-headers/12", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/13", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/131" + } + ], + "sref": "#/page-headers/13", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/14", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/145" + } + ], + "sref": "#/page-headers/14", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/15", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/156" + } + ], + "sref": "#/page-headers/15", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/16", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/170" + } + ], + "sref": "#/page-headers/16", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/17", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/183" + } + ], + "sref": "#/page-headers/17", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/18", + "hash": 8492015887072434396, + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/216" + } + ], + "sref": "#/page-headers/18", + "text": "STAAR ET AL.", + "text-hash": 14658966106383255015, + "type": "page-header" + } + ], + "properties": { + "data": [ + [ + "language", + 13357303559203493643, + "DOCUMENT", + "#", + "en", + 1.0 + ], + [ + "semantic", + 2144509362215609527, + "TEXT", + "#/texts/0", + "meta-data", + 1.0 + ], + [ + "language", + 2144509362215609527, + "TEXT", + "#/texts/0", + "en", + 0.4099999964237213 + ], + [ + "semantic", + 16672720454366774824, + "TEXT", + "#/texts/1", + "header", + 0.8999999761581421 + ], + [ + "language", + 16672720454366774824, + "TEXT", + "#/texts/1", + "en", + 0.75 + ], + [ + "semantic", + 16781763356419781679, + "TEXT", + "#/texts/2", + "meta-data", + 0.6100000143051147 + ], + [ + "language", + 16781763356419781679, + "TEXT", + "#/texts/2", + "nl", + 0.44999998807907104 + ], + [ + "semantic", + 3352447812305581329, + "TEXT", + "#/texts/3", + "text", + 1.0 + ], + [ + "language", + 3352447812305581329, + "TEXT", + "#/texts/3", + "ceb", + 0.49000000953674316 + ], + [ + "semantic", + 14877831450145300436, + "TEXT", + "#/texts/4", + "meta-data", + 1.0 + ], + [ + "language", + 14877831450145300436, + "TEXT", + "#/texts/4", + "it", + 0.36000001430511475 + ], + [ + "semantic", + 3352447812305581329, + "TEXT", + "#/texts/5", + "text", + 1.0 + ], + [ + "language", + 3352447812305581329, + "TEXT", + "#/texts/5", + "ceb", + 0.49000000953674316 + ], + [ + "semantic", + 13336841394978214677, + "TEXT", + "#/texts/6", + "meta-data", + 0.5899999737739563 + ], + [ + "language", + 13336841394978214677, + "TEXT", + "#/texts/6", + "de", + 0.5699999928474426 + ], + [ + "semantic", + 15325526562897377208, + "TEXT", + "#/texts/7", + "meta-data", + 1.0 + ], + [ + "language", + 15325526562897377208, + "TEXT", + "#/texts/7", + "en", + 0.8100000023841858 + ], + [ + "semantic", + 4017434568255781081, + "TEXT", + "#/texts/8", + "meta-data", + 0.9300000071525574 + ], + [ + "language", + 4017434568255781081, + "TEXT", + "#/texts/8", + "en", + 0.3400000035762787 + ], + [ + "semantic", + 8487024695951375934, + "TEXT", + "#/texts/9", + "meta-data", + 1.0 + ], + [ + "language", + 8487024695951375934, + "TEXT", + "#/texts/9", + "en", + 0.3199999928474426 + ], + [ + "semantic", + 11695737263227886476, + "TEXT", + "#/texts/10", + "text", + 0.9599999785423279 + ], + [ + "language", + 11695737263227886476, + "TEXT", + "#/texts/10", + "en", + 0.9300000071525574 + ], + [ + "semantic", + 8500733160758672230, + "TEXT", + "#/texts/11", + "text", + 1.0 + ], + [ + "language", + 8500733160758672230, + "TEXT", + "#/texts/11", + "es", + 0.3700000047683716 + ], + [ + "semantic", + 4452030907228745864, + "TEXT", + "#/texts/12", + "text", + 0.8700000047683716 ], - "iref": "#/texts/54", - "name": "text", - "orig-order": 66, - "page": 5, - "span": [ - 0, - 687 + [ + "language", + 4452030907228745864, + "TEXT", + "#/texts/12", + "en", + 0.6200000047683716 ], - "sref": "#/page-elements/69", - "text-order": 69, - "type": "paragraph" - }, - { - "bbox": [ - 44.206939697265625, - 330.4949035644531, - 223.93128967285156, - 341.8828125 + [ + "semantic", + 11913688961435238004, + "TEXT", + "#/texts/13", + "meta-data", + 1.0 ], - "iref": "#/texts/55", - "name": "subtitle-level-1", - "orig-order": 67, - "page": 5, - "span": [ - 0, - 31 + [ + "language", + 11913688961435238004, + "TEXT", + "#/texts/13", + "en", + 0.6399999856948853 ], - "sref": "#/page-elements/70", - "text-order": 70, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78684616088867, - 149.07435607910156, - 549.819091796875, - 314.53570556640625 + [ + "semantic", + 9977041563469582014, + "TEXT", + "#/texts/14", + "text", + 0.9800000190734863 ], - "iref": "#/texts/56", - "name": "text", - "orig-order": 68, - "page": 5, - "span": [ - 0, - 1517 + [ + "language", + 9977041563469582014, + "TEXT", + "#/texts/14", + "en", + 0.9599999785423279 ], - "sref": "#/page-elements/71", - "text-order": 71, - "type": "paragraph" - }, - { - "bbox": [ - 43.94790267944336, - 109.50601959228516, - 254.47779846191406, - 120.89392852783203 + [ + "semantic", + 4361549266817300114, + "TEXT", + "#/texts/15", + "text", + 0.9700000286102295 ], - "iref": "#/texts/57", - "name": "subtitle-level-1", - "orig-order": 69, - "page": 5, - "span": [ - 0, - 36 + [ + "language", + 4361549266817300114, + "TEXT", + "#/texts/15", + "en", + 0.1899999976158142 ], - "sref": "#/page-elements/72", - "text-order": 72, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78739929199219, - 45.00958251953125, - 549.1444091796875, - 93.61456298828125 + [ + "semantic", + 8425126282903547933, + "TEXT", + "#/texts/16", + "text", + 0.9399999976158142 ], - "iref": "#/texts/58", - "name": "text", - "orig-order": 70, - "page": 5, - "span": [ - 0, - 384 + [ + "language", + 8425126282903547933, + "TEXT", + "#/texts/16", + "en", + 0.9300000071525574 ], - "sref": "#/page-elements/73", - "text-order": 73, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "semantic", + 16507313240019459642, + "TEXT", + "#/texts/17", + "text", + 0.9700000286102295 ], - "iref": "#/texts/59", - "name": "text", - "orig-order": 74, - "page": 5, - "span": [ - 0, - 320 + [ + "language", + 16507313240019459642, + "TEXT", + "#/texts/17", + "en", + 0.9100000262260437 ], - "sref": "#/page-elements/74", - "text-order": 74, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 751.4075317382812, - 68.55958557128906, - 758.0504760742188 + [ + "semantic", + 7900229969942228522, + "TEXT", + "#/texts/18", + "text", + 0.9900000095367432 ], - "iref": "#/texts/60", - "name": "text", - "orig-order": 89, - "page": 6, - "span": [ - 0, - 5 + [ + "language", + 7900229969942228522, + "TEXT", + "#/texts/18", + "en", + 0.9900000095367432 ], - "sref": "#/page-elements/75", - "text-order": 75, - "type": "paragraph" - }, - { - "bbox": [ - 510.634765625, - 751.4635620117188, - 550.9879150390625, - 758.9756469726562 + [ + "semantic", + 10081303962589804251, + "TEXT", + "#/texts/19", + "text", + 1.0 ], - "iref": "#/page-headers/8", - "name": "page-header", - "orig-order": 90, - "page": 6, - "span": [ - 0, - 13 + [ + "language", + 10081303962589804251, + "TEXT", + "#/texts/19", + "en", + 0.9200000166893005 ], - "sref": "#/page-elements/76", - "text-order": 76, - "type": "page-header" - }, - { - "bbox": [ - 45.78483581542969, - 669.0628051757812, - 554.4027709960938, - 730.823486328125 + [ + "semantic", + 12186698460099365002, + "TEXT", + "#/texts/20", + "header", + 0.49000000953674316 ], - "iref": "#/texts/61", - "name": "text", - "orig-order": 75, - "page": 6, - "span": [ - 0, - 564 + [ + "language", + 12186698460099365002, + "TEXT", + "#/texts/20", + "en", + 0.5099999904632568 ], - "sref": "#/page-elements/77", - "text-order": 77, - "type": "paragraph" - }, - { - "bbox": [ - 45.753639221191406, - 629.4933471679688, - 148.00445556640625, - 641.5734252929688 + [ + "semantic", + 14190244699299580163, + "TEXT", + "#/texts/21", + "text", + 0.9599999785423279 ], - "iref": "#/texts/62", - "name": "subtitle-level-1", - "orig-order": 76, - "page": 6, - "span": [ - 0, - 16 + [ + "language", + 14190244699299580163, + "TEXT", + "#/texts/21", + "en", + 0.6299999952316284 ], - "sref": "#/page-elements/78", - "text-order": 78, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.48820114135742, - 591.0541381835938, - 552.9049682617188, - 613.8143310546875 + [ + "semantic", + 1376279050886549305, + "TEXT", + "#/texts/22", + "header", + 0.800000011920929 ], - "iref": "#/texts/63", - "name": "text", - "orig-order": 77, - "page": 6, - "span": [ - 0, - 225 + [ + "language", + 1376279050886549305, + "TEXT", + "#/texts/22", + "en", + 0.4699999988079071 ], - "sref": "#/page-elements/79", - "text-order": 79, - "type": "paragraph" - }, - { - "bbox": [ - 46.445133209228516, - 552.0497436523438, - 553.362548828125, - 575.2869873046875 + [ + "semantic", + 10155628801693924200, + "TEXT", + "#/texts/23", + "text", + 0.8999999761581421 ], - "iref": "#/texts/64", - "name": "list-item", - "orig-order": 78, - "page": 6, - "span": [ - 0, - 179 + [ + "language", + 10155628801693924200, + "TEXT", + "#/texts/23", + "en", + 0.9300000071525574 ], - "sref": "#/page-elements/80", - "text-order": 80, - "type": "paragraph" - }, - { - "bbox": [ - 45.744380950927734, - 526.0834350585938, - 553.5414428710938, - 548.8994140625 + [ + "semantic", + 9107499507097280105, + "TEXT", + "#/texts/24", + "text", + 0.6100000143051147 ], - "iref": "#/texts/65", - "name": "list-item", - "orig-order": 79, - "page": 6, - "span": [ - 0, - 133 + [ + "language", + 9107499507097280105, + "TEXT", + "#/texts/24", + "en", + 0.9300000071525574 ], - "sref": "#/page-elements/81", - "text-order": 81, - "type": "paragraph" - }, - { - "bbox": [ - 44.8809700012207, - 513.0443115234375, - 481.36083984375, - 523.5081787109375 + [ + "semantic", + 7248467870339433322, + "TEXT", + "#/texts/25", + "text", + 1.0 ], - "iref": "#/texts/66", - "name": "list-item", - "orig-order": 80, - "page": 6, - "span": [ - 0, - 101 + [ + "language", + 7248467870339433322, + "TEXT", + "#/texts/25", + "en", + 0.9399999976158142 ], - "sref": "#/page-elements/82", - "text-order": 82, - "type": "paragraph" - }, - { - "bbox": [ - 46.38796615600586, - 435.0345458984375, - 553.393310546875, - 497.0226135253906 + [ + "semantic", + 13346892078888080449, + "TEXT", + "#/texts/26", + "text", + 0.9700000286102295 ], - "iref": "#/texts/67", - "name": "text", - "orig-order": 81, - "page": 6, - "span": [ - 0, - 525 + [ + "language", + 13346892078888080449, + "TEXT", + "#/texts/26", + "en", + 0.8899999856948853 ], - "sref": "#/page-elements/83", - "text-order": 83, - "type": "paragraph" - }, - { - "bbox": [ - 45.54835891723633, - 344.0406799316406, - 555.0050048828125, - 432.1236877441406 + [ + "semantic", + 1118972765223422660, + "TEXT", + "#/texts/27", + "text", + 0.8299999833106995 ], - "iref": "#/texts/68", - "name": "text", - "orig-order": 82, - "page": 6, - "span": [ - 0, - 693 + [ + "language", + 1118972765223422660, + "TEXT", + "#/texts/27", + "en", + 0.9100000262260437 ], - "sref": "#/page-elements/84", - "text-order": 84, - "type": "paragraph" - }, - { - "bbox": [ - 46.25617980957031, - 304.472900390625, - 469.55108642578125, - 315.8608093261719 + [ + "semantic", + 324023167304456371, + "TEXT", + "#/texts/28", + "text", + 0.9800000190734863 ], - "iref": "#/texts/69", - "name": "subtitle-level-1", - "orig-order": 83, - "page": 6, - "span": [ - 0, - 48 + [ + "language", + 324023167304456371, + "TEXT", + "#/texts/28", + "en", + 0.9300000071525574 ], - "sref": "#/page-elements/85", - "text-order": 85, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.48820114135742, - 265.92974853515625, - 552.6448364257812, - 288.6134338378906 + [ + "semantic", + 4651508276868765576, + "TEXT", + "#/texts/29", + "text", + 0.9700000286102295 ], - "iref": "#/texts/70", - "name": "text", - "orig-order": 84, - "page": 6, - "span": [ - 0, - 166 + [ + "language", + 4651508276868765576, + "TEXT", + "#/texts/29", + "en", + 0.7300000190734863 ], - "sref": "#/page-elements/86", - "text-order": 86, - "type": "paragraph" - }, - { - "bbox": [ - 46.377140045166016, - 240.049560546875, - 429.5157165527344, - 249.76214599609375 + [ + "semantic", + 3052020526349962744, + "TEXT", + "#/texts/30", + "text", + 0.949999988079071 ], - "iref": "#/texts/71", - "name": "list-item", - "orig-order": 85, - "page": 6, - "span": [ - 0, - 92 + [ + "language", + 3052020526349962744, + "TEXT", + "#/texts/30", + "en", + 0.9300000071525574 ], - "sref": "#/page-elements/87", - "text-order": 87, - "type": "paragraph" - }, - { - "bbox": [ - 45.62164306640625, - 227.0850830078125, - 346.3638916015625, - 237.665283203125 + [ + "semantic", + 6725501529910185390, + "TEXT", + "#/texts/31", + "text", + 0.9800000190734863 ], - "iref": "#/texts/72", - "name": "list-item", - "orig-order": 86, - "page": 6, - "span": [ - 0, - 73 + [ + "language", + 6725501529910185390, + "TEXT", + "#/texts/31", + "en", + 0.9800000190734863 ], - "sref": "#/page-elements/88", - "text-order": 88, - "type": "paragraph" - }, - { - "bbox": [ - 45.322208404541016, - 162.0574493408203, - 553.8873901367188, - 210.65191650390625 + [ + "semantic", + 14814111183601762276, + "TEXT", + "#/texts/32", + "text", + 0.9700000286102295 + ], + [ + "language", + 14814111183601762276, + "TEXT", + "#/texts/32", + "en", + 0.9100000262260437 + ], + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/33", + "text", + 0.8999999761581421 ], - "iref": "#/texts/73", - "name": "text", - "orig-order": 87, - "page": 6, - "span": [ - 0, - 472 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/33", + "en", + 0.7799999713897705 ], - "sref": "#/page-elements/89", - "text-order": 89, - "type": "paragraph" - }, - { - "bbox": [ - 45.762847900390625, - 71.06684875488281, - 554.2275390625, - 158.80230712890625 + [ + "semantic", + 4361549266681704196, + "TEXT", + "#/texts/34", + "text", + 0.9700000286102295 ], - "iref": "#/texts/74", - "name": "text", - "orig-order": 88, - "page": 6, - "span": [ - 0, - 761 + [ + "language", + 4361549266681704196, + "TEXT", + "#/texts/34", + "en", + 0.4000000059604645 ], - "sref": "#/page-elements/90", - "text-order": 90, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "semantic", + 8043608144162608258, + "TEXT", + "#/texts/35", + "text", + 0.9900000095367432 ], - "iref": "#/texts/75", - "name": "text", - "orig-order": 91, - "page": 6, - "span": [ - 0, - 320 + [ + "language", + 8043608144162608258, + "TEXT", + "#/texts/35", + "en", + 0.9399999976158142 ], - "sref": "#/page-elements/91", - "text-order": 91, - "type": "paragraph" - }, - { - "bbox": [ - 44.35243225097656, - 751.4635620117188, - 85.42164611816406, - 758.9300537109375 + [ + "semantic", + 7159467829896778939, + "TEXT", + "#/texts/36", + "header", + 0.75 ], - "iref": "#/page-headers/9", - "name": "page-header", - "orig-order": 103, - "page": 7, - "span": [ - 0, - 13 + [ + "language", + 7159467829896778939, + "TEXT", + "#/texts/36", + "en", + 0.4399999976158142 ], - "sref": "#/page-elements/92", - "text-order": 92, - "type": "page-header" - }, - { - "bbox": [ - 528.5497436523438, - 751.4075317382812, - 550.62109375, - 758.0504760742188 + [ + "semantic", + 5617240156952377, + "TEXT", + "#/texts/37", + "text", + 0.9800000190734863 ], - "iref": "#/texts/76", - "name": "text", - "orig-order": 104, - "page": 7, - "span": [ - 0, - 5 + [ + "language", + 5617240156952377, + "TEXT", + "#/texts/37", + "en", + 0.9399999976158142 ], - "sref": "#/page-elements/93", - "text-order": 93, - "type": "paragraph" - }, - { - "bbox": [ - 44.78684997558594, - 695.0850830078125, - 549.5508422851562, - 730.6725463867188 + [ + "semantic", + 3276490574487379366, + "TEXT", + "#/texts/38", + "text", + 0.9800000190734863 ], - "iref": "#/texts/77", - "name": "text", - "orig-order": 92, - "page": 7, - "span": [ - 0, - 324 + [ + "language", + 3276490574487379366, + "TEXT", + "#/texts/38", + "en", + 0.8399999737739563 ], - "sref": "#/page-elements/94", - "text-order": 94, - "type": "paragraph" - }, - { - "bbox": [ - 44.71910095214844, - 655.5153198242188, - 236.7943572998047, - 666.9031982421875 + [ + "semantic", + 3367451956962330174, + "TEXT", + "#/texts/39", + "meta-data", + 1.0 ], - "iref": "#/texts/78", - "name": "subtitle-level-1", - "orig-order": 93, - "page": 7, - "span": [ - 0, - 32 + [ + "language", + 3367451956962330174, + "TEXT", + "#/texts/39", + "en", + 0.9399999976158142 ], - "sref": "#/page-elements/95", - "text-order": 95, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78636169433594, - 578.0709838867188, - 549.254638671875, - 640.1705932617188 + [ + "semantic", + 5509744459704235873, + "TEXT", + "#/texts/40", + "text", + 0.9700000286102295 ], - "iref": "#/texts/79", - "name": "text", - "orig-order": 94, - "page": 7, - "span": [ - 0, - 502 + [ + "language", + 5509744459704235873, + "TEXT", + "#/texts/40", + "en", + 0.8799999952316284 ], - "sref": "#/page-elements/96", - "text-order": 96, - "type": "paragraph" - }, - { - "bbox": [ - 44.733577728271484, - 539.0667114257812, - 548.8603515625, - 576.5675048828125 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/41", + "text", + 0.8999999761581421 ], - "iref": "#/texts/80", - "name": "text", - "orig-order": 95, - "page": 7, - "span": [ - 0, - 324 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/41", + "en", + 0.7799999713897705 ], - "sref": "#/page-elements/97", - "text-order": 97, - "type": "paragraph" - }, - { - "bbox": [ - 214.75270080566406, - 498.5877685546875, - 548.7813110351562, - 529.3681030273438 + [ + "semantic", + 4361549176688508574, + "TEXT", + "#/texts/42", + "text", + 0.9700000286102295 ], - "iref": "#/texts/81", - "name": "formula", - "orig-order": 96, - "page": 7, - "span": [ - 0, - 92 + [ + "language", + 4361549176688508574, + "TEXT", + "#/texts/42", + "en", + 0.17000000178813934 ], - "sref": "#/page-elements/98", - "text-order": 98, - "type": "equation" - }, - { - "bbox": [ - 44.784271240234375, - 435.0351257324219, - 548.7523193359375, - 470.5306396484375 + [ + "semantic", + 12374482891052873875, + "TEXT", + "#/texts/43", + "header", + 0.5699999928474426 ], - "iref": "#/texts/82", - "name": "text", - "orig-order": 97, - "page": 7, - "span": [ - 0, - 327 + [ + "language", + 12374482891052873875, + "TEXT", + "#/texts/43", + "en", + 0.550000011920929 ], - "sref": "#/page-elements/99", - "text-order": 99, - "type": "paragraph" - }, - { - "bbox": [ - 234.89254760742188, - 399.494873046875, - 549.147216796875, - 425.90399169921875 + [ + "semantic", + 2755397864153233778, + "TEXT", + "#/texts/44", + "text", + 0.9900000095367432 ], - "iref": "#/texts/83", - "name": "formula", - "orig-order": 98, - "page": 7, - "span": [ - 0, - 114 + [ + "language", + 2755397864153233778, + "TEXT", + "#/texts/44", + "en", + 0.8999999761581421 ], - "sref": "#/page-elements/100", - "text-order": 100, - "type": "equation" - }, - { - "bbox": [ - 44.786224365234375, - 279.0730285644531, - 549.0149536132812, - 379.8307189941406 + [ + "semantic", + 4698316471746130896, + "TEXT", + "#/texts/45", + "text", + 0.9900000095367432 ], - "iref": "#/texts/84", - "name": "text", - "orig-order": 99, - "page": 7, - "span": [ - 0, - 960 + [ + "language", + 4698316471746130896, + "TEXT", + "#/texts/45", + "en", + 0.9100000262260437 ], - "sref": "#/page-elements/101", - "text-order": 101, - "type": "paragraph" - }, - { - "bbox": [ - 44.786224365234375, - 253.05079650878906, - 549.2977294921875, - 275.7553405761719 + [ + "semantic", + 11827267218358801841, + "TEXT", + "#/texts/46", + "text", + 0.9800000190734863 ], - "iref": "#/texts/85", - "name": "text", - "orig-order": 100, - "page": 7, - "span": [ - 0, - 204 + [ + "language", + 11827267218358801841, + "TEXT", + "#/texts/46", + "en", + 0.9300000071525574 ], - "sref": "#/page-elements/102", - "text-order": 102, - "type": "paragraph" - }, - { - "bbox": [ - 43.776466369628906, - 213.4808349609375, - 380.18682861328125, - 224.8687286376953 + [ + "semantic", + 6297710299044869343, + "TEXT", + "#/texts/47", + "header", + 0.8299999833106995 ], - "iref": "#/texts/86", - "name": "subtitle-level-1", - "orig-order": 101, - "page": 7, - "span": [ - 0, - 54 + [ + "language", + 6297710299044869343, + "TEXT", + "#/texts/47", + "fr", + 0.2800000011920929 ], - "sref": "#/page-elements/103", - "text-order": 103, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78739929199219, - 58.08219528198242, - 550.3234252929688, - 197.4915771484375 + [ + "semantic", + 7158837349769150986, + "TEXT", + "#/texts/48", + "text", + 1.0 ], - "iref": "#/texts/87", - "name": "text", - "orig-order": 102, - "page": 7, - "span": [ - 0, - 1216 + [ + "language", + 7158837349769150986, + "TEXT", + "#/texts/48", + "en", + 0.8799999952316284 ], - "sref": "#/page-elements/104", - "text-order": 104, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "semantic", + 1150871476689677866, + "TEXT", + "#/texts/49", + "text", + 1.0 ], - "iref": "#/texts/88", - "name": "text", - "orig-order": 105, - "page": 7, - "span": [ - 0, - 320 + [ + "language", + 1150871476689677866, + "TEXT", + "#/texts/49", + "en", + 0.9300000071525574 ], - "sref": "#/page-elements/105", - "text-order": 105, - "type": "paragraph" - }, - { - "bbox": [ - 45.74378967285156, - 751.4075317382812, - 68.55958557128906, - 758.9868774414062 + [ + "semantic", + 5163702913945903725, + "TEXT", + "#/texts/50", + "text", + 0.9900000095367432 ], - "iref": "#/page-headers/10", - "name": "page-header", - "orig-order": 113, - "page": 8, - "span": [ - 0, - 6 + [ + "language", + 5163702913945903725, + "TEXT", + "#/texts/50", + "en", + 0.9599999785423279 ], - "sref": "#/page-elements/106", - "text-order": 106, - "type": "page-header" - }, - { - "bbox": [ - 510.634765625, - 751.4635620117188, - 550.921142578125, - 758.3907470703125 + [ + "semantic", + 5462319091745771382, + "TEXT", + "#/texts/51", + "text", + 0.5899999737739563 ], - "iref": "#/page-headers/11", - "name": "page-header", - "orig-order": 114, - "page": 8, - "span": [ - 0, - 13 + [ + "language", + 5462319091745771382, + "TEXT", + "#/texts/51", + "en", + 0.8999999761581421 ], - "sref": "#/page-elements/107", - "text-order": 107, - "type": "page-header" - }, - { - "bbox": [ - 96.34707641601562, - 537.8071899414062, - 496.8702697753906, - 731.7752075195312 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/52", + "text", + 0.8999999761581421 ], - "iref": "#/figures/2", - "name": "picture", - "orig-order": 116, - "page": 8, - "span": [ - 0, - 0 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/52", + "en", + 0.7799999713897705 + ], + [ + "semantic", + 958124839653591304, + "TEXT", + "#/texts/53", + "text", + 0.9900000095367432 + ], + [ + "language", + 958124839653591304, + "TEXT", + "#/texts/53", + "en", + 0.9399999976158142 ], - "sref": "#/page-elements/108", - "text-order": 108, - "type": "figure" - }, - { - "bbox": [ - 46.00423812866211, - 491.7976379394531, - 543.2025756835938, - 523.7771606445312 + [ + "semantic", + 1448405324616602032, + "TEXT", + "#/texts/54", + "text", + 0.9800000190734863 ], - "iref": "#/figures/2/captions/0", - "name": "caption", - "orig-order": 112, - "page": 8, - "span": [ - 0, - 268 + [ + "language", + 1448405324616602032, + "TEXT", + "#/texts/54", + "en", + 0.8700000047683716 ], - "sref": "#/page-elements/109", - "text-order": 109, - "type": "caption" - }, - { - "bbox": [ - 46.486663818359375, - 370.0644836425781, - 551.9771728515625, - 457.6360168457031 + [ + "semantic", + 2617775076168299948, + "TEXT", + "#/texts/55", + "header", + 0.800000011920929 ], - "iref": "#/texts/89", - "name": "text", - "orig-order": 106, - "page": 8, - "span": [ - 0, - 745 + [ + "language", + 2617775076168299948, + "TEXT", + "#/texts/55", + "en", + 0.7900000214576721 ], - "sref": "#/page-elements/110", - "text-order": 110, - "type": "paragraph" - }, - { - "bbox": [ - 46.486663818359375, - 239.97216796875, - 551.4871215820312, - 366.491455078125 + [ + "semantic", + 13974986056043304735, + "TEXT", + "#/texts/56", + "text", + 0.9900000095367432 ], - "iref": "#/texts/90", - "name": "text", - "orig-order": 107, - "page": 8, - "span": [ - 0, - 1027 + [ + "language", + 13974986056043304735, + "TEXT", + "#/texts/56", + "en", + 0.9300000071525574 ], - "sref": "#/page-elements/111", - "text-order": 111, - "type": "paragraph" - }, - { - "bbox": [ - 45.14011764526367, - 200.4981231689453, - 333.7398986816406, - 211.88601684570312 + [ + "semantic", + 5985285694705576020, + "TEXT", + "#/texts/57", + "header", + 0.8199999928474426 ], - "iref": "#/texts/91", - "name": "subtitle-level-1", - "orig-order": 108, - "page": 8, - "span": [ - 0, - 48 + [ + "language", + 5985285694705576020, + "TEXT", + "#/texts/57", + "en", + 0.8399999737739563 ], - "sref": "#/page-elements/112", - "text-order": 112, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 45.9116325378418, - 162.0589599609375, - 551.3727416992188, - 184.45217895507812 + [ + "semantic", + 11235296141350659290, + "TEXT", + "#/texts/58", + "text", + 0.9900000095367432 ], - "iref": "#/texts/92", - "name": "text", - "orig-order": 109, - "page": 8, - "span": [ - 0, - 179 + [ + "language", + 11235296141350659290, + "TEXT", + "#/texts/58", + "en", + 0.9599999785423279 ], - "sref": "#/page-elements/113", - "text-order": 113, - "type": "paragraph" - }, - { - "bbox": [ - 46.21662902832031, - 84.04818725585938, - 550.9126586914062, - 158.48593139648438 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/59", + "text", + 0.8999999761581421 ], - "iref": "#/texts/93", - "name": "text", - "orig-order": 110, - "page": 8, - "span": [ - 0, - 643 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/59", + "en", + 0.7799999713897705 ], - "sref": "#/page-elements/114", - "text-order": 114, - "type": "paragraph" - }, - { - "bbox": [ - 44.992271423339844, - 45.01641845703125, - 552.1865844726562, - 80.5264892578125 + [ + "semantic", + 4361549266576336732, + "TEXT", + "#/texts/60", + "text", + 0.9700000286102295 ], - "iref": "#/texts/94", - "name": "text", - "orig-order": 111, - "page": 8, - "span": [ - 0, - 262 + [ + "language", + 4361549266576336732, + "TEXT", + "#/texts/60", + "eu", + 0.20000000298023224 ], - "sref": "#/page-elements/115", - "text-order": 115, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "semantic", + 5771309285006424458, + "TEXT", + "#/texts/61", + "text", + 0.9700000286102295 ], - "iref": "#/texts/95", - "name": "text", - "orig-order": 115, - "page": 8, - "span": [ - 0, - 320 + [ + "language", + 5771309285006424458, + "TEXT", + "#/texts/61", + "en", + 0.9300000071525574 ], - "sref": "#/page-elements/116", - "text-order": 116, - "type": "paragraph" - }, - { - "bbox": [ - 44.34560012817383, - 751.4635620117188, - 84.67137145996094, - 758.0504760742188 + [ + "semantic", + 5371685212527510397, + "TEXT", + "#/texts/62", + "header", + 0.949999988079071 ], - "iref": "#/page-headers/12", - "name": "page-header", - "orig-order": 126, - "page": 9, - "span": [ - 0, - 13 + [ + "language", + 5371685212527510397, + "TEXT", + "#/texts/62", + "en", + 0.7699999809265137 ], - "sref": "#/page-elements/117", - "text-order": 117, - "type": "page-header" - }, - { - "bbox": [ - 528.5497436523438, - 751.4075317382812, - 550.62109375, - 758.0504760742188 + [ + "semantic", + 7817257645383866853, + "TEXT", + "#/texts/63", + "text", + 0.9399999976158142 ], - "iref": "#/texts/96", - "name": "text", - "orig-order": 127, - "page": 9, - "span": [ - 0, - 5 + [ + "language", + 7817257645383866853, + "TEXT", + "#/texts/63", + "en", + 0.9200000166893005 ], - "sref": "#/page-elements/118", - "text-order": 118, - "type": "paragraph" - }, - { - "bbox": [ - 116.26325988769531, - 507.8388977050781, - 473.644775390625, - 731.2719116210938 + [ + "semantic", + 2929626768872004841, + "TEXT", + "#/texts/64", + "text", + 0.9900000095367432 ], - "iref": "#/figures/3", - "name": "picture", - "orig-order": 129, - "page": 9, - "span": [ - 0, - 0 + [ + "language", + 2929626768872004841, + "TEXT", + "#/texts/64", + "en", + 0.8100000023841858 ], - "sref": "#/page-elements/119", - "text-order": 119, - "type": "figure" - }, - { - "bbox": [ - 44.78739929199219, - 447.43023681640625, - 541.6075439453125, - 491.6891174316406 + [ + "semantic", + 15879756297712818143, + "TEXT", + "#/texts/65", + "text", + 0.9800000190734863 ], - "iref": "#/figures/3/captions/0", - "name": "caption", - "orig-order": 125, - "page": 9, - "span": [ - 0, - 473 + [ + "language", + 15879756297712818143, + "TEXT", + "#/texts/65", + "en", + 0.75 ], - "sref": "#/page-elements/120", - "text-order": 120, - "type": "caption" - }, - { - "bbox": [ - 44.418067932128906, - 395.521728515625, - 176.333251953125, - 406.9096374511719 + [ + "semantic", + 16116531546352845311, + "TEXT", + "#/texts/66", + "text", + 0.8899999856948853 ], - "iref": "#/texts/97", - "name": "subtitle-level-1", - "orig-order": 117, - "page": 9, - "span": [ - 0, - 22 + [ + "language", + 16116531546352845311, + "TEXT", + "#/texts/66", + "en", + 0.9700000286102295 ], - "sref": "#/page-elements/121", - "text-order": 121, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78739929199219, - 343.8106384277344, - 548.7684326171875, - 379.5713806152344 + [ + "semantic", + 9541434157786316356, + "TEXT", + "#/texts/67", + "text", + 0.9599999785423279 ], - "iref": "#/texts/98", - "name": "text", - "orig-order": 118, - "page": 9, - "span": [ - 0, - 270 + [ + "language", + 9541434157786316356, + "TEXT", + "#/texts/67", + "en", + 0.9599999785423279 ], - "sref": "#/page-elements/122", - "text-order": 122, - "type": "paragraph" - }, - { - "bbox": [ - 245.61886596679688, - 303.5643005371094, - 549.354736328125, - 334.3446350097656 + [ + "semantic", + 997682002692959482, + "TEXT", + "#/texts/68", + "text", + 0.9599999785423279 ], - "iref": "#/texts/99", - "name": "formula", - "orig-order": 119, - "page": 9, - "span": [ - 0, - 72 + [ + "language", + 997682002692959482, + "TEXT", + "#/texts/68", + "en", + 0.8999999761581421 ], - "sref": "#/page-elements/123", - "text-order": 123, - "type": "equation" - }, - { - "bbox": [ - 44.27131652832031, - 266.0909118652344, - 323.5520935058594, - 275.5295104980469 + [ + "semantic", + 11590138063543342276, + "TEXT", + "#/texts/69", + "header", + 0.8799999952316284 ], - "iref": "#/texts/100", - "name": "text", - "orig-order": 120, - "page": 9, - "span": [ - 0, - 69 + [ + "language", + 11590138063543342276, + "TEXT", + "#/texts/69", + "en", + 0.5099999904632568 ], - "sref": "#/page-elements/124", - "text-order": 124, - "type": "paragraph" - }, - { - "bbox": [ - 44.087921142578125, - 226.52023315429688, - 183.25424194335938, - 237.9081268310547 + [ + "semantic", + 16380310806374538602, + "TEXT", + "#/texts/70", + "text", + 0.9800000190734863 ], - "iref": "#/texts/101", - "name": "subtitle-level-1", - "orig-order": 121, - "page": 9, - "span": [ - 0, - 23 + [ + "language", + 16380310806374538602, + "TEXT", + "#/texts/70", + "en", + 0.8700000047683716 ], - "sref": "#/page-elements/125", - "text-order": 125, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.12942886352539, - 149.07611083984375, - 549.1555786132812, - 210.865478515625 + [ + "semantic", + 5393976293631695754, + "TEXT", + "#/texts/71", + "text", + 0.8799999952316284 ], - "iref": "#/texts/102", - "name": "text", - "orig-order": 122, - "page": 9, - "span": [ - 0, - 580 + [ + "language", + 5393976293631695754, + "TEXT", + "#/texts/71", + "en", + 0.8999999761581421 ], - "sref": "#/page-elements/126", - "text-order": 126, - "type": "paragraph" - }, - { - "bbox": [ - 213.45111083984375, - 107.99786376953125, - 548.7833251953125, - 139.26446533203125 + [ + "semantic", + 1988335831916069382, + "TEXT", + "#/texts/72", + "text", + 0.6200000047683716 ], - "iref": "#/texts/103", - "name": "formula", - "orig-order": 123, - "page": 9, - "span": [ - 0, - 147 + [ + "language", + 1988335831916069382, + "TEXT", + "#/texts/72", + "en", + 0.9399999976158142 ], - "sref": "#/page-elements/127", - "text-order": 127, - "type": "equation" - }, - { - "bbox": [ - 44.78630447387695, - 45.0455436706543, - 548.7993774414062, - 80.76483154296875 + [ + "semantic", + 5147764798816678886, + "TEXT", + "#/texts/73", + "text", + 0.8600000143051147 + ], + [ + "language", + 5147764798816678886, + "TEXT", + "#/texts/73", + "en", + 0.8799999952316284 ], - "iref": "#/texts/104", - "name": "text", - "orig-order": 124, - "page": 9, - "span": [ - 0, - 307 + [ + "semantic", + 285583876932865368, + "TEXT", + "#/texts/74", + "text", + 0.9800000190734863 ], - "sref": "#/page-elements/128", - "text-order": 128, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 285583876932865368, + "TEXT", + "#/texts/74", + "en", + 0.9700000286102295 ], - "iref": "#/texts/105", - "name": "text", - "orig-order": 128, - "page": 9, - "span": [ - 0, - 320 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/75", + "text", + 0.8999999761581421 ], - "sref": "#/page-elements/129", - "text-order": 129, - "type": "paragraph" - }, - { - "bbox": [ - 45.890689849853516, - 743.98095703125, - 143.1890869140625, - 761.30615234375 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/75", + "en", + 0.7799999713897705 ], - "iref": "#/figures/4", - "name": "picture", - "orig-order": 142, - "page": 10, - "span": [ - 0, - 0 + [ + "semantic", + 4361549257370278754, + "TEXT", + "#/texts/76", + "text", + 0.9700000286102295 ], - "sref": "#/page-elements/130", - "text-order": 130, - "type": "figure" - }, - { - "bbox": [ - 510.634765625, - 751.4635620117188, - 550.8926391601562, - 758.5383911132812 + [ + "language", + 4361549257370278754, + "TEXT", + "#/texts/76", + "zh", + 0.41999998688697815 ], - "iref": "#/page-headers/13", - "name": "page-header", - "orig-order": 143, - "page": 10, - "span": [ - 0, - 13 + [ + "semantic", + 13183039880198077038, + "TEXT", + "#/texts/77", + "text", + 0.9800000190734863 ], - "sref": "#/page-elements/131", - "text-order": 131, - "type": "page-header" - }, - { - "bbox": [ - 44.981788635253906, - 720.4783935546875, - 201.29905700683594, - 731.9963989257812 + [ + "language", + 13183039880198077038, + "TEXT", + "#/texts/77", + "en", + 0.9200000166893005 ], - "iref": "#/texts/106", - "name": "subtitle-level-1", - "orig-order": 130, - "page": 10, - "span": [ - 0, - 26 + [ + "semantic", + 13428900458866068249, + "TEXT", + "#/texts/78", + "header", + 0.800000011920929 ], - "sref": "#/page-elements/132", - "text-order": 132, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.0963020324707, - 656.0805053710938, - 554.1248779296875, - 705.2210693359375 + [ + "language", + 13428900458866068249, + "TEXT", + "#/texts/78", + "en", + 0.8999999761581421 ], - "iref": "#/texts/107", - "name": "text", - "orig-order": 131, - "page": 10, - "span": [ - 0, - 390 + [ + "semantic", + 1430911655724119030, + "TEXT", + "#/texts/79", + "text", + 0.9599999785423279 ], - "sref": "#/page-elements/133", - "text-order": 133, - "type": "paragraph" - }, - { - "bbox": [ - 45.49040985107422, - 616.5106201171875, - 214.94256591796875, - 627.93359375 + [ + "language", + 1430911655724119030, + "TEXT", + "#/texts/79", + "en", + 0.9300000071525574 ], - "iref": "#/texts/108", - "name": "subtitle-level-1", - "orig-order": 132, - "page": 10, - "span": [ - 0, - 27 + [ + "semantic", + 13770706479324480755, + "TEXT", + "#/texts/80", + "text", + 0.8899999856948853 ], - "sref": "#/page-elements/134", - "text-order": 134, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 45.356536865234375, - 578.0712890625, - 552.450927734375, - 600.5599365234375 + [ + "language", + 13770706479324480755, + "TEXT", + "#/texts/80", + "en", + 0.9300000071525574 ], - "iref": "#/texts/109", - "name": "text", - "orig-order": 133, - "page": 10, - "span": [ - 0, - 172 + [ + "semantic", + 11165481757050847950, + "TEXT", + "#/texts/81", + "text", + 1.0 ], - "sref": "#/page-elements/135", - "text-order": 135, - "type": "paragraph" - }, - { - "bbox": [ - 46.00928497314453, - 500.0617370605469, - 551.898193359375, - 574.4982299804688 + [ + "language", + 11165481757050847950, + "TEXT", + "#/texts/81", + "en", + 0.11999999731779099 ], - "iref": "#/texts/110", - "name": "text", - "orig-order": 134, - "page": 10, - "span": [ - 0, - 691 + [ + "semantic", + 9572077971492738329, + "TEXT", + "#/texts/82", + "text", + 0.9700000286102295 ], - "sref": "#/page-elements/136", - "text-order": 136, - "type": "paragraph" - }, - { - "bbox": [ - 45.801177978515625, - 448.0732421875, - 552.126953125, - 496.556396484375 + [ + "language", + 9572077971492738329, + "TEXT", + "#/texts/82", + "en", + 0.9399999976158142 ], - "iref": "#/texts/111", - "name": "text", - "orig-order": 135, - "page": 10, - "span": [ - 0, - 420 + [ + "semantic", + 14951391138799557075, + "TEXT", + "#/texts/83", + "text", + 1.0 ], - "sref": "#/page-elements/137", - "text-order": 137, - "type": "paragraph" - }, - { - "bbox": [ - 46.02473449707031, - 408.5044250488281, - 321.5076904296875, - 419.892333984375 + [ + "language", + 14951391138799557075, + "TEXT", + "#/texts/83", + "pl", + 0.12999999523162842 ], - "iref": "#/texts/112", - "name": "subtitle-level-1", - "orig-order": 136, - "page": 10, - "span": [ - 0, - 31 + [ + "semantic", + 16602156009514813718, + "TEXT", + "#/texts/84", + "text", + 0.9700000286102295 ], - "sref": "#/page-elements/138", - "text-order": 138, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.301429748535156, - 357.0820007324219, - 550.6118774414062, - 392.4583435058594 + [ + "language", + 16602156009514813718, + "TEXT", + "#/texts/84", + "en", + 0.9599999785423279 ], - "iref": "#/texts/113", - "name": "text", - "orig-order": 137, - "page": 10, - "span": [ - 0, - 334 + [ + "semantic", + 7162849562576593449, + "TEXT", + "#/texts/85", + "text", + 0.7900000214576721 ], - "sref": "#/page-elements/139", - "text-order": 139, - "type": "paragraph" - }, - { - "bbox": [ - 46.488189697265625, - 253.0490264892578, - 551.0360107421875, - 353.4529724121094 + [ + "language", + 7162849562576593449, + "TEXT", + "#/texts/85", + "en", + 0.9599999785423279 ], - "iref": "#/texts/114", - "name": "text", - "orig-order": 138, - "page": 10, - "span": [ - 0, - 847 + [ + "semantic", + 15385417954505503552, + "TEXT", + "#/texts/86", + "meta-data", + 0.9900000095367432 ], - "sref": "#/page-elements/140", - "text-order": 140, - "type": "paragraph" - }, - { - "bbox": [ - 46.440311431884766, - 188.080810546875, - 551.396484375, - 249.4759979248047 + [ + "language", + 15385417954505503552, + "TEXT", + "#/texts/86", + "en", + 0.8399999737739563 ], - "iref": "#/texts/115", - "name": "text", - "orig-order": 139, - "page": 10, - "span": [ - 0, - 477 + [ + "semantic", + 10815650641518265876, + "TEXT", + "#/texts/87", + "text", + 0.9100000262260437 ], - "sref": "#/page-elements/141", - "text-order": 141, - "type": "paragraph" - }, - { - "bbox": [ - 46.27632141113281, - 136.03631591796875, - 550.9563598632812, - 184.4517822265625 + [ + "language", + 10815650641518265876, + "TEXT", + "#/texts/87", + "en", + 0.949999988079071 ], - "iref": "#/texts/116", - "name": "text", - "orig-order": 140, - "page": 10, - "span": [ - 0, - 404 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/88", + "text", + 0.8999999761581421 ], - "sref": "#/page-elements/142", - "text-order": 142, - "type": "paragraph" - }, - { - "bbox": [ - 46.42215347290039, - 58.08152389526367, - 551.0359497070312, - 132.46327209472656 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/88", + "en", + 0.7799999713897705 ], - "iref": "#/texts/117", - "name": "text", - "orig-order": 141, - "page": 10, - "span": [ - 0, - 572 + [ + "semantic", + 12004249365408683930, + "TEXT", + "#/texts/89", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/143", - "text-order": 143, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 12004249365408683930, + "TEXT", + "#/texts/89", + "en", + 0.9200000166893005 ], - "iref": "#/texts/118", - "name": "text", - "orig-order": 144, - "page": 10, - "span": [ - 0, - 320 + [ + "semantic", + 7223381657047466215, + "TEXT", + "#/texts/90", + "text", + 0.9800000190734863 ], - "sref": "#/page-elements/144", - "text-order": 144, - "type": "paragraph" - }, - { - "bbox": [ - 43.98883056640625, - 751.4635620117188, - 84.67137145996094, - 758.0504760742188 + [ + "language", + 7223381657047466215, + "TEXT", + "#/texts/90", + "en", + 0.9300000071525574 ], - "iref": "#/page-headers/14", - "name": "page-header", - "orig-order": 150, - "page": 11, - "span": [ - 0, - 13 + [ + "semantic", + 15132906055887224772, + "TEXT", + "#/texts/91", + "header", + 0.7099999785423279 ], - "sref": "#/page-elements/145", - "text-order": 145, - "type": "page-header" - }, - { - "bbox": [ - 525.1477661132812, - 751.4075317382812, - 548.775146484375, - 758.0504760742188 + [ + "language", + 15132906055887224772, + "TEXT", + "#/texts/91", + "en", + 0.8299999833106995 ], - "iref": "#/texts/119", - "name": "text", - "orig-order": 151, - "page": 11, - "span": [ - 0, - 6 + [ + "semantic", + 17129434987283608290, + "TEXT", + "#/texts/92", + "text", + 0.8999999761581421 ], - "sref": "#/page-elements/146", - "text-order": 146, - "type": "paragraph" - }, - { - "bbox": [ - 48.36570739746094, - 477.8360900878906, - 548.3624267578125, - 732.3331298828125 + [ + "language", + 17129434987283608290, + "TEXT", + "#/texts/92", + "en", + 0.949999988079071 + ], + [ + "semantic", + 10350406469077463155, + "TEXT", + "#/texts/93", + "text", + 0.9300000071525574 + ], + [ + "language", + 10350406469077463155, + "TEXT", + "#/texts/93", + "en", + 0.9300000071525574 ], - "iref": "#/figures/5", - "name": "picture", - "orig-order": 153, - "page": 11, - "span": [ - 0, - 0 + [ + "semantic", + 16949854269270315165, + "TEXT", + "#/texts/94", + "text", + 0.9599999785423279 ], - "sref": "#/page-elements/147", - "text-order": 147, - "type": "figure" - }, - { - "bbox": [ - 44.78739929199219, - 428.34173583984375, - 541.0477905273438, - 460.564697265625 + [ + "language", + 16949854269270315165, + "TEXT", + "#/texts/94", + "en", + 0.9100000262260437 ], - "iref": "#/figures/5/captions/0", - "name": "caption", - "orig-order": 149, - "page": 11, - "span": [ - 0, - 275 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/95", + "text", + 0.8999999761581421 ], - "sref": "#/page-elements/148", - "text-order": 148, - "type": "caption" - }, - { - "bbox": [ - 44.78684997558594, - 331.06005859375, - 550.6510620117188, - 405.4977722167969 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/95", + "en", + 0.7799999713897705 ], - "iref": "#/texts/120", - "name": "text", - "orig-order": 145, - "page": 11, - "span": [ - 0, - 596 + [ + "semantic", + 4361549266593946746, + "TEXT", + "#/texts/96", + "text", + 0.9700000286102295 ], - "sref": "#/page-elements/149", - "text-order": 149, - "type": "paragraph" - }, - { - "bbox": [ - 44.489322662353516, - 291.4902038574219, - 365.9893798828125, - 302.87811279296875 + [ + "language", + 4361549266593946746, + "TEXT", + "#/texts/96", + "fr", + 0.3700000047683716 ], - "iref": "#/texts/121", - "name": "subtitle-level-1", - "orig-order": 146, - "page": 11, - "span": [ - 0, - 39 + [ + "semantic", + 9802652237802670052, + "TEXT", + "#/texts/97", + "header", + 0.7099999785423279 ], - "sref": "#/page-elements/150", - "text-order": 150, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.785736083984375, - 175.04168701171875, - 549.7868041992188, - 275.5009460449219 + [ + "language", + 9802652237802670052, + "TEXT", + "#/texts/97", + "zh", + 0.18000000715255737 ], - "iref": "#/texts/122", - "name": "text", - "orig-order": 147, - "page": 11, - "span": [ - 0, - 861 + [ + "semantic", + 5524728206729419689, + "TEXT", + "#/texts/98", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/151", - "text-order": 151, - "type": "paragraph" - }, - { - "bbox": [ - 44.785736083984375, - 45.043888092041016, - 549.4429931640625, - 171.5908203125 + [ + "language", + 5524728206729419689, + "TEXT", + "#/texts/98", + "en", + 0.9100000262260437 ], - "iref": "#/texts/123", - "name": "text", - "orig-order": 148, - "page": 11, - "span": [ - 0, - 1189 + [ + "semantic", + 4043385013945968936, + "TEXT", + "#/texts/99", + "text", + 1.0 ], - "sref": "#/page-elements/152", - "text-order": 152, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 4043385013945968936, + "TEXT", + "#/texts/99", + "sv", + 0.10999999940395355 ], - "iref": "#/texts/124", - "name": "text", - "orig-order": 152, - "page": 11, - "span": [ - 0, - 320 + [ + "semantic", + 11778884428660217326, + "TEXT", + "#/texts/100", + "text", + 1.0 ], - "sref": "#/page-elements/153", - "text-order": 153, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 751.4075317382812, - 51.251686096191406, - 758.0504760742188 + [ + "language", + 11778884428660217326, + "TEXT", + "#/texts/100", + "en", + 0.8299999833106995 ], - "iref": "#/texts/125", - "name": "text", - "orig-order": 166, - "page": 12, - "span": [ - 0, - 2 + [ + "semantic", + 12875050310340408203, + "TEXT", + "#/texts/101", + "meta-data", + 0.9900000095367432 ], - "sref": "#/page-elements/154", - "text-order": 154, - "type": "paragraph" - }, - { - "bbox": [ - 56.12232208251953, - 751.4075317382812, - 70.11566162109375, - 758.0504760742188 + [ + "language", + 12875050310340408203, + "TEXT", + "#/texts/101", + "en", + 0.3700000047683716 ], - "iref": "#/texts/126", - "name": "text", - "orig-order": 167, - "page": 12, - "span": [ - 0, - 5 + [ + "semantic", + 3785875504044487339, + "TEXT", + "#/texts/102", + "text", + 0.9800000190734863 ], - "sref": "#/page-elements/155", - "text-order": 155, - "type": "paragraph" - }, - { - "bbox": [ - 510.634765625, - 751.4635620117188, - 550.7427368164062, - 758.252197265625 + [ + "language", + 3785875504044487339, + "TEXT", + "#/texts/102", + "en", + 0.9300000071525574 ], - "iref": "#/page-headers/15", - "name": "page-header", - "orig-order": 168, - "page": 12, - "span": [ - 0, - 13 + [ + "semantic", + 12105626155924658285, + "TEXT", + "#/texts/103", + "text", + 1.0 ], - "sref": "#/page-elements/156", - "text-order": 156, - "type": "page-header" - }, - { - "bbox": [ - 55.876461029052734, - 606.848876953125, - 541.853759765625, - 729.6771850585938 + [ + "language", + 12105626155924658285, + "TEXT", + "#/texts/103", + "ja", + 0.11999999731779099 ], - "iref": "#/figures/6", - "name": "picture", - "orig-order": 164, - "page": 12, - "span": [ - 0, - 0 + [ + "semantic", + 16265612055607243129, + "TEXT", + "#/texts/104", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/157", - "text-order": 157, - "type": "figure" - }, - { - "bbox": [ - 44.766658782958984, - 585.4602661132812, - 387.12310791015625, - 593.5936279296875 + [ + "language", + 16265612055607243129, + "TEXT", + "#/texts/104", + "en", + 0.9200000166893005 ], - "iref": "#/figures/6/captions/0", - "name": "caption", - "orig-order": 165, - "page": 12, - "span": [ - 0, - 88 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/105", + "text", + 0.8999999761581421 ], - "sref": "#/page-elements/158", - "text-order": 158, - "type": "caption" - }, - { - "bbox": [ - 45.36357116699219, - 526.083984375, - 552.5618286132812, - 548.4772338867188 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/105", + "en", + 0.7799999713897705 ], - "iref": "#/texts/127", - "name": "text", - "orig-order": 154, - "page": 12, - "span": [ - 0, - 171 + [ + "semantic", + 10252446451495472512, + "TEXT", + "#/texts/106", + "meta-data", + 0.9599999785423279 ], - "sref": "#/page-elements/159", - "text-order": 159, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 448.0732421875, - 552.16748046875, - 522.4549560546875 + [ + "language", + 10252446451495472512, + "TEXT", + "#/texts/106", + "en", + 0.8299999833106995 ], - "iref": "#/texts/128", - "name": "text", - "orig-order": 155, - "page": 12, - "span": [ - 0, - 596 + [ + "semantic", + 17011944206067158637, + "TEXT", + "#/texts/107", + "text", + 0.9399999976158142 ], - "sref": "#/page-elements/160", - "text-order": 160, - "type": "paragraph" - }, - { - "bbox": [ - 46.228458404541016, - 382.8196716308594, - 552.1286010742188, - 444.5987854003906 + [ + "language", + 17011944206067158637, + "TEXT", + "#/texts/107", + "en", + 0.9300000071525574 ], - "iref": "#/texts/129", - "name": "text", - "orig-order": 156, - "page": 12, - "span": [ - 0, - 460 + [ + "semantic", + 16289627123982758705, + "TEXT", + "#/texts/108", + "meta-data", + 0.4399999976158142 ], - "sref": "#/page-elements/161", - "text-order": 161, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 357.0803527832031, - 309.6529846191406, - 366.4904479980469 + [ + "language", + 16289627123982758705, + "TEXT", + "#/texts/108", + "en", + 0.5199999809265137 ], - "iref": "#/texts/130", - "name": "list-item", - "orig-order": 157, - "page": 12, - "span": [ - 0, - 57 + [ + "semantic", + 13969801897340997317, + "TEXT", + "#/texts/109", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/162", - "text-order": 162, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 344.0412292480469, - 336.8304748535156, - 353.6436767578125 + [ + "language", + 13969801897340997317, + "TEXT", + "#/texts/109", + "en", + 0.9800000190734863 ], - "iref": "#/texts/131", - "name": "list-item", - "orig-order": 158, - "page": 12, - "span": [ - 0, - 65 + [ + "semantic", + 105697770555684555, + "TEXT", + "#/texts/110", + "text", + 0.9700000286102295 ], - "sref": "#/page-elements/163", - "text-order": 163, - "type": "paragraph" - }, - { - "bbox": [ - 45.47064971923828, - 331.05810546875, - 478.3088684082031, - 340.54962158203125 + [ + "language", + 105697770555684555, + "TEXT", + "#/texts/110", + "en", + 0.9399999976158142 ], - "iref": "#/texts/132", - "name": "list-item", - "orig-order": 159, - "page": 12, - "span": [ - 0, - 101 + [ + "semantic", + 15938840672015995359, + "TEXT", + "#/texts/111", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/164", - "text-order": 164, - "type": "paragraph" - }, - { - "bbox": [ - 46.16604232788086, - 214.04542541503906, - 551.7832641601562, - 314.4459533691406 + [ + "language", + 15938840672015995359, + "TEXT", + "#/texts/111", + "en", + 0.9700000286102295 ], - "iref": "#/texts/133", - "name": "text", - "orig-order": 160, - "page": 12, - "span": [ - 0, - 923 + [ + "semantic", + 16505790528099785698, + "TEXT", + "#/texts/112", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/165", - "text-order": 165, - "type": "paragraph" - }, - { - "bbox": [ - 46.26358413696289, - 149.0762481689453, - 551.3743896484375, - 210.68536376953125 + [ + "language", + 16505790528099785698, + "TEXT", + "#/texts/112", + "en", + 0.36000001430511475 ], - "iref": "#/texts/134", - "name": "text", - "orig-order": 161, - "page": 12, - "span": [ - 0, - 569 + [ + "semantic", + 14738723905055920039, + "TEXT", + "#/texts/113", + "text", + 1.0 + ], + [ + "language", + 14738723905055920039, + "TEXT", + "#/texts/113", + "en", + 0.8700000047683716 + ], + [ + "semantic", + 5699550326698755904, + "TEXT", + "#/texts/114", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/166", - "text-order": 166, - "type": "paragraph" - }, - { - "bbox": [ - 45.70681381225586, - 71.06546783447266, - 551.875732421875, - 145.5064697265625 + [ + "language", + 5699550326698755904, + "TEXT", + "#/texts/114", + "en", + 0.8899999856948853 ], - "iref": "#/texts/135", - "name": "text", - "orig-order": 162, - "page": 12, - "span": [ - 0, - 698 + [ + "semantic", + 11609131422778723150, + "TEXT", + "#/texts/115", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/167", - "text-order": 167, - "type": "paragraph" - }, - { - "bbox": [ - 46.488380432128906, - 45.0432014465332, - 551.8381958007812, - 67.6728515625 + [ + "language", + 11609131422778723150, + "TEXT", + "#/texts/115", + "en", + 0.9100000262260437 ], - "iref": "#/texts/136", - "name": "text", - "orig-order": 163, - "page": 12, - "span": [ - 0, - 218 + [ + "semantic", + 788128893109726279, + "TEXT", + "#/texts/116", + "text", + 0.9100000262260437 ], - "sref": "#/page-elements/168", - "text-order": 168, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 788128893109726279, + "TEXT", + "#/texts/116", + "en", + 0.9700000286102295 ], - "iref": "#/texts/137", - "name": "text", - "orig-order": 169, - "page": 12, - "span": [ - 0, - 320 + [ + "semantic", + 7029344862946908483, + "TEXT", + "#/texts/117", + "text", + 0.9800000190734863 ], - "sref": "#/page-elements/169", - "text-order": 169, - "type": "paragraph" - }, - { - "bbox": [ - 44.31840515136719, - 751.4635620117188, - 84.67137145996094, - 758.0541381835938 + [ + "language", + 7029344862946908483, + "TEXT", + "#/texts/117", + "en", + 0.9200000166893005 ], - "iref": "#/page-headers/16", - "name": "page-header", - "orig-order": 177, - "page": 13, - "span": [ - 0, - 13 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/118", + "text", + 0.8999999761581421 ], - "sref": "#/page-elements/170", - "text-order": 170, - "type": "page-header" - }, - { - "bbox": [ - 525.1477661132812, - 751.4075317382812, - 529.9112548828125, - 758.0504760742188 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/118", + "en", + 0.7799999713897705 ], - "iref": "#/texts/138", - "name": "text", - "orig-order": 178, - "page": 13, - "span": [ - 0, - 2 + [ + "semantic", + 2144926686518491811, + "TEXT", + "#/texts/119", + "text", + 0.9700000286102295 ], - "sref": "#/page-elements/171", - "text-order": 171, - "type": "paragraph" - }, - { - "bbox": [ - 534.7818603515625, - 751.4075317382812, - 548.775146484375, - 758.0504760742188 + [ + "language", + 2144926686518491811, + "TEXT", + "#/texts/119", + "fr", + 0.2199999988079071 ], - "iref": "#/texts/139", - "name": "text", - "orig-order": 179, - "page": 13, - "span": [ - 0, - 5 + [ + "semantic", + 18333396269095847693, + "TEXT", + "#/texts/120", + "text", + 0.8999999761581421 ], - "sref": "#/page-elements/172", - "text-order": 172, - "type": "paragraph" - }, - { - "bbox": [ - 45.15538024902344, - 607.3761596679688, - 548.95361328125, - 731.4898681640625 + [ + "language", + 18333396269095847693, + "TEXT", + "#/texts/120", + "en", + 0.9599999785423279 ], - "iref": "#/figures/7", - "name": "picture", - "orig-order": 181, - "page": 13, - "span": [ - 0, - 0 + [ + "semantic", + 4030998538427149966, + "TEXT", + "#/texts/121", + "header", + 0.7599999904632568 ], - "sref": "#/page-elements/173", - "text-order": 173, - "type": "figure" - }, - { - "bbox": [ - 44.35472869873047, - 537.0355224609375, - 539.2632446289062, - 593.7362670898438 + [ + "language", + 4030998538427149966, + "TEXT", + "#/texts/121", + "en", + 0.5099999904632568 ], - "iref": "#/figures/7/captions/0", - "name": "text", - "orig-order": 174, - "page": 13, - "span": [ - 0, - 608 + [ + "semantic", + 10295608624766759271, + "TEXT", + "#/texts/122", + "text", + 0.9700000286102295 ], - "sref": "#/page-elements/174", - "text-order": 174, - "type": "paragraph" - }, - { - "bbox": [ - 44.49153518676758, - 441.90771484375, - 181.1155242919922, - 498.2774658203125 + [ + "language", + 10295608624766759271, + "TEXT", + "#/texts/122", + "en", + 0.9399999976158142 ], - "iref": "#/tables/0/captions/0", - "name": "caption", - "orig-order": 175, - "page": 13, - "span": [ - 0, - 160 + [ + "semantic", + 10633780781731536747, + "TEXT", + "#/texts/123", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/175", - "text-order": 175, - "type": "caption" - }, - { - "bbox": [ - 210.0027313232422, - 346.577880859375, - 549.0220336914062, - 499.1263427734375 + [ + "language", + 10633780781731536747, + "TEXT", + "#/texts/123", + "en", + 0.949999988079071 ], - "iref": "#/tables/0", - "name": "table", - "orig-order": 176, - "page": 13, - "span": [ - 0, - 0 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/124", + "text", + 0.8999999761581421 ], - "sref": "#/page-elements/176", - "text-order": 176, - "type": "table" - }, - { - "bbox": [ - 44.78739929199219, - 292.05572509765625, - 549.0201416015625, - 314.4489440917969 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/124", + "en", + 0.7799999713897705 ], - "iref": "#/texts/140", - "name": "text", - "orig-order": 170, - "page": 13, - "span": [ - 0, - 191 + [ + "semantic", + 1080447728722590413, + "TEXT", + "#/texts/125", + "header", + 1.0 ], - "sref": "#/page-elements/177", - "text-order": 177, - "type": "paragraph" - }, - { - "bbox": [ - 44.786376953125, - 188.07875061035156, - 550.8748779296875, - 288.5342712402344 + [ + "language", + 1080447728722590413, + "TEXT", + "#/texts/125", + "en", + 0.14000000059604645 ], - "iref": "#/texts/141", - "name": "text", - "orig-order": 171, - "page": 13, - "span": [ - 0, - 834 + [ + "semantic", + 4361549257087816853, + "TEXT", + "#/texts/126", + "text", + 0.8899999856948853 ], - "sref": "#/page-elements/178", - "text-order": 178, - "type": "paragraph" - }, - { - "bbox": [ - 44.73537826538086, - 148.51072692871094, - 178.22747802734375, - 159.89862060546875 + [ + "language", + 4361549257087816853, + "TEXT", + "#/texts/126", + "en", + 0.949999988079071 ], - "iref": "#/texts/142", - "name": "subtitle-level-1", - "orig-order": 172, - "page": 13, - "span": [ - 0, - 15 + [ + "semantic", + 10195664788154887804, + "TEXT", + "#/texts/127", + "text", + 1.0 ], - "sref": "#/page-elements/179", - "text-order": 179, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78739929199219, - 58.0830192565918, - 549.515625, - 132.5465087890625 + [ + "language", + 10195664788154887804, + "TEXT", + "#/texts/127", + "en", + 0.9900000095367432 ], - "iref": "#/texts/143", - "name": "text", - "orig-order": 173, - "page": 13, - "span": [ - 0, - 699 + [ + "semantic", + 7538054744015619336, + "TEXT", + "#/texts/128", + "text", + 1.0 ], - "sref": "#/page-elements/180", - "text-order": 180, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 7538054744015619336, + "TEXT", + "#/texts/128", + "en", + 0.9300000071525574 ], - "iref": "#/texts/144", - "name": "text", - "orig-order": 180, - "page": 13, - "span": [ - 0, - 320 + [ + "semantic", + 12426662601736619109, + "TEXT", + "#/texts/129", + "text", + 1.0 ], - "sref": "#/page-elements/181", - "text-order": 181, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 751.4075317382812, - 70.11566162109375, - 758.0504760742188 + [ + "language", + 12426662601736619109, + "TEXT", + "#/texts/129", + "en", + 0.949999988079071 ], - "iref": "#/texts/145", - "name": "text", - "orig-order": 213, - "page": 14, - "span": [ - 0, - 6 + [ + "semantic", + 4162783521620221579, + "TEXT", + "#/texts/130", + "header", + 0.46000000834465027 ], - "sref": "#/page-elements/182", - "text-order": 182, - "type": "paragraph" - }, - { - "bbox": [ - 510.634765625, - 751.3934326171875, - 551.0859985351562, - 759.209228515625 + [ + "language", + 4162783521620221579, + "TEXT", + "#/texts/130", + "en", + 0.8700000047683716 ], - "iref": "#/page-headers/17", - "name": "page-header", - "orig-order": 214, - "page": 14, - "span": [ - 0, - 13 + [ + "semantic", + 5135259059216244866, + "TEXT", + "#/texts/131", + "text", + 0.7599999904632568 ], - "sref": "#/page-elements/183", - "text-order": 183, - "type": "page-header" - }, - { - "bbox": [ - 46.38566589355469, - 708.0682373046875, - 552.190673828125, - 731.0924072265625 + [ + "language", + 5135259059216244866, + "TEXT", + "#/texts/131", + "en", + 0.9399999976158142 ], - "iref": "#/texts/146", - "name": "text", - "orig-order": 182, - "page": 14, - "span": [ - 0, - 119 + [ + "semantic", + 16998817296948099535, + "TEXT", + "#/texts/132", + "text", + 0.8700000047683716 ], - "sref": "#/page-elements/184", - "text-order": 184, - "type": "paragraph" - }, - { - "bbox": [ - 45.289154052734375, - 669.0628051757812, - 553.278076171875, - 705.6804809570312 + [ + "language", + 16998817296948099535, + "TEXT", + "#/texts/132", + "en", + 0.9700000286102295 ], - "iref": "#/texts/147", - "name": "text", - "orig-order": 183, - "page": 14, - "span": [ - 0, - 322 + [ + "semantic", + 1205649569241141618, + "TEXT", + "#/texts/133", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/185", - "text-order": 185, - "type": "paragraph" - }, - { - "bbox": [ - 44.96582794189453, - 643.04052734375, - 553.867431640625, - 666.6377563476562 + [ + "language", + 1205649569241141618, + "TEXT", + "#/texts/133", + "en", + 0.9399999976158142 ], - "iref": "#/texts/148", - "name": "text", - "orig-order": 184, - "page": 14, - "span": [ - 0, - 172 + [ + "semantic", + 12257840490666828590, + "TEXT", + "#/texts/134", + "text", + 1.0 ], - "sref": "#/page-elements/186", - "text-order": 186, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 616.512939453125, - 242.9811553955078, - 628.0685424804688 + [ + "language", + 12257840490666828590, + "TEXT", + "#/texts/134", + "en", + 0.9200000166893005 ], - "iref": "#/texts/149", - "name": "subtitle-level-1", - "orig-order": 185, - "page": 14, - "span": [ - 0, - 27 + [ + "semantic", + 7040847965650746591, + "TEXT", + "#/texts/135", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/187", - "text-order": 187, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.48820114135742, - 603.7968139648438, - 209.16476440429688, - 615.1295166015625 + [ + "language", + 7040847965650746591, + "TEXT", + "#/texts/135", + "en", + 0.8799999952316284 ], - "iref": "#/texts/150", - "name": "text", - "orig-order": 186, - "page": 14, - "span": [ - 0, - 41 + [ + "semantic", + 7927601225025519287, + "TEXT", + "#/texts/136", + "text", + 1.0 ], - "sref": "#/page-elements/188", - "text-order": 188, - "type": "paragraph" - }, - { - "bbox": [ - 45.64805603027344, - 577.8392333984375, - 84.40357971191406, - 589.0214233398438 + [ + "language", + 7927601225025519287, + "TEXT", + "#/texts/136", + "en", + 0.8899999856948853 ], - "iref": "#/texts/151", - "name": "subtitle-level-1", - "orig-order": 187, - "page": 14, - "span": [ - 0, - 5 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/137", + "text", + 0.8999999761581421 ], - "sref": "#/page-elements/189", - "text-order": 189, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 45.716941833496094, - 539.067138671875, - 288.83966064453125, - 575.9967041015625 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/137", + "en", + 0.7799999713897705 ], - "iref": "#/texts/152", - "name": "text", - "orig-order": 188, - "page": 14, - "span": [ - 0, - 160 + [ + "semantic", + 1080447728722590402, + "TEXT", + "#/texts/138", + "header", + 1.0 ], - "sref": "#/page-elements/190", - "text-order": 190, - "type": "paragraph" - }, - { - "bbox": [ - 45.982421875, - 512.6180419921875, - 110.57768249511719, - 524.0657958984375 + [ + "language", + 1080447728722590402, + "TEXT", + "#/texts/138", + "ja", + 0.12999999523162842 ], - "iref": "#/texts/153", - "name": "subtitle-level-1", - "orig-order": 189, - "page": 14, - "span": [ - 0, - 8 + [ + "semantic", + 4361549257087816853, + "TEXT", + "#/texts/139", + "text", + 0.8899999856948853 ], - "sref": "#/page-elements/191", - "text-order": 191, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.48820114135742, - 498.1862487792969, - 411.1214904785156, - 507.86468505859375 + [ + "language", + 4361549257087816853, + "TEXT", + "#/texts/139", + "en", + 0.949999988079071 ], - "iref": "#/texts/154", - "name": "list-item", - "orig-order": 190, - "page": 14, - "span": [ - 0, - 99 + [ + "semantic", + 8207961846673301043, + "TEXT", + "#/texts/140", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/192", - "text-order": 192, - "type": "paragraph" - }, - { - "bbox": [ - 46.17177200317383, - 472.4082946777344, - 552.9000854492188, - 493.8719482421875 + [ + "language", + 8207961846673301043, + "TEXT", + "#/texts/140", + "en", + 0.8999999761581421 ], - "iref": "#/texts/155", - "name": "list-item", - "orig-order": 191, - "page": 14, - "span": [ - 0, - 285 + [ + "semantic", + 11998199584890640594, + "TEXT", + "#/texts/141", + "text", + 1.0 ], - "sref": "#/page-elements/193", - "text-order": 193, - "type": "paragraph" - }, - { - "bbox": [ - 46.39039993286133, - 457.71929931640625, - 129.30548095703125, - 468.0890197753906 + [ + "language", + 11998199584890640594, + "TEXT", + "#/texts/141", + "en", + 0.9599999785423279 ], - "iref": "#/texts/156", - "name": "list-item", - "orig-order": 192, - "page": 14, - "span": [ - 0, - 24 + [ + "semantic", + 16446129547721407877, + "TEXT", + "#/texts/142", + "meta-data", + 1.0 ], - "sref": "#/page-elements/194", - "text-order": 194, - "type": "paragraph" - }, - { - "bbox": [ - 45.71389389038086, - 443.1494140625, - 242.0704345703125, - 453.0476989746094 + [ + "language", + 16446129547721407877, + "TEXT", + "#/texts/142", + "en", + 0.6899999976158142 ], - "iref": "#/texts/157", - "name": "list-item", - "orig-order": 193, - "page": 14, - "span": [ - 0, - 53 + [ + "semantic", + 6720443978031524294, + "TEXT", + "#/texts/143", + "text", + 0.800000011920929 ], - "sref": "#/page-elements/195", - "text-order": 195, - "type": "paragraph" - }, - { - "bbox": [ - 46.020606994628906, - 417.41619873046875, - 554.6400756835938, - 438.90777587890625 + [ + "language", + 6720443978031524294, + "TEXT", + "#/texts/143", + "en", + 0.8899999856948853 ], - "iref": "#/texts/158", - "name": "list-item", - "orig-order": 194, - "page": 14, - "span": [ - 0, - 248 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/144", + "text", + 0.8999999761581421 ], - "sref": "#/page-elements/196", - "text-order": 196, - "type": "paragraph" - }, - { - "bbox": [ - 46.48814010620117, - 402.9024353027344, - 321.26422119140625, - 412.63861083984375 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/144", + "en", + 0.7799999713897705 ], - "iref": "#/texts/159", - "name": "list-item", - "orig-order": 195, - "page": 14, - "span": [ - 0, - 70 + [ + "semantic", + 2144926730621142072, + "TEXT", + "#/texts/145", + "text", + 0.9700000286102295 ], - "sref": "#/page-elements/197", - "text-order": 197, - "type": "paragraph" - }, - { - "bbox": [ - 46.00100326538086, - 376.937744140625, - 554.378662109375, - 398.0555114746094 + [ + "language", + 2144926730621142072, + "TEXT", + "#/texts/145", + "pms", + 0.7599999904632568 ], - "iref": "#/texts/160", - "name": "list-item", - "orig-order": 196, - "page": 14, - "span": [ - 0, - 211 + [ + "semantic", + 14222671032550229818, + "TEXT", + "#/texts/146", + "text", + 0.6000000238418579 ], - "sref": "#/page-elements/198", - "text-order": 198, - "type": "paragraph" - }, - { - "bbox": [ - 46.0579719543457, - 350.9154052734375, - 553.2630004882812, - 372.03350830078125 + [ + "language", + 14222671032550229818, + "TEXT", + "#/texts/146", + "en", + 0.8899999856948853 ], - "iref": "#/texts/161", - "name": "list-item", - "orig-order": 197, - "page": 14, - "span": [ - 0, - 156 + [ + "semantic", + 17486770941839589126, + "TEXT", + "#/texts/147", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/199", - "text-order": 199, - "type": "paragraph" - }, - { - "bbox": [ - 45.94832229614258, - 335.78765869140625, - 129.86572265625, - 346.3191833496094 + [ + "language", + 17486770941839589126, + "TEXT", + "#/texts/147", + "en", + 0.9900000095367432 ], - "iref": "#/texts/162", - "name": "list-item", - "orig-order": 198, - "page": 14, - "span": [ - 0, - 25 + [ + "semantic", + 16574813224778118841, + "TEXT", + "#/texts/148", + "text", + 0.9700000286102295 ], - "sref": "#/page-elements/200", - "text-order": 200, - "type": "paragraph" - }, - { - "bbox": [ - 45.82542419433594, - 321.9457092285156, - 234.11181640625, - 331.8630065917969 + [ + "language", + 16574813224778118841, + "TEXT", + "#/texts/148", + "en", + 0.9100000262260437 ], - "iref": "#/texts/163", - "name": "list-item", - "orig-order": 199, - "page": 14, - "span": [ - 0, - 54 + [ + "semantic", + 3356142343274371864, + "TEXT", + "#/texts/149", + "header", + 1.0 ], - "sref": "#/page-elements/201", - "text-order": 201, - "type": "paragraph" - }, - { - "bbox": [ - 46.478782653808594, - 307.19293212890625, - 269.6688537597656, - 316.9698486328125 + [ + "language", + 3356142343274371864, + "TEXT", + "#/texts/149", + "en", + 0.20000000298023224 ], - "iref": "#/texts/164", - "name": "list-item", - "orig-order": 200, - "page": 14, - "span": [ - 0, - 61 + [ + "semantic", + 4778022085288441371, + "TEXT", + "#/texts/150", + "text", + 0.6299999952316284 ], - "sref": "#/page-elements/202", - "text-order": 202, - "type": "paragraph" - }, - { - "bbox": [ - 46.01924514770508, - 292.9189147949219, - 301.0096130371094, - 302.8531799316406 + [ + "language", + 4778022085288441371, + "TEXT", + "#/texts/150", + "en", + 0.949999988079071 ], - "iref": "#/texts/165", - "name": "list-item", - "orig-order": 201, - "page": 14, - "span": [ - 0, - 75 + [ + "semantic", + 4361549257598904601, + "TEXT", + "#/texts/151", + "header", + 0.8500000238418579 ], - "sref": "#/page-elements/203", - "text-order": 203, - "type": "paragraph" - }, - { - "bbox": [ - 46.444217681884766, - 278.1666564941406, - 187.92904663085938, - 288.1064453125 + [ + "language", + 4361549257598904601, + "TEXT", + "#/texts/151", + "it", + 0.36000001430511475 ], - "iref": "#/texts/166", - "name": "list-item", - "orig-order": 202, - "page": 14, - "span": [ - 0, - 43 + [ + "semantic", + 3523281823889115814, + "TEXT", + "#/texts/152", + "meta-data", + 0.5799999833106995 ], - "sref": "#/page-elements/204", - "text-order": 204, - "type": "paragraph" - }, - { - "bbox": [ - 46.00947952270508, - 263.8026123046875, - 169.3743896484375, - 274.1329345703125 + [ + "language", + 3523281823889115814, + "TEXT", + "#/texts/152", + "en", + 0.30000001192092896 ], - "iref": "#/texts/167", - "name": "list-item", - "orig-order": 203, - "page": 14, - "span": [ - 0, - 36 + [ + "semantic", + 8500729849894221215, + "TEXT", + "#/texts/153", + "header", + 1.0 ], - "sref": "#/page-elements/205", - "text-order": 205, - "type": "paragraph" - }, - { - "bbox": [ - 46.049869537353516, - 231.931396484375, - 123.2709732055664, - 244.548095703125 + [ + "language", + 8500729849894221215, + "TEXT", + "#/texts/153", + "en", + 0.30000001192092896 ], - "iref": "#/texts/168", - "name": "subtitle-level-1", - "orig-order": 204, - "page": 14, - "span": [ - 0, - 10 + [ + "semantic", + 7813503946963688644, + "TEXT", + "#/texts/154", + "text", + 0.9900000095367432 ], - "sref": "#/page-elements/206", - "text-order": 206, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 50.6671142578125, - 207.4257049560547, - 552.3800659179688, - 228.917724609375 + [ + "language", + 7813503946963688644, + "TEXT", + "#/texts/154", + "en", + 0.47999998927116394 ], - "iref": "#/texts/169", - "name": "list-item", - "orig-order": 205, - "page": 14, - "span": [ - 0, - 179 + [ + "semantic", + 9230987401345399746, + "TEXT", + "#/texts/155", + "text", + 0.9100000262260437 ], - "sref": "#/page-elements/207", - "text-order": 207, - "type": "paragraph" - }, - { - "bbox": [ - 50.74010467529297, - 184.40769958496094, - 552.61669921875, - 205.76568603515625 + [ + "language", + 9230987401345399746, + "TEXT", + "#/texts/155", + "en", + 0.9700000286102295 ], - "iref": "#/texts/170", - "name": "list-item", - "orig-order": 206, - "page": 14, - "span": [ - 0, - 163 + [ + "semantic", + 1997735398126013155, + "TEXT", + "#/texts/156", + "text", + 0.800000011920929 ], - "sref": "#/page-elements/208", - "text-order": 208, - "type": "paragraph" - }, - { - "bbox": [ - 50.74015808105469, - 161.3896942138672, - 552.6810302734375, - 182.65234375 + [ + "language", + 1997735398126013155, + "TEXT", + "#/texts/156", + "en", + 0.6600000262260437 ], - "iref": "#/texts/171", - "name": "list-item", - "orig-order": 207, - "page": 14, - "span": [ - 0, - 168 + [ + "semantic", + 13566764974477978642, + "TEXT", + "#/texts/157", + "text", + 1.0 ], - "sref": "#/page-elements/209", - "text-order": 209, - "type": "paragraph" - }, - { - "bbox": [ - 50.16819763183594, - 126.91963195800781, - 552.5728759765625, - 159.62261962890625 + [ + "language", + 13566764974477978642, + "TEXT", + "#/texts/157", + "en", + 0.75 ], - "iref": "#/texts/172", - "name": "list-item", - "orig-order": 208, - "page": 14, - "span": [ - 0, - 292 + [ + "semantic", + 4925537010788978399, + "TEXT", + "#/texts/158", + "text", + 1.0 ], - "sref": "#/page-elements/210", - "text-order": 210, - "type": "paragraph" - }, - { - "bbox": [ - 50.49177551269531, - 103.90162658691406, - 553.5820922851562, - 124.90191650390625 + [ + "language", + 4925537010788978399, + "TEXT", + "#/texts/158", + "en", + 0.8899999856948853 ], - "iref": "#/texts/173", - "name": "list-item", - "orig-order": 209, - "page": 14, - "span": [ - 0, - 171 + [ + "semantic", + 16552665876195410077, + "TEXT", + "#/texts/159", + "text", + 0.9800000190734863 ], - "sref": "#/page-elements/211", - "text-order": 211, - "type": "paragraph" - }, - { - "bbox": [ - 50.74018859863281, - 92.39262390136719, - 436.9924011230469, - 101.68670654296875 + [ + "language", + 16552665876195410077, + "TEXT", + "#/texts/159", + "en", + 0.3199999928474426 ], - "iref": "#/texts/174", - "name": "list-item", - "orig-order": 210, - "page": 14, - "span": [ - 0, - 102 + [ + "semantic", + 17579390613842440572, + "TEXT", + "#/texts/160", + "text", + 0.800000011920929 ], - "sref": "#/page-elements/212", - "text-order": 212, - "type": "paragraph" - }, - { - "bbox": [ - 50.74017333984375, - 69.43157196044922, - 552.4933471679688, - 90.58172607421875 + [ + "language", + 17579390613842440572, + "TEXT", + "#/texts/160", + "en", + 0.7200000286102295 ], - "iref": "#/texts/175", - "name": "list-item", - "orig-order": 211, - "page": 14, - "span": [ - 0, - 156 + [ + "semantic", + 722212543953276862, + "TEXT", + "#/texts/161", + "text", + 0.9800000190734863 ], - "sref": "#/page-elements/213", - "text-order": 213, - "type": "paragraph" - }, - { - "bbox": [ - 50.37576675415039, - 46.413570404052734, - 553.1749267578125, - 67.59844970703125 + [ + "language", + 722212543953276862, + "TEXT", + "#/texts/161", + "en", + 0.9399999976158142 ], - "iref": "#/texts/176", - "name": "list-item", - "orig-order": 212, - "page": 14, - "span": [ - 0, - 184 + [ + "semantic", + 11085577343317113173, + "TEXT", + "#/texts/162", + "header", + 0.8199999928474426 ], - "sref": "#/page-elements/214", - "text-order": 214, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 11085577343317113173, + "TEXT", + "#/texts/162", + "en", + 0.699999988079071 ], - "iref": "#/texts/177", - "name": "text", - "orig-order": 215, - "page": 14, - "span": [ - 0, - 320 + [ + "semantic", + 1792096630133661292, + "TEXT", + "#/texts/163", + "reference", + 0.6000000238418579 ], - "sref": "#/page-elements/215", - "text-order": 215, - "type": "paragraph" - }, - { - "bbox": [ - 44.473201751708984, - 751.4635620117188, - 84.89160919189453, - 758.80615234375 + [ + "language", + 1792096630133661292, + "TEXT", + "#/texts/163", + "pl", + 0.1899999976158142 ], - "iref": "#/page-headers/18", - "name": "page-header", - "orig-order": 228, - "page": 15, - "span": [ - 0, - 13 + [ + "semantic", + 11462638369524745676, + "TEXT", + "#/texts/164", + "text", + 1.0 ], - "sref": "#/page-elements/216", - "text-order": 216, - "type": "page-header" - }, - { - "bbox": [ - 454.5641784667969, - 745.4571533203125, - 549.099365234375, - 761.863037109375 + [ + "language", + 11462638369524745676, + "TEXT", + "#/texts/164", + "en", + 0.9100000262260437 ], - "iref": "#/figures/8", - "name": "picture", - "orig-order": 229, - "page": 15, - "span": [ - 0, - 0 + [ + "semantic", + 16611805225457383637, + "TEXT", + "#/texts/165", + "reference", + 0.4300000071525574 ], - "sref": "#/page-elements/217", - "text-order": 217, - "type": "figure" - }, - { - "bbox": [ - 46.63217544555664, - 722.4282836914062, - 362.7469787597656, - 731.7239990234375 + [ + "language", + 16611805225457383637, + "TEXT", + "#/texts/165", + "en", + 0.8299999833106995 ], - "iref": "#/texts/178", - "name": "list-item", - "orig-order": 216, - "page": 15, - "span": [ - 0, - 85 + [ + "semantic", + 1531505125666754945, + "TEXT", + "#/texts/166", + "reference", + 0.6600000262260437 ], - "sref": "#/page-elements/218", - "text-order": 218, - "type": "paragraph" - }, - { - "bbox": [ - 44.78684997558594, - 699.5198364257812, - 549.7481689453125, - 720.4119262695312 + [ + "language", + 1531505125666754945, + "TEXT", + "#/texts/166", + "en", + 0.25999999046325684 ], - "iref": "#/texts/179", - "name": "list-item", - "orig-order": 217, - "page": 15, - "span": [ - 0, - 168 + [ + "semantic", + 15684389308320953629, + "TEXT", + "#/texts/167", + "reference", + 0.6600000262260437 ], - "sref": "#/page-elements/219", - "text-order": 219, - "type": "paragraph" - }, - { - "bbox": [ - 44.7877197265625, - 688.0108642578125, - 238.66644287109375, - 697.144287109375 + [ + "language", + 15684389308320953629, + "TEXT", + "#/texts/167", + "en", + 0.5899999737739563 ], - "iref": "#/texts/180", - "name": "list-item", - "orig-order": 218, - "page": 15, - "span": [ - 0, - 50 + [ + "semantic", + 14590754343934702701, + "TEXT", + "#/texts/168", + "header", + 1.0 ], - "sref": "#/page-elements/220", - "text-order": 220, - "type": "paragraph" - }, - { - "bbox": [ - 44.54977798461914, - 676.5018920898438, - 243.0414581298828, - 685.6976318359375 + [ + "language", + 14590754343934702701, + "TEXT", + "#/texts/168", + "en", + 0.33000001311302185 ], - "iref": "#/texts/181", - "name": "list-item", - "orig-order": 219, - "page": 15, - "span": [ - 0, - 52 + [ + "semantic", + 10480452763767134455, + "TEXT", + "#/texts/169", + "reference", + 0.8299999833106995 ], - "sref": "#/page-elements/221", - "text-order": 221, - "type": "paragraph" - }, - { - "bbox": [ - 44.7877197265625, - 653.5408935546875, - 548.7638549804688, - 674.378662109375 + [ + "language", + 10480452763767134455, + "TEXT", + "#/texts/169", + "en", + 0.5199999809265137 ], - "iref": "#/texts/182", - "name": "list-item", - "orig-order": 220, - "page": 15, - "span": [ - 0, - 145 + [ + "semantic", + 11866471329779366855, + "TEXT", + "#/texts/170", + "reference", + 0.949999988079071 ], - "sref": "#/page-elements/222", - "text-order": 222, - "type": "paragraph" - }, - { - "bbox": [ - 44.7877197265625, - 630.52294921875, - 548.82861328125, - 651.5768432617188 + [ + "language", + 11866471329779366855, + "TEXT", + "#/texts/170", + "en", + 0.5 ], - "iref": "#/texts/183", - "name": "list-item", - "orig-order": 221, - "page": 15, - "span": [ - 0, - 252 + [ + "semantic", + 6016885898370676469, + "TEXT", + "#/texts/171", + "reference", + 0.9200000166893005 ], - "sref": "#/page-elements/223", - "text-order": 223, - "type": "paragraph" - }, - { - "bbox": [ - 44.787750244140625, - 607.5050048828125, - 550.8438720703125, - 628.0836181640625 + [ + "language", + 6016885898370676469, + "TEXT", + "#/texts/171", + "en", + 0.699999988079071 ], - "iref": "#/texts/184", - "name": "list-item", - "orig-order": 222, - "page": 15, - "span": [ - 0, - 147 + [ + "semantic", + 13946275785662847920, + "TEXT", + "#/texts/172", + "reference", + 0.8199999928474426 ], - "sref": "#/page-elements/224", - "text-order": 224, - "type": "paragraph" - }, - { - "bbox": [ - 44.787750244140625, - 595.9960327148438, - 474.9829406738281, - 604.6593627929688 + [ + "language", + 13946275785662847920, + "TEXT", + "#/texts/172", + "en", + 0.6299999952316284 ], - "iref": "#/texts/185", - "name": "list-item", - "orig-order": 223, - "page": 15, - "span": [ - 0, - 114 + [ + "semantic", + 7693798302433367973, + "TEXT", + "#/texts/173", + "reference", + 0.9300000071525574 ], - "sref": "#/page-elements/225", - "text-order": 225, - "type": "paragraph" - }, - { - "bbox": [ - 44.786895751953125, - 573.0350341796875, - 548.8020629882812, - 592.54248046875 + [ + "language", + 7693798302433367973, + "TEXT", + "#/texts/173", + "en", + 0.5 ], - "iref": "#/texts/186", - "name": "list-item", - "orig-order": 224, - "page": 15, - "span": [ - 0, - 197 + [ + "semantic", + 3109792572574236398, + "TEXT", + "#/texts/174", + "reference", + 0.949999988079071 ], - "sref": "#/page-elements/226", - "text-order": 226, - "type": "paragraph" - }, - { - "bbox": [ - 44.786865234375, - 550.01708984375, - 548.7230834960938, - 569.8275146484375 + [ + "language", + 3109792572574236398, + "TEXT", + "#/texts/174", + "en", + 0.6899999976158142 ], - "iref": "#/texts/187", - "name": "list-item", - "orig-order": 225, - "page": 15, - "span": [ - 0, - 142 + [ + "semantic", + 8111170387462350170, + "TEXT", + "#/texts/175", + "reference", + 0.9200000166893005 ], - "sref": "#/page-elements/227", - "text-order": 227, - "type": "paragraph" - }, - { - "bbox": [ - 44.78601837158203, - 526.9991455078125, - 550.565185546875, - 546.7464599609375 + [ + "language", + 8111170387462350170, + "TEXT", + "#/texts/175", + "en", + 0.75 ], - "iref": "#/texts/188", - "name": "list-item", - "orig-order": 226, - "page": 15, - "span": [ - 0, - 176 + [ + "semantic", + 14682702346227170925, + "TEXT", + "#/texts/176", + "reference", + 0.8600000143051147 ], - "sref": "#/page-elements/228", - "text-order": 228, - "type": "paragraph" - }, - { - "bbox": [ - 57.16337966918945, - 468.5407409667969, - 529.73583984375, - 491.138916015625 + [ + "language", + 14682702346227170925, + "TEXT", + "#/texts/176", + "en", + 0.5 ], - "iref": "#/texts/189", - "name": "text", - "orig-order": 227, - "page": 15, - "span": [ - 0, - 216 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/177", + "text", + 0.8999999761581421 ], - "sref": "#/page-elements/229", - "text-order": 229, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/177", + "en", + 0.7799999713897705 ], - "iref": "#/texts/190", - "name": "text", - "orig-order": 230, - "page": 15, - "span": [ - 0, - 320 + [ + "semantic", + 11430385775112165283, + "TEXT", + "#/texts/178", + "reference", + 1.0 ], - "sref": "#/page-elements/230", - "text-order": 230, - "type": "paragraph" - } - ], - "page-footers": [ - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-footers/0", - "hash": 12400883656433726216, - "orig": "Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", - "prov": [ - { - "$ref": "#/page-elements/21" - } + [ + "language", + 11430385775112165283, + "TEXT", + "#/texts/178", + "en", + 0.6700000166893005 ], - "text": "Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", - "text-hash": 8372141692634509619, - "type": "page-footer" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-footers/1", - "hash": 10244115652970867690, - "orig": "wileyonlinelibrary.com/journal/ail2 1of15", - "prov": [ - { - "$ref": "#/page-elements/22" - } + [ + "semantic", + 5825495964576843004, + "TEXT", + "#/texts/179", + "reference", + 0.699999988079071 ], - "text": "wileyonlinelibrary.com/journal/ail2 1of15", - "text-hash": 6196517219334265105, - "type": "page-footer" - } - ], - "page-headers": [ - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/0", - "hash": 1841431076736563689, - "orig": "Received: 15 September 2020", - "prov": [ - { - "$ref": "#/page-elements/0" - } + [ + "language", + 5825495964576843004, + "TEXT", + "#/texts/179", + "en", + 0.5 ], - "text": "Received: 15 September 2020", - "text-hash": 16688788223092401940, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/1", - "hash": 3915126318503464014, - "orig": "Revised: 23 November 2020", - "prov": [ - { - "$ref": "#/page-elements/1" - } + [ + "semantic", + 5698421097735371040, + "TEXT", + "#/texts/180", + "text", + 0.5899999737739563 ], - "text": "Revised: 23 November 2020", - "text-hash": 1000711515083668085, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/2", - "hash": 1727876228376027809, - "orig": "Accepted: 25 November 2020", - "prov": [ - { - "$ref": "#/page-elements/2" - } + [ + "language", + 5698421097735371040, + "TEXT", + "#/texts/180", + "en", + 0.3100000023841858 ], - "text": "Accepted: 25 November 2020", - "text-hash": 17099649843681009628, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/3", - "hash": 4558221577189246496, - "orig": "DOI: 10.1002/ail2.20", - "prov": [ - { - "$ref": "#/page-elements/3" - } + [ + "semantic", + 5870535063942256428, + "TEXT", + "#/texts/181", + "reference", + 0.550000011920929 ], - "text": "DOI: 10.1002/ail2.20", - "text-hash": 348625343742526555, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/4", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/24" - } + [ + "language", + 5870535063942256428, + "TEXT", + "#/texts/181", + "en", + 0.44999998807907104 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/5", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/43" - } + [ + "semantic", + 18196767266655606709, + "TEXT", + "#/texts/182", + "reference", + 0.949999988079071 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/6", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/55" - } + [ + "language", + 18196767266655606709, + "TEXT", + "#/texts/182", + "en", + 0.6899999976158142 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/7", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/66" - } + [ + "semantic", + 3623403683642367845, + "TEXT", + "#/texts/183", + "reference", + 0.7799999713897705 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/8", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/76" - } + [ + "language", + 3623403683642367845, + "TEXT", + "#/texts/183", + "en", + 0.44999998807907104 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/9", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/92" - } + [ + "semantic", + 13936866850854297069, + "TEXT", + "#/texts/184", + "reference", + 0.9700000286102295 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/10", - "hash": 4361549266732238272, - "orig": "8of15", - "prov": [ - { - "$ref": "#/page-elements/106" - } + [ + "language", + 13936866850854297069, + "TEXT", + "#/texts/184", + "en", + 0.5899999737739563 ], - "text": "8of15", - "text-hash": 329104147727696635, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/11", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/107" - } + [ + "semantic", + 8497015665124263236, + "TEXT", + "#/texts/185", + "reference", + 0.9800000190734863 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/12", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/117" - } + [ + "language", + 8497015665124263236, + "TEXT", + "#/texts/185", + "en", + 0.4099999964237213 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/13", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/131" - } + [ + "semantic", + 15947529491299956047, + "TEXT", + "#/texts/186", + "reference", + 0.7900000214576721 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/14", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/145" - } + [ + "language", + 15947529491299956047, + "TEXT", + "#/texts/186", + "en", + 0.6200000047683716 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/15", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/156" - } + [ + "semantic", + 14843401725435831033, + "TEXT", + "#/texts/187", + "reference", + 0.6600000262260437 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/16", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/170" - } + [ + "language", + 14843401725435831033, + "TEXT", + "#/texts/187", + "en", + 0.6299999952316284 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/17", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/183" - } + [ + "semantic", + 16676439669743530711, + "TEXT", + "#/texts/188", + "reference", + 0.8899999856948853 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/18", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/216" - } + [ + "language", + 16676439669743530711, + "TEXT", + "#/texts/188", + "en", + 0.550000011920929 + ], + [ + "semantic", + 2986547206451163051, + "TEXT", + "#/texts/189", + "reference", + 0.699999988079071 + ], + [ + "language", + 2986547206451163051, + "TEXT", + "#/texts/189", + "en", + 0.5600000023841858 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - } - ], - "properties": { - "data": [ [ "semantic", + 18391264192891079539, + "TEXT", + "#/texts/190", "text", - 0.8700000047683716 + 0.8999999761581421 + ], + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/190", + "en", + 0.7799999713897705 ] ], "headers": [ "type", + "subj_hash", + "subj_name", + "subj_path", "label", "confidence" ] }, + "sref": "#", "tables": [ { "#-cols": 6, @@ -18323,6 +78598,7 @@ "$ref": "#/page-elements/175" } ], + "sref": "#/tables/0/captions/0", "text": "TABLE 1 Top-k accuracies validation of KG query results. Numbers represent the fraction in which any of the k highest ranked answers matches the expected answer", "text-hash": 14400864471075544784, "type": "caption" @@ -20011,6 +80287,7 @@ "$ref": "#/page-elements/176" } ], + "sref": "#/tables/0", "type": "table" } ], @@ -20019,25 +80296,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/0", "hash": 2144509362215609527, "orig": "LETTER", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/4" } ], + "sref": "#/texts/0", "text": "LETTER", "text-hash": 16381206540184854990, "type": "subtitle-level-1" @@ -20046,25 +80310,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/1", "hash": 16672720454366774824, "orig": "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/5" } ], + "sref": "#/texts/1", "text": "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", "text-hash": 4375081646508065875, "type": "subtitle-level-1" @@ -20073,25 +80324,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/2", "hash": 16781763356419781679, "orig": "Peter W. J. Staar", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.6100000143051147 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/6" } ], + "sref": "#/texts/2", "text": "Peter W. J. Staar", "text-hash": 4049808513512976982, "type": "subtitle-level-1" @@ -20100,25 +80338,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/3", "hash": 3352447812305581329, "orig": "|", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/7" } ], + "sref": "#/texts/3", "text": "|", "text-hash": 17767354399704232748, "type": "paragraph" @@ -20127,25 +80352,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/4", "hash": 14877831450145300436, "orig": "Michele Dolfi", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/8" } ], + "sref": "#/texts/4", "text": "Michele Dolfi", "text-hash": 1571808557594152175, "type": "subtitle-level-1" @@ -20154,25 +80366,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/5", "hash": 3352447812305581329, "orig": "|", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/9" } ], + "sref": "#/texts/5", "text": "|", "text-hash": 17767354399704232748, "type": "paragraph" @@ -20181,25 +80380,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/6", "hash": 13336841394978214677, "orig": "Christoph Auer", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.5899999737739563 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/10" } ], + "sref": "#/texts/6", "text": "Christoph Auer", "text-hash": 9737597816447750448, "type": "paragraph" @@ -20208,25 +80394,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/7", "hash": 15325526562897377208, "orig": "IBM Research, Rueschlikon, Switzerland", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/11" } ], + "sref": "#/texts/7", "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "paragraph" @@ -20235,25 +80408,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/8", "hash": 4017434568255781081, "orig": "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland. Email: taa@zurich.ibm.com", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.9300000071525574 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/12" } ], + "sref": "#/texts/8", "text": "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland. Email: taa@zurich.ibm.com", "text-hash": 961470147553945060, "type": "paragraph" @@ -20262,25 +80422,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/9", "hash": 8487024695951375934, "orig": "Abstract", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/13" } ], + "sref": "#/texts/9", "text": "Abstract", "text-hash": 14650447666970618949, "type": "subtitle-level-1" @@ -20289,25 +80436,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/10", "hash": 11695737263227886476, "orig": "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data. Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world. Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries. In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS). Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried. To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform. This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities. Both components are tightly integrated and can be easily consumed through REST APIs. Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach. The CPS platform is designed as a modular microservice system operating on Kubernetes clusters. Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/14" } ], + "sref": "#/texts/10", "text": "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data. Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world. Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries. In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS). Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried. To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform. This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities. Both components are tightly integrated and can be easily consumed through REST APIs. Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach. The CPS platform is designed as a modular microservice system operating on Kubernetes clusters. Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry.", "text-hash": 9356514212507371703, "type": "paragraph" @@ -20316,25 +80450,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/11", "hash": 8500733160758672230, "orig": "KEYWORDS", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/15" } ], + "sref": "#/texts/11", "text": "KEYWORDS", "text-hash": 14650267244735310237, "type": "subtitle-level-1" @@ -20343,25 +80464,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/12", "hash": 4452030907228745864, "orig": "document processing, knowledge graph, semantic search", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8700000047683716 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/16" } ], + "sref": "#/texts/12", "text": "document processing, knowledge graph, semantic search", "text-hash": 243147861724212659, "type": "paragraph" @@ -20370,25 +80478,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/13", "hash": 11913688961435238004, "orig": "1 | INTRODUCTION", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/17" } ], + "sref": "#/texts/13", "text": "1 | INTRODUCTION", "text-hash": 8854903187485535375, "type": "subtitle-level-1" @@ -20397,25 +80492,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/14", "hash": 9977041563469582014, "orig": "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally. It is self-evident that this number has increased ever since. The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world. The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings. Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/18" } ], + "sref": "#/texts/14", "text": "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally. It is self-evident that this number has increased ever since. The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world. The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings. Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable.", "text-hash": 6468010182398147525, "type": "paragraph" @@ -20424,25 +80506,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/15", "hash": 4361549266817300114, "orig": "2of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/23" } ], + "sref": "#/texts/15", "text": "2of15", "text-hash": 329104147827159977, "type": "paragraph" @@ -20451,25 +80520,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/16", "hash": 8425126282903547933, "orig": "In a previous publication, we presented the corpus conversion service (CCS). 1 The CCS is a scalable cloud service, which leverages state-of-the-art machine learning to convert complex formats (eg, PDF, Word, and Bitmap) into a richly structured JSON representation of their content. As such, the CCS solves the first problem when confronted with a large corpus of documents, that is, make the content of the documents programmatically accessible. Examples of the latter would be ' List all images with their caption from the corpus or list all titles with their publication date. ' The second problem is to obviously search or explore the content of the documents in a large corpus. For this problem, we have developed the corpus processing service (CPS), which we present in this paper. The CPS is intended to create knowledge bases (KBs) from the converted JSON corpus and serve these KBs through in-memory knowledge graph stores. As such, the CPS is the natural extension of the CCS and has as an express purpose to make corpora of documents available for deep data exploration.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/25" } ], + "sref": "#/texts/16", "text": "In a previous publication, we presented the corpus conversion service (CCS). 1 The CCS is a scalable cloud service, which leverages state-of-the-art machine learning to convert complex formats (eg, PDF, Word, and Bitmap) into a richly structured JSON representation of their content. As such, the CCS solves the first problem when confronted with a large corpus of documents, that is, make the content of the documents programmatically accessible. Examples of the latter would be ' List all images with their caption from the corpus or list all titles with their publication date. ' The second problem is to obviously search or explore the content of the documents in a large corpus. For this problem, we have developed the corpus processing service (CPS), which we present in this paper. The CPS is intended to create knowledge bases (KBs) from the converted JSON corpus and serve these KBs through in-memory knowledge graph stores. As such, the CPS is the natural extension of the CCS and has as an express purpose to make corpora of documents available for deep data exploration.", "text-hash": 14716796829201051176, "type": "paragraph" @@ -20478,25 +80534,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/17", "hash": 16507313240019459642, "orig": "The purpose of CPS is to enable deep data exploration directly on large corpora. Here, we define deep data exploration as the capability to ingest large corpora of documents into a scalable service and detect, extract and combine facts contained in these corpora in order to make new discoveries or support critical decision making. It is key to understand that our goal of creating and querying Knowledge Graphs to enable deep data exploration goes beyond search in the spirit of rank and retrieve. Although search is by no means trivial, many state-of-the art solutions exist for this purpose. * We argue, however, that one needs query capabilities which allow for a combination of extracted facts and a fast, onthe-fly creation of new datasets to enable actual deep data exploration. Those datasets can then be used for further anal-", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/26" } ], + "sref": "#/texts/17", "text": "The purpose of CPS is to enable deep data exploration directly on large corpora. Here, we define deep data exploration as the capability to ingest large corpora of documents into a scalable service and detect, extract and combine facts contained in these corpora in order to make new discoveries or support critical decision making. It is key to understand that our goal of creating and querying Knowledge Graphs to enable deep data exploration goes beyond search in the spirit of rank and retrieve. Although search is by no means trivial, many state-of-the art solutions exist for this purpose. * We argue, however, that one needs query capabilities which allow for a combination of extracted facts and a fast, onthe-fly creation of new datasets to enable actual deep data exploration. Those datasets can then be used for further anal-", "text-hash": 4261190952114998337, "type": "paragraph" @@ -20505,25 +80548,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/18", "hash": 7900229969942228522, "orig": "ysis, which might lead to new discoveries or support decision making.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/27" } ], + "sref": "#/texts/18", "text": "ysis, which might lead to new discoveries or support decision making.", "text-hash": 12931323242585971793, "type": "paragraph" @@ -20532,25 +80562,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/19", "hash": 10081303962589804251, "orig": "To better distinguish this approach from conventional search, let us consider some example questions:", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/28" } ], + "sref": "#/texts/19", "text": "To better distinguish this approach from conventional search, let us consider some example questions:", "text-hash": 6426882630003520482, "type": "paragraph" @@ -20559,25 +80576,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/20", "hash": 12186698460099365002, "orig": "a. Definition of high temperature superconductor.", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.49000000953674316 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/29" } ], + "sref": "#/texts/20", "text": "a. Definition of high temperature superconductor.", "text-hash": 8586326920090596785, "type": "paragraph" @@ -20586,25 +80590,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/21", "hash": 14190244699299580163, "orig": "b. Publications of before year 2010.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/30" } ], + "sref": "#/texts/21", "text": "b. Publications of before year 2010.", "text-hash": 2034196463390881594, "type": "paragraph" @@ -20613,25 +80604,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/22", "hash": 1376279050886549305, "orig": "c. Maps of the Permian basin.", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/31" } ], + "sref": "#/texts/22", "text": "c. Maps of the Permian basin.", "text-hash": 17379120122282474820, "type": "paragraph" @@ -20640,25 +80618,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/23", "hash": 10155628801693924200, "orig": "d. Geological formations from the Miocene age with their depth, thickness, geographic location, and composition.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/32" } ], + "sref": "#/texts/23", "text": "d. Geological formations from the Miocene age with their depth, thickness, geographic location, and composition.", "text-hash": 6073268612165724563, "type": "paragraph" @@ -20667,25 +80632,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/24", "hash": 9107499507097280105, "orig": "e. List all high-Tc superconductors with their known crystallographic and material properties?", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.6100000143051147 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/33" } ], + "sref": "#/texts/24", "text": "e. List all high-Tc superconductors with their known crystallographic and material properties?", "text-hash": 14246074989165808788, "type": "paragraph" @@ -20694,25 +80646,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/25", "hash": 7248467870339433322, "orig": "Question (a) undoubtedly fits the classic search paradigm, since here one can expect a search engine to find a number sources with exact answers (ie, definitions). Likewise, question (b) can be easily answered through metadata based filter rules on a literature database. Question (c) already requires some extent of domain knowledge to be encoded in a model to accurately classify the relevance of all known maps to the query, at least assuming no manual curation effort has been done. Questions (d) and (e) ultimately impose query capabilities which are clearly infeasible to support through manual curation, and are very unlikely to be answered in any single data source. These questions require the system to return a more complex data structure (eg, a table in which the rows list the formations or materials while the columns contain their respective properties).", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/34" } ], + "sref": "#/texts/25", "text": "Question (a) undoubtedly fits the classic search paradigm, since here one can expect a search engine to find a number sources with exact answers (ie, definitions). Likewise, question (b) can be easily answered through metadata based filter rules on a literature database. Question (c) already requires some extent of domain knowledge to be encoded in a model to accurately classify the relevance of all known maps to the query, at least assuming no manual curation effort has been done. Questions (d) and (e) ultimately impose query capabilities which are clearly infeasible to support through manual curation, and are very unlikely to be answered in any single data source. These questions require the system to return a more complex data structure (eg, a table in which the rows list the formations or materials while the columns contain their respective properties).", "text-hash": 13592184899010298257, "type": "paragraph" @@ -20721,25 +80660,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/26", "hash": 13346892078888080449, "orig": "Concluding from the above examples, we define the following qualifying criteria for a system that supports deep data exploration on corpora:", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/35" } ], + "sref": "#/texts/26", "text": "Concluding from the above examples, we define the following qualifying criteria for a system that supports deep data exploration on corpora:", "text-hash": 9732050976592056956, "type": "paragraph" @@ -20748,25 +80674,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/27", "hash": 1118972765223422660, "orig": "1. It can answer queries by combining different data elements from different sources into a new data structure.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8299999833106995 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/36" } ], + "sref": "#/texts/27", "text": "1. It can answer queries by combining different data elements from different sources into a new data structure.", "text-hash": 15389200666968750079, "type": "paragraph" @@ -20775,25 +80688,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/28", "hash": 324023167304456371, "orig": "2. It supports (1) by creating a knowledge model from a controlled, unstructured corpus in a mostly unsupervised way. It may profit from, but not require any manually curated data.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/37" } ], + "sref": "#/texts/28", "text": "2. It supports (1) by creating a knowledge model from a controlled, unstructured corpus in a mostly unsupervised way. It may profit from, but not require any manually curated data.", "text-hash": 15837385157674255818, "type": "paragraph" @@ -20802,25 +80702,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/29", "hash": 4651508276868765576, "orig": "3. It may restrict supported queries to a specific domain (eg, a technical field).", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/38" } ], + "sref": "#/texts/29", "text": "3. It may restrict supported queries to a specific domain (eg, a technical field).", "text-hash": 11572955042484278451, "type": "paragraph" @@ -20829,25 +80716,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/30", "hash": 3052020526349962744, "orig": "To meet the objectives defined earlier, CPS implements and tightly integrates two essential components. The first component is a scalable Knowledge Graph creation pipeline, which is used to automatically process text, tables and images through state-of-the-art segmentation and natural language understanding (NLU) models and extract entities and relationships from them. The second component serves the created KG, enabling users to perform deep queries and advanced graph analytics in real time. 2 This is supported through an underlying, highly optimized graph engine we developed to specifically address requirements for deep data exploration.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/39" } ], + "sref": "#/texts/30", "text": "To meet the objectives defined earlier, CPS implements and tightly integrates two essential components. The first component is a scalable Knowledge Graph creation pipeline, which is used to automatically process text, tables and images through state-of-the-art segmentation and natural language understanding (NLU) models and extract entities and relationships from them. The second component serves the created KG, enabling users to perform deep queries and advanced graph analytics in real time. 2 This is supported through an underlying, highly optimized graph engine we developed to specifically address requirements for deep data exploration.", "text-hash": 18009286910191614723, "type": "paragraph" @@ -20856,25 +80730,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/31", "hash": 6725501529910185390, "orig": "It is worth noting that the CPS platform is a fully functioning cloud application that has been successfully deployed in multiple real-world scenarios in material science 3 and oil and gas industries. 4", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/40" } ], + "sref": "#/texts/31", "text": "It is worth noting that the CPS platform is a fully functioning cloud application that has been successfully deployed in multiple real-world scenarios in material science 3 and oil and gas industries. 4", "text-hash": 11737175762912836309, "type": "paragraph" @@ -20883,25 +80744,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/32", "hash": 14814111183601762276, "orig": "In the remainder of this paper, we discuss in detail the technical aspects and implementation details of the two main components of the CPS. In section 2, we present in depth how the platform extracts facts from corpora at a massive scale. In section 3, we go into detail of designing deep queries and show how we compute them in a very efficient", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/41" } ], + "sref": "#/texts/32", "text": "In the remainder of this paper, we discuss in detail the technical aspects and implementation details of the two main components of the CPS. In section 2, we present in depth how the platform extracts facts from corpora at a massive scale. In section 3, we go into detail of designing deep queries and show how we compute them in a very efficient", "text-hash": 1414786465877142815, "type": "paragraph" @@ -20910,25 +80758,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/33", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/42" } ], + "sref": "#/texts/33", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -20937,25 +80772,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/34", "hash": 4361549266681704196, "orig": "3of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/44" } ], + "sref": "#/texts/34", "text": "3of15", "text-hash": 329104147711745343, "type": "paragraph" @@ -20964,25 +80786,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/35", "hash": 8043608144162608258, "orig": "way with our high-performance graph engine. Later, in section 4, we will discuss in detail how both components are deployed and interacting on the cloud. Finally, in section 5, we present the complete system in a real world case study and benchmark its accuracy.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/45" } ], + "sref": "#/texts/35", "text": "way with our high-performance graph engine. Later, in section 4, we will discuss in detail how both components are deployed and interacting on the cloud. Finally, in section 5, we present the complete system in a real world case study and benchmark its accuracy.", "text-hash": 13076251584287625657, "type": "paragraph" @@ -20991,25 +80800,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/36", "hash": 7159467829896778939, "orig": "2 | SCALABLE KNOWLEDGE GRAPH CREATION", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.75 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/46" } ], + "sref": "#/texts/36", "text": "2 | SCALABLE KNOWLEDGE GRAPH CREATION", "text-hash": 13901790948575121858, "type": "subtitle-level-1" @@ -21018,25 +80814,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/37", "hash": 5617240156952377, "orig": "In CPS, a Knowledge Graph is defined as a collection of entities and their relationships forming the graphs nodes and edges. Entities can have a wide variety of types. A basic scenario includes types such as documents, document components, keywords, and authors. In addition, there can be more specific types tied to domain verticals, such as materials and properties in material science, or geological ages, formations, rocks, minerals, structures, etc., for oil and gas exploration. Relationships in the KG are strictly defined between the entities. Similar to the entities, the relationships are typed (' has-material-property ' or ' has-geological-age '). Also, relationships in the KG can be weighted, for example, to represent the trustworthiness of a fact that the relationship represents.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/47" } ], + "sref": "#/texts/37", "text": "In CPS, a Knowledge Graph is defined as a collection of entities and their relationships forming the graphs nodes and edges. Entities can have a wide variety of types. A basic scenario includes types such as documents, document components, keywords, and authors. In addition, there can be more specific types tied to domain verticals, such as materials and properties in material science, or geological ages, formations, rocks, minerals, structures, etc., for oil and gas exploration. Relationships in the KG are strictly defined between the entities. Similar to the entities, the relationships are typed (' has-material-property ' or ' has-geological-age '). Also, relationships in the KG can be weighted, for example, to represent the trustworthiness of a fact that the relationship represents.", "text-hash": 16151270992855323972, "type": "paragraph" @@ -21045,25 +80828,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/38", "hash": 3276490574487379366, "orig": "In typical cases, we start from a collection of documents in different formats. Sometimes, documents are available in semistructured, machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority of cases this does not apply, especially for proprietary documents of companies and organizations. The latter are very often scanned or programmatic PDF documents. Using the CCS, 1 these types of documents are converted into structured JSON files. Those provide easy access to the meta-data (eg, title, abstract, references, authors) and the document body. The latter is structured by subtitles (of various levels), paragraphs, lists, tables (with internal row and column structures), figures, and linked captions. O n c et h ec o r p u si sp r e s n ti nas t r u c t u r e d,m a c h i n e processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely extraction, annotation,and aggregation. The inherent dependencies between these three tasks are defined through a directed acyclic graph (DAG). We willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss the details for each DF task.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/48" } ], + "sref": "#/texts/38", "text": "In typical cases, we start from a collection of documents in different formats. Sometimes, documents are available in semistructured, machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority of cases this does not apply, especially for proprietary documents of companies and organizations. The latter are very often scanned or programmatic PDF documents. Using the CCS, 1 these types of documents are converted into structured JSON files. Those provide easy access to the meta-data (eg, title, abstract, references, authors) and the document body. The latter is structured by subtitles (of various levels), paragraphs, lists, tables (with internal row and column structures), figures, and linked captions. O n c et h ec o r p u si sp r e s n ti nas t r u c t u r e d,m a c h i n e processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely extraction, annotation,and aggregation. The inherent dependencies between these three tasks are defined through a directed acyclic graph (DAG). We willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss the details for each DF task.", "text-hash": 17496609193730656989, "type": "paragraph" @@ -21072,25 +80842,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/39", "hash": 3367451956962330174, "orig": "2.1 | DF tasks", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/49" } ], + "sref": "#/texts/39", "text": "2.1 | DF tasks", "text-hash": 17765848133863277637, "type": "subtitle-level-1" @@ -21099,25 +80856,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/40", "hash": 5509744459704235873, "orig": "In Figure 1, we sketch a minimal DF, in which each of the three tasks is used consecutively in order to generate entities and relationships for a generic KG. We will use Figure1toillustratethepurposeandimplementationof each DF task.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/50" } ], + "sref": "#/texts/40", "text": "In Figure 1, we sketch a minimal DF, in which each of the three tasks is used consecutively in order to generate entities and relationships for a generic KG. We will use Figure1toillustratethepurposeandimplementationof each DF task.", "text-hash": 10647094536020604316, "type": "paragraph" @@ -21126,25 +80870,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/42", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/53" } ], + "sref": "#/texts/41", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -21153,25 +80884,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/43", "hash": 4361549176688508574, "orig": "4of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/54" } ], + "sref": "#/texts/42", "text": "4of15", "text-hash": 329104066308221861, "type": "paragraph" @@ -21180,25 +80898,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/44", "hash": 12374482891052873875, "orig": "2.1.1 | Extraction", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.5699999928474426 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/56" } ], + "sref": "#/texts/43", "text": "2.1.1 | Extraction", "text-hash": 8758905122433574314, "type": "subtitle-level-1" @@ -21207,25 +80912,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/45", "hash": 2755397864153233778, "orig": "In an extraction task, we generate new data entities (eg, document components) from an original set of source entities (eg, documents). During this process, new links are created which connect these newly generated data entities to their original source entity. Typical examples of such extraction tasks are the extraction of abstracts, paragraphs, tables, or figures from the structured document files.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/57" } ], + "sref": "#/texts/44", "text": "In an extraction task, we generate new data entities (eg, document components) from an original set of source entities (eg, documents). During this process, new links are created which connect these newly generated data entities to their original source entity. Typical examples of such extraction tasks are the extraction of abstracts, paragraphs, tables, or figures from the structured document files.", "text-hash": 18305914688852125577, "type": "paragraph" @@ -21234,25 +80926,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/46", "hash": 4698316471746130896, "orig": "From a scalability point of view, this task is embarrassingly parallel, which makes it extremely easy to implement on loosely interconnected environments such as a cloud. We simply iterate in parallel over all source entities in the backend database, extract the desired components and then insert those components as new data entities back into the database. Extraction tasks have no internal synchronization points.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/58" } ], + "sref": "#/texts/45", "text": "From a scalability point of view, this task is embarrassingly parallel, which makes it extremely easy to implement on loosely interconnected environments such as a cloud. We simply iterate in parallel over all source entities in the backend database, extract the desired components and then insert those components as new data entities back into the database. Extraction tasks have no internal synchronization points.", "text-hash": 11458501594938683627, "type": "paragraph" @@ -21261,25 +80940,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/47", "hash": 11827267218358801841, "orig": "One particular benefit of this task is to make the query capability on the Knowledge Graph more fine grained by being able to provide provenance information on the result. For example, this would let the user explore all the paragraphs, tables, or figures that embed a certain fact.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/59" } ], + "sref": "#/texts/46", "text": "One particular benefit of this task is to make the query capability on the Knowledge Graph more fine grained by being able to provide provenance information on the result. For example, this would let the user explore all the paragraphs, tables, or figures that embed a certain fact.", "text-hash": 8932299863639200460, "type": "paragraph" @@ -21288,25 +80954,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/48", "hash": 6297710299044869343, "orig": "2.1.2 | Annotation", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8299999833106995 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/60" } ], + "sref": "#/texts/47", "text": "2.1.2 | Annotation", "text-hash": 12444247655523627494, "type": "subtitle-level-1" @@ -21315,25 +80968,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/49", "hash": 7158837349769150986, "orig": "In the annotation task, we apply NLU methods to detect language entities and their relationships within a single data entity. Here, data entities can be as simple as a snippet of text (eg, a paragraph) or more complex structures such as tables or figures. The main goal of the annotation task is to obtain all relevant information from the data entity with regard to the domain of the corpus. Since different technical fields require different annotations, our annotation task is modular, allowing language entities to be annotated for material science, oil and gas, or more basic entities (eg, noun phrases, abbreviations, unit and values, etc.).", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/61" } ], + "sref": "#/texts/48", "text": "In the annotation task, we apply NLU methods to detect language entities and their relationships within a single data entity. Here, data entities can be as simple as a snippet of text (eg, a paragraph) or more complex structures such as tables or figures. The main goal of the annotation task is to obtain all relevant information from the data entity with regard to the domain of the corpus. Since different technical fields require different annotations, our annotation task is modular, allowing language entities to be annotated for material science, oil and gas, or more basic entities (eg, noun phrases, abbreviations, unit and values, etc.).", "text-hash": 13902418307602972721, "type": "paragraph" @@ -21342,25 +80982,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/50", "hash": 1150871476689677866, "orig": "From a technical perspective, the language entities are detected and annotated using multiple NLU methods, ranging from complex regular expressions \u2020 to LSTM networks. 5,6 We employ state-of-the-art NLU toolkits such as Spacy 7 or NLTK \u2021 to train and apply custom named entity recognition models. A detailed investigation of these NLU annotators unfortunately goes beyond of the scope of this paper. However, in Figure 2, we show the different types of named (geological) entities found in a paragraph by our oil and gas annotation model.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/62" } ], + "sref": "#/texts/49", "text": "From a technical perspective, the language entities are detected and annotated using multiple NLU methods, ranging from complex regular expressions \u2020 to LSTM networks. 5,6 We employ state-of-the-art NLU toolkits such as Spacy 7 or NLTK \u2021 to train and apply custom named entity recognition models. A detailed investigation of these NLU annotators unfortunately goes beyond of the scope of this paper. However, in Figure 2, we show the different types of named (geological) entities found in a paragraph by our oil and gas annotation model.", "text-hash": 15370812655802342481, "type": "paragraph" @@ -21369,25 +80996,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/51", "hash": 5163702913945903725, "orig": "In Listing 1, we also show an excerpt of how the annotations (both language entities and relationships) are stored in the backend. It is noteworthy here that relationships are stored as (weighted) links between two entity references. \u00a7 The usage of references reduces data duplication and more importantly ensures that the relationships are always defined between two known entities in the KG. The latter simplifies the aggregation of the relationships significantly, since no new entities need to be created in the KG in order to aggregate the relationships (see section 2.1.4).", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/63" } ], + "sref": "#/texts/50", "text": "In Listing 1, we also show an excerpt of how the annotations (both language entities and relationships) are stored in the backend. It is noteworthy here that relationships are stored as (weighted) links between two entity references. \u00a7 The usage of references reduces data duplication and more importantly ensures that the relationships are always defined between two known entities in the KG. The latter simplifies the aggregation of the relationships significantly, since no new entities need to be created in the KG in order to aggregate the relationships (see section 2.1.4).", "text-hash": 11348986383696847000, "type": "paragraph" @@ -21396,25 +81010,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/52", "hash": 5462319091745771382, "orig": "FIGURE 2 Illustration of various detected language entities in a particularly rich snippet of an AAPG abstract. 8 The language entities here are all related to geological concepts in the domain of oil and gas exploration", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.5899999737739563 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/64" } ], + "sref": "#/texts/51", "text": "FIGURE 2 Illustration of various detected language entities in a particularly rich snippet of an AAPG abstract. 8 The language entities here are all related to geological concepts in the domain of oil and gas exploration", "text-hash": 11050304000116997517, "type": "paragraph" @@ -21423,25 +81024,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/53", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/65" } ], + "sref": "#/texts/52", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -21450,25 +81038,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/54", "hash": 958124839653591304, "orig": "LISTING 1 Excerpt of the annotated abstract from an AAPG paper 8 with its original text and the detected entities and relationships. Note that relationships are typed (encoded in the field name) and weighted. The weight reflects the confidence of the language annotation model during extraction. Relationships are always defined on detected entities, and will therefore use references defining a link between two entities", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/68" } ], + "sref": "#/texts/53", "text": "LISTING 1 Excerpt of the annotated abstract from an AAPG paper 8 with its original text and the detected entities and relationships. Note that relationships are typed (encoded in the field name) and weighted. The weight reflects the confidence of the language annotation model during extraction. Relationships are always defined on detected entities, and will therefore use references defining a link between two entities", "text-hash": 15194258930241746739, "type": "paragraph" @@ -21477,25 +81052,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/55", "hash": 1448405324616602032, "orig": "From a scaling perspective, this task is again embarrassingly parallel. Unlike the extraction task, the annotation task is not creating new data entities, but rather appending new data associated with an existing data entity. We simply apply the desired entity and relationship annotators on all document components (paragraphs, tables, etc.) in parallel by distributing the operations on all available compute resources. Annotation tasks have no internal synchronization points. From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs. Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/69" } ], + "sref": "#/texts/54", "text": "From a scaling perspective, this task is again embarrassingly parallel. Unlike the extraction task, the annotation task is not creating new data entities, but rather appending new data associated with an existing data entity. We simply apply the desired entity and relationship annotators on all document components (paragraphs, tables, etc.) in parallel by distributing the operations on all available compute resources. Annotation tasks have no internal synchronization points. From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs. Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers.", "text-hash": 17018759417884348107, "type": "paragraph" @@ -21504,25 +81066,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/56", "hash": 2617775076168299948, "orig": "2.1.3 | Aggregation of entities", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/70" } ], + "sref": "#/texts/55", "text": "2.1.3 | Aggregation of entities", "text-hash": 18150799209915986647, "type": "subtitle-level-1" @@ -21531,25 +81080,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/57", "hash": 13974986056043304735, "orig": "The aggregation task for entities is similar to an extraction task, in the sense that we create new entities and link them each to the source they were mentioned in. In addition to extraction, the entity aggregation task also applies a similarity metric \u00b6 between the entities during extraction. This similarity metric will define if two entities refer to the same language concept and thus need to be represented by a single entity in the KG, rather than remaining separated. In Figure 1, we have illustrated the aggregation task for two types of entities across many different document components. These entity types could be for example materials and properties or geological formations and geological ages. The links connecting the new entities to their source entity are weighted according to the frequency of the match, that is, we set a higher weight if the language entity has been found multiple times. From an implementation point of view, the aggregation task for entities is nontrivial. In distributed computing, it corresponds to a reduction operation. Our implementation distributes the iteration of the source elements among all available computational resources. The aggregation is first performed in a local buffer, which is then synchronized with the backend database only when it reaches a maximum size. The synchronization step is a simple atomic update into an existing (or a newly created) database object. The synchronization for updates from each worker task does not collide with the others.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/71" } ], + "sref": "#/texts/56", "text": "The aggregation task for entities is similar to an extraction task, in the sense that we create new entities and link them each to the source they were mentioned in. In addition to extraction, the entity aggregation task also applies a similarity metric \u00b6 between the entities during extraction. This similarity metric will define if two entities refer to the same language concept and thus need to be represented by a single entity in the KG, rather than remaining separated. In Figure 1, we have illustrated the aggregation task for two types of entities across many different document components. These entity types could be for example materials and properties or geological formations and geological ages. The links connecting the new entities to their source entity are weighted according to the frequency of the match, that is, we set a higher weight if the language entity has been found multiple times. From an implementation point of view, the aggregation task for entities is nontrivial. In distributed computing, it corresponds to a reduction operation. Our implementation distributes the iteration of the source elements among all available computational resources. The aggregation is first performed in a local buffer, which is then synchronized with the backend database only when it reaches a maximum size. The synchronization step is a simple atomic update into an existing (or a newly created) database object. The synchronization for updates from each worker task does not collide with the others.", "text-hash": 2253911354578933030, "type": "paragraph" @@ -21558,25 +81094,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/58", "hash": 5985285694705576020, "orig": "2.1.4 | Aggregation of relationships", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8199999928474426 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/72" } ], + "sref": "#/texts/57", "text": "2.1.4 | Aggregation of relationships", "text-hash": 12765605759878485615, "type": "subtitle-level-1" @@ -21585,25 +81108,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/59", "hash": 11235296141350659290, "orig": "The aggregation of relationships introduces new links between the entities that were aggregated in the previous aggregation operation. In Figure 1, this task is depicted as the last operation, where entities with an annotated relationship are explicitly linked together. For example, we create an edge between the Egret-Hibernia Petroleum System and Jeanne D'Arc Basin from Listing 1.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/73" } ], + "sref": "#/texts/58", "text": "The aggregation of relationships introduces new links between the entities that were aggregated in the previous aggregation operation. In Figure 1, this task is depicted as the last operation, where entities with an annotated relationship are explicitly linked together. For example, we create an edge between the Egret-Hibernia Petroleum System and Jeanne D'Arc Basin from Listing 1.", "text-hash": 7583169921155047905, "type": "paragraph" @@ -21612,25 +81122,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/60", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/74" } ], + "sref": "#/texts/59", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -21639,25 +81136,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/61", "hash": 4361549266576336732, "orig": "6of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/75" } ], + "sref": "#/texts/60", "text": "6of15", "text-hash": 329104147615819111, "type": "paragraph" @@ -21666,25 +81150,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/62", "hash": 5771309285006424458, "orig": "Similar to the aggregation of entities, the aggregation task for relationships is a reduction operation. Two independent document components could describe the same relationship between two entities. To minimize the synchronization lookup operation with the backend database, this task also utilizes a local buffer which accumulates the changes to be committed to the KG until the maximum size is reached. This approach allows to distribute the computation among all the source document components and performs very few blocking operations in the backend database.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/77" } ], + "sref": "#/texts/61", "text": "Similar to the aggregation of entities, the aggregation task for relationships is a reduction operation. Two independent document components could describe the same relationship between two entities. To minimize the synchronization lookup operation with the backend database, this task also utilizes a local buffer which accumulates the changes to be committed to the KG until the maximum size is reached. This approach allows to distribute the computation among all the source document components and performs very few blocking operations in the backend database.", "text-hash": 12691372718925440689, "type": "paragraph" @@ -21693,25 +81164,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/63", "hash": 5371685212527510397, "orig": "2.2 | Data flows", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/78" } ], + "sref": "#/texts/62", "text": "2.2 | Data flows", "text-hash": 11140938221338345864, "type": "subtitle-level-1" @@ -21720,25 +81178,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/64", "hash": 7817257645383866853, "orig": "The purpose of a DF is to provide an execution plan for the task types detailed above in a meaningful order to generate or update a specific KG. When instantiating a DF, one has the possibility to define in a declarative way:", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/79" } ], + "sref": "#/texts/63", "text": "The purpose of a DF is to provide an execution plan for the task types detailed above in a meaningful order to generate or update a specific KG. When instantiating a DF, one has the possibility to define in a declarative way:", "text-hash": 12955841367339550496, "type": "paragraph" @@ -21747,25 +81192,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/65", "hash": 2929626768872004841, "orig": "1. Which document components should be extracted from a converted corpus to form source entities (eg, extract all paragraphs, tables, figures and captions from the AAPG articles)?", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/80" } ], + "sref": "#/texts/64", "text": "1. Which document components should be extracted from a converted corpus to form source entities (eg, extract all paragraphs, tables, figures and captions from the AAPG articles)?", "text-hash": 17906500337671162388, "type": "paragraph" @@ -21774,25 +81206,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/66", "hash": 15879756297712818143, "orig": "2. Which annotator model(s) to use on which type of source entity (eg, run the geology or material science annotators on paragraphs)?", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/81" } ], + "sref": "#/texts/65", "text": "2. Which annotator model(s) to use on which type of source entity (eg, run the geology or material science annotators on paragraphs)?", "text-hash": 2573988876245521638, "type": "paragraph" @@ -21801,25 +81220,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/67", "hash": 16116531546352845311, "orig": "3. Which entity and relationship aggregations to perform on which set of annotated language entities?", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8899999856948853 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/82" } ], + "sref": "#/texts/66", "text": "3. Which entity and relationship aggregations to perform on which set of annotated language entities?", "text-hash": 2702000589258555142, "type": "paragraph" @@ -21828,25 +81234,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/68", "hash": 9541434157786316356, "orig": "The DFs can thus be seen as blueprints for processing the corpus into a defined graph topology. Notably, our implementation of DFs and their tasks retains the flexibility of processing not only source documents of a well-known data schema such as from CCS, but virtually any structure that can be transformed to a JSON representation, including data entities from precurated databases. We designed the CPS platform to support export and import of DFs on entirely new datasets without the burden of recreating it from scratch.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/83" } ], + "sref": "#/texts/67", "text": "The DFs can thus be seen as blueprints for processing the corpus into a defined graph topology. Notably, our implementation of DFs and their tasks retains the flexibility of processing not only source documents of a well-known data schema such as from CCS, but virtually any structure that can be transformed to a JSON representation, including data entities from precurated databases. We designed the CPS platform to support export and import of DFs on entirely new datasets without the burden of recreating it from scratch.", "text-hash": 6610972392363355263, "type": "paragraph" @@ -21855,25 +81248,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/69", "hash": 997682002692959482, "orig": "Our backend engine can exploit the DAG defined through the DF to massively distribute the individual tasks on all compute resources, because independent branches of the DAG each containing a chain of tasks can execute in parallel. The achievable level of parallelism changes throughout the execution. A practical example is a DF which extracts paragraphs and abstracts from all documents in the corpus, then annotates them and finally aggregates all entities. Here, the extraction tasks are distributed only over all documents; then, in the annotation tasks, we increase the parallelism to all document components. Any synchronization points thus can be pushed back into the aggregation tasks.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/84" } ], + "sref": "#/texts/68", "text": "Our backend engine can exploit the DAG defined through the DF to massively distribute the individual tasks on all compute resources, because independent branches of the DAG each containing a chain of tasks can execute in parallel. The achievable level of parallelism changes throughout the execution. A practical example is a DF which extracts paragraphs and abstracts from all documents in the corpus, then annotates them and finally aggregates all entities. Here, the extraction tasks are distributed only over all documents; then, in the annotation tasks, we increase the parallelism to all document components. Any synchronization points thus can be pushed back into the aggregation tasks.", "text-hash": 15235788623540001281, "type": "paragraph" @@ -21882,25 +81262,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/70", "hash": 11590138063543342276, "orig": "3 | DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8799999952316284 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/85" } ], + "sref": "#/texts/69", "text": "3 | DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS", "text-hash": 9254996552431571455, "type": "subtitle-level-1" @@ -21909,25 +81276,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/71", "hash": 16380310806374538602, "orig": "We will now look into the requirements to perform deep data exploration on a populated Knowledge Graph. A deep data exploration requires two fundamental capabilities:", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/86" } ], + "sref": "#/texts/70", "text": "We will now look into the requirements to perform deep data exploration on a populated Knowledge Graph. A deep data exploration requires two fundamental capabilities:", "text-hash": 4676441280076073873, "type": "paragraph" @@ -21936,25 +81290,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/72", "hash": 5393976293631695754, "orig": "1. perform deep queries on the graph, that is, queries that require multi-hop traversals and", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8799999952316284 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/87" } ], + "sref": "#/texts/71", "text": "1. perform deep queries on the graph, that is, queries that require multi-hop traversals and", "text-hash": 11127633169729292465, "type": "paragraph" @@ -21963,25 +81304,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/73", "hash": 1988335831916069382, "orig": "2. perform graph analytics on the full graph or subsets of it on-the-fly.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.6200000047683716 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/88" } ], + "sref": "#/texts/72", "text": "2. perform graph analytics on the full graph or subsets of it on-the-fly.", "text-hash": 16834701212347777085, "type": "paragraph" @@ -21990,25 +81318,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/74", "hash": 5147764798816678886, "orig": "Deep queries are essential to dynamically combine independent facts together in the given query context. This would apply for example to explorational queries aimed to characterize petroleum system elements, as detailed in our case study (see section 5). Graph analytics can further reveal hidden structure in the KG topology. Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8600000143051147 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/89" } ], + "sref": "#/texts/73", "text": "Deep queries are essential to dynamically combine independent facts together in the given query context. This would apply for example to explorational queries aimed to characterize petroleum system elements, as detailed in our case study (see section 5). Graph analytics can further reveal hidden structure in the KG topology. Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation.", "text-hash": 11297301064675504413, "type": "paragraph" @@ -22017,25 +81332,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/75", "hash": 285583876932865368, "orig": "Both deep queries and graph analytics have in common that they are inherently expensive to compute on conventional graph databases, due to a rapid expansion of the number of visited nodes as a function of the graph-traversal depth. This is a major obstacle in providing reasonable time-to-solution in the aforementioned cases. Virtually all established graph database products on the market today ** fall victim to this, as was also reported in multiple sources. 11,12 Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform. This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/90" } ], + "sref": "#/texts/74", "text": "Both deep queries and graph analytics have in common that they are inherently expensive to compute on conventional graph databases, due to a rapid expansion of the number of visited nodes as a function of the graph-traversal depth. This is a major obstacle in providing reasonable time-to-solution in the aforementioned cases. Virtually all established graph database products on the market today ** fall victim to this, as was also reported in multiple sources. 11,12 Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform. This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast.", "text-hash": 16231538415772072803, "type": "paragraph" @@ -22044,25 +81346,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/76", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/91" } ], + "sref": "#/texts/75", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -22071,25 +81360,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/77", "hash": 4361549257370278754, "orig": "7of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/93" } ], + "sref": "#/texts/76", "text": "7of15", "text-hash": 329104161989101977, "type": "paragraph" @@ -22098,25 +81374,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/78", "hash": 13183039880198077038, "orig": "In the remaining part of this section, we elaborate on our newly developed graph engine. In section 3.1, we discuss the implementation design. In section 3.2, we discuss performance results and compare it to Neo4J. Later, in section 3.3, we will explain how the deep queries are formulated and evaluated in the graph engine.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/94" } ], + "sref": "#/texts/77", "text": "In the remaining part of this section, we elaborate on our newly developed graph engine. In section 3.1, we discuss the implementation design. In section 3.2, we discuss performance results and compare it to Neo4J. Later, in section 3.3, we will explain how the deep queries are formulated and evaluated in the graph engine.", "text-hash": 10251595290936699029, "type": "paragraph" @@ -22125,25 +81388,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/79", "hash": 13428900458866068249, "orig": "3.1 | Design of the graph engine", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/95" } ], + "sref": "#/texts/78", "text": "3.1 | Design of the graph engine", "text-hash": 9938197928077211940, "type": "subtitle-level-1" @@ -22152,25 +81402,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/80", "hash": 1430911655724119030, "orig": "In computer science, two prevalent implementation schemes for graphs have emerged, one using adjacency lists and one relying on adjacency matrices. 13,14 In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors. \u2020\u2020 The edges are therefore stored as a property of the node. In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/96" } ], + "sref": "#/texts/79", "text": "In computer science, two prevalent implementation schemes for graphs have emerged, one using adjacency lists and one relying on adjacency matrices. 13,14 In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors. \u2020\u2020 The edges are therefore stored as a property of the node. In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples.", "text-hash": 17396562708416737549, "type": "paragraph" @@ -22179,25 +81416,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/81", "hash": 13770706479324480755, "orig": "It is commonly known that most graph operations can be translated into matrix-operations using linear algebra. 13 For example, consider the graph-traversal V ! A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W. This can be directly translated into linear algebra as", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8899999856948853 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/97" } ], + "sref": "#/texts/80", "text": "It is commonly known that most graph operations can be translated into matrix-operations using linear algebra. 13 For example, consider the graph-traversal V ! A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W. This can be directly translated into linear algebra as", "text-hash": 9596444718520353290, "type": "paragraph" @@ -22206,25 +81430,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/82", "hash": 11165481757050847950, "orig": "w $^{!}$= Av ! with v $^{!}$$_{i}$= 1 if node i \\b V 0 if node i = 2 V , GLYPH \u00f0 1 \u00de", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/98" } ], + "sref": "#/texts/81", "text": "w $^{!}$= Av ! with v $^{!}$$_{i}$= 1 if node i \\b V 0 if node i = 2 V , GLYPH \u00f0 1 \u00de", "text-hash": 7657471412122468341, "type": "equation" @@ -22233,25 +81444,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/83", "hash": 9572077971492738329, "orig": "and with A being the adjacency matrix representation of the edge A. Translating single graph-traversals into linear algebra operations significantly simplifies the job of deeper graph traversals. For example, to obtain the k-order neighborhood of node set V, one simply needs to evaluate Equation (1) k times recursively, as in", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/99" } ], + "sref": "#/texts/82", "text": "and with A being the adjacency matrix representation of the edge A. Translating single graph-traversals into linear algebra operations significantly simplifies the job of deeper graph traversals. For example, to obtain the k-order neighborhood of node set V, one simply needs to evaluate Equation (1) k times recursively, as in", "text-hash": 6656818579934057252, "type": "paragraph" @@ -22260,25 +81458,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/84", "hash": 14951391138799557075, "orig": "w $^{!}$= A$^{k}$v $^{!}$= AA \u2026 Av ! GLYPHGLYPH GLYPH GLYPH GLYPH GLYPH : \u00f0 2 \u00de", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/100" } ], + "sref": "#/texts/83", "text": "w $^{!}$= A$^{k}$v $^{!}$= AA \u2026 Av ! GLYPHGLYPH GLYPH GLYPH GLYPH GLYPH : \u00f0 2 \u00de", "text-hash": 1498163960925914858, "type": "equation" @@ -22287,25 +81472,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/85", "hash": 16602156009514813718, "orig": "Therefore, deep queries can be implemented efficiently as long as Equation (1) can be evaluated efficiently. Over the past decades, lots of research has been conducted in the High Performance Computing community on the acceleration and parallelization of Equation (1) in the context of graphs. In this context, the matrix A is sparse and the linear operation of Equation (1) is referred to as a sparse matrix vector multiplication (SpMV), for which highly optimized implementations have been developed. 15,16 Notably, most advanced graph-analytical operations can be formulated using SpMV operations. The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w ! is equal to v $^{!}$. In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/101" } ], + "sref": "#/texts/84", "text": "Therefore, deep queries can be implemented efficiently as long as Equation (1) can be evaluated efficiently. Over the past decades, lots of research has been conducted in the High Performance Computing community on the acceleration and parallelization of Equation (1) in the context of graphs. In this context, the matrix A is sparse and the linear operation of Equation (1) is referred to as a sparse matrix vector multiplication (SpMV), for which highly optimized implementations have been developed. 15,16 Notably, most advanced graph-analytical operations can be formulated using SpMV operations. The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w ! is equal to v $^{!}$. In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations.", "text-hash": 4445641728881669933, "type": "paragraph" @@ -22314,25 +81486,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/86", "hash": 7162849562576593449, "orig": "Since both deep queries and advanced graph analytics hugely benefit from a fast SpMV kernel, we have opted to design the graph engine in the CPS platform to work entirely with the adjacency matrix format.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.7900000214576721 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/102" } ], + "sref": "#/texts/85", "text": "Since both deep queries and advanced graph analytics hugely benefit from a fast SpMV kernel, we have opted to design the graph engine in the CPS platform to work entirely with the adjacency matrix format.", "text-hash": 13884895358995816532, "type": "paragraph" @@ -22341,25 +81500,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/87", "hash": 15385417954505503552, "orig": "3.2 | Memory architecture and performance optimization", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/103" } ], + "sref": "#/texts/86", "text": "3.2 | Memory architecture and performance optimization", "text-hash": 3140380205981200763, "type": "subtitle-level-1" @@ -22368,25 +81514,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/88", "hash": 10815650641518265876, "orig": "Both adjacency lists and adjacency matrices-based graph implementations have specific advantages and disadvantages. The adjacency list format is very well suited for node-centric operations since it exploits data-locality for local graph operations, such as first order traversals. However, it proves suboptimal for global scale graph operations, which are required for deep queries and the advanced graph analytics. Here, one typically has to perform graph-traversals starting from many (or even all) nodes and accumulating the weight in the resulting nodes. In an adjacency list format, this often leads to many cache misses during execution, resulting in low performance. Furthermore, parallelizing global graph-traversals in the adjacency list format suffers significantly from concurrent write conflicts between threads during execution. In the adjacency matrix format, these problems are not encountered. The graph-traversals can be directly translated into a SpMV or even a sparse-matrix sparse-vector multiplication (SpMSpV). It has also been well established how to execute the SpMV effectively in a multithreaded fashion, and how to minimize cache-misses by applying a clever sorting of the tuples list. 17", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9100000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/104" } ], + "sref": "#/texts/87", "text": "Both adjacency lists and adjacency matrices-based graph implementations have specific advantages and disadvantages. The adjacency list format is very well suited for node-centric operations since it exploits data-locality for local graph operations, such as first order traversals. However, it proves suboptimal for global scale graph operations, which are required for deep queries and the advanced graph analytics. Here, one typically has to perform graph-traversals starting from many (or even all) nodes and accumulating the weight in the resulting nodes. In an adjacency list format, this often leads to many cache misses during execution, resulting in low performance. Furthermore, parallelizing global graph-traversals in the adjacency list format suffers significantly from concurrent write conflicts between threads during execution. In the adjacency matrix format, these problems are not encountered. The graph-traversals can be directly translated into a SpMV or even a sparse-matrix sparse-vector multiplication (SpMSpV). It has also been well established how to execute the SpMV effectively in a multithreaded fashion, and how to minimize cache-misses by applying a clever sorting of the tuples list. 17", "text-hash": 7939832404963099695, "type": "paragraph" @@ -22395,25 +81528,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/89", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/105" } ], + "sref": "#/texts/88", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -22422,25 +81542,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/91", "hash": 12004249365408683930, "orig": "To illustrate the advantages of the adjacency matrix format for our needs, we show the time-to-solution (TTS) for queries with increasing order of traversals for Neo4J \u2021\u2021 and our graph engine in Figure 3. We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges). Two important observations can be made. Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals. With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour. Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals. This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/110" } ], + "sref": "#/texts/89", "text": "To illustrate the advantages of the adjacency matrix format for our needs, we show the time-to-solution (TTS) for queries with increasing order of traversals for Neo4J \u2021\u2021 and our graph engine in Figure 3. We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges). Two important observations can be made. Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals. With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour. Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals. This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from.", "text-hash": 9124629550221661345, "type": "paragraph" @@ -22449,25 +81556,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/92", "hash": 7223381657047466215, "orig": "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO. This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory. In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO. We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers. Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes. *** This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$). Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/111" } ], + "sref": "#/texts/90", "text": "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO. This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory. In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO. We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers. Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes. *** This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$). Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory.", "text-hash": 13549646715324792350, "type": "paragraph" @@ -22476,25 +81570,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/93", "hash": 15132906055887224772, "orig": "3.3 | Formulation and evaluation of deep queries", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.7099999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/112" } ], + "sref": "#/texts/91", "text": "3.3 | Formulation and evaluation of deep queries", "text-hash": 3609048564712975615, "type": "subtitle-level-1" @@ -22503,25 +81584,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/94", "hash": 17129434987283608290, "orig": "The goal of querying a KG is to answer complex questions. As such, users need to be provided with a functionality to formulate complex queries on the KG and quickly evaluate them.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/113" } ], + "sref": "#/texts/92", "text": "The goal of querying a KG is to answer complex questions. As such, users need to be provided with a functionality to formulate complex queries on the KG and quickly evaluate them.", "text-hash": 3711217782201102361, "type": "paragraph" @@ -22530,25 +81598,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/95", "hash": 10350406469077463155, "orig": "In order to avoid imposing a complex query language onto users, we have devised a way to define complex graph queries in a declarative format, which we call a workflow. Workflows are represented as a DAG of operations and are conceptually related to DFs. Unlike the former, the nodes of workflow DAGs do not represent data-transformation tasks, but specific graph operations which mutate an input (or intermediate) set of nodes into another set. We call these operations worktasks. For further convenience, we have developed a graphical user interface (UI) which allows to define such workflows in a visual programming approach (see Figure 4).", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9300000071525574 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/114" } ], + "sref": "#/texts/93", "text": "In order to avoid imposing a complex query language onto users, we have devised a way to define complex graph queries in a declarative format, which we call a workflow. Workflows are represented as a DAG of operations and are conceptually related to DFs. Unlike the former, the nodes of workflow DAGs do not represent data-transformation tasks, but specific graph operations which mutate an input (or intermediate) set of nodes into another set. We call these operations worktasks. For further convenience, we have developed a graphical user interface (UI) which allows to define such workflows in a visual programming approach (see Figure 4).", "text-hash": 6157696558870441610, "type": "paragraph" @@ -22557,25 +81612,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/96", "hash": 16949854269270315165, "orig": "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions. In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/115" } ], + "sref": "#/texts/94", "text": "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions. In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design.", "text-hash": 4111476184068705704, "type": "paragraph" @@ -22584,25 +81626,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/97", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/116" } ], + "sref": "#/texts/95", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -22611,25 +81640,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/98", "hash": 4361549266593946746, "orig": "9of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/118" } ], + "sref": "#/texts/96", "text": "9of15", "text-hash": 329104147597527681, "type": "paragraph" @@ -22638,25 +81654,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/100", "hash": 9802652237802670052, "orig": "3.3.1 | Node retrieval", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.7099999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/121" } ], + "sref": "#/texts/97", "text": "3.3.1 | Node retrieval", "text-hash": 6349660887815587103, "type": "subtitle-level-1" @@ -22665,25 +81668,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/101", "hash": 5524728206729419689, "orig": "This task finds a set of nodes which satisfy certain search criteria. This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property. The task constructs a node vector v $^{!}$, such that", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/122" } ], + "sref": "#/texts/98", "text": "This task finds a set of nodes which satisfy certain search criteria. This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property. The task constructs a node vector v $^{!}$, such that", "text-hash": 10699646946138261716, "type": "paragraph" @@ -22692,25 +81682,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/102", "hash": 4043385013945968936, "orig": "v $^{!}$$_{i}$= 1 if node i \\b S 0 if node i = 2 S , GLYPH \u00f0 3 \u00de", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/123" } ], + "sref": "#/texts/99", "text": "v $^{!}$$_{i}$= 1 if node i \\b S 0 if node i = 2 S , GLYPH \u00f0 3 \u00de", "text-hash": 588808569772103507, "type": "equation" @@ -22719,25 +81696,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/103", "hash": 11778884428660217326, "orig": "where S represents the set of nodes that satisfy the search criteria.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/124" } ], + "sref": "#/texts/100", "text": "where S represents the set of nodes that satisfy the search criteria.", "text-hash": 9277850099981357845, "type": "paragraph" @@ -22746,25 +81710,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/104", "hash": 12875050310340408203, "orig": "3.3.2 | Graph traversal", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/125" } ], + "sref": "#/texts/101", "text": "3.3.2 | Graph traversal", "text-hash": 10555101842315227314, "type": "subtitle-level-1" @@ -22773,25 +81724,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/105", "hash": 3785875504044487339, "orig": "The simplest type of graph-traversal is the direct graph-traversal. As explained in detail in section 3.1, these can be implemented as a straightforward SpMV operation w $^{!}$= Av $^{!}$. In more advanced types of graph-traversals, we evaluate all paths of different depth. Since the number of paths connecting two nodes might increase exponentially with the pathlength, one typically reduces the contribution of each path by weighting it with the inverse factorial of the path-length. For example, consider the case in which we want to explore deeper, indirect paths as follows,", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/126" } ], + "sref": "#/texts/102", "text": "The simplest type of graph-traversal is the direct graph-traversal. As explained in detail in section 3.1, these can be implemented as a straightforward SpMV operation w $^{!}$= Av $^{!}$. In more advanced types of graph-traversals, we evaluate all paths of different depth. Since the number of paths connecting two nodes might increase exponentially with the pathlength, one typically reduces the contribution of each path by weighting it with the inverse factorial of the path-length. For example, consider the case in which we want to explore deeper, indirect paths as follows,", "text-hash": 909351913600217042, "type": "paragraph" @@ -22800,25 +81738,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/106", "hash": 12105626155924658285, "orig": "w $^{!}$= A + A 2 2 ! + A 3 3 ! + GLYPH GLYPH GLYPH GLYPH GLYPH v $^{!}$= e$^{A}$\u2212 1 GLYPH GLYPH v $^{!}$: \u00f0 4 \u00de", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/127" } ], + "sref": "#/texts/103", "text": "w $^{!}$= A + A 2 2 ! + A 3 3 ! + GLYPH GLYPH GLYPH GLYPH GLYPH v $^{!}$= e$^{A}$- 1 GLYPH GLYPH v $^{!}$: \u00f0 4 \u00de", "text-hash": 9027673695254677144, "type": "equation" @@ -22827,25 +81752,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/107", "hash": 16265612055607243129, "orig": "In its most generic case, a graph-traversal can therefore be written down as a matrix-function applied on an edge, that is, w $^{!}$= fA \u00f0 \u00de v $^{!}$. As discussed in detail in previous work, 2 this type of operation can be evaluated extremely efficiently using a recursive Chebyshev polynomial expansion.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/128" } ], + "sref": "#/texts/104", "text": "In its most generic case, a graph-traversal can therefore be written down as a matrix-function applied on an edge, that is, w $^{!}$= fA \u00f0 \u00de v $^{!}$. As discussed in detail in previous work, 2 this type of operation can be evaluated extremely efficiently using a recursive Chebyshev polynomial expansion.", "text-hash": 4579475315408875396, "type": "paragraph" @@ -22854,25 +81766,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/108", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/129" } ], + "sref": "#/texts/105", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -22881,25 +81780,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/109", "hash": 10252446451495472512, "orig": "3.3.3 | Logical operations", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/132" } ], + "sref": "#/texts/106", "text": "3.3.3 | Logical operations", "text-hash": 6188098459342469819, "type": "subtitle-level-1" @@ -22908,25 +81794,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/110", "hash": 17011944206067158637, "orig": "In logical operations, two sets of nodes are merged into one resulting set, each represented through a node vector. There are three common logical operations, AND, OR, and NOT. In the AND and OR operations, we compute the geometric or the arithmetic mean respectively for each pairwise elements in the vectors. In the NOT operation, we inverse the sign for each element of the input vector.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/133" } ], + "sref": "#/texts/107", "text": "In logical operations, two sets of nodes are merged into one resulting set, each represented through a node vector. There are three common logical operations, AND, OR, and NOT. In the AND and OR operations, we compute the geometric or the arithmetic mean respectively for each pairwise elements in the vectors. In the NOT operation, we inverse the sign for each element of the input vector.", "text-hash": 3756558606376352920, "type": "paragraph" @@ -22935,25 +81808,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/111", "hash": 16289627123982758705, "orig": "3.3.4 | Transform functions", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.4399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/134" } ], + "sref": "#/texts/108", "text": "3.3.4 | Transform functions", "text-hash": 4767177430745297228, "type": "subtitle-level-1" @@ -22962,25 +81822,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/112", "hash": 13969801897340997317, "orig": "Lastly, we implement operations which transform the weights associated with nodes. One such operation renormalizes and ultimately ranks the nodes according to their weight.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/135" } ], + "sref": "#/texts/109", "text": "Lastly, we implement operations which transform the weights associated with nodes. One such operation renormalizes and ultimately ranks the nodes according to their weight.", "text-hash": 2263647560089238528, "type": "paragraph" @@ -22989,25 +81836,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/113", "hash": 105697770555684555, "orig": "With these four types of operations, we can express rich queries to answer complex questions, which can have multiple inputs and outputs. Let us now discuss how a workflow is evaluated within the graph engine. Once a workflow has been submitted, each worktask is initially assigned a vector. These vectors are all initialized to zero (v $^{!}$$_{i}$= 0). Next, the graph will analyze the DAG of worktasks and identify which tasks can be run in parallel. This is achieved by performing a topological sort using depth-first traversal, which yields a list in which each item is a set of tasks that can be executed in parallel. The graph engine then proceeds with the parallel task computations.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/136" } ], + "sref": "#/texts/110", "text": "With these four types of operations, we can express rich queries to answer complex questions, which can have multiple inputs and outputs. Let us now discuss how a workflow is evaluated within the graph engine. Once a workflow has been submitted, each worktask is initially assigned a vector. These vectors are all initialized to zero (v $^{!}$$_{i}$= 0). Next, the graph will analyze the DAG of worktasks and identify which tasks can be run in parallel. This is achieved by performing a topological sort using depth-first traversal, which yields a list in which each item is a set of tasks that can be executed in parallel. The graph engine then proceeds with the parallel task computations.", "text-hash": 16051124526605366258, "type": "paragraph" @@ -23016,25 +81850,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/114", "hash": 15938840672015995359, "orig": "For each task, we obtain a set of nodes with corresponding weights by identifying the nonzero elements in the associated node vector. After executing the full workflow, we therefore obtain for each task a list of nodes which can be sorted according to their weights. The higher the weight of the node, the more relevant this node is. As such, we can also retrace which nodes were important in each stage of the workflow.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/137" } ], + "sref": "#/texts/111", "text": "For each task, we obtain a set of nodes with corresponding weights by identifying the nonzero elements in the associated node vector. After executing the full workflow, we therefore obtain for each task a list of nodes which can be sorted according to their weights. The higher the weight of the node, the more relevant this node is. As such, we can also retrace which nodes were important in each stage of the workflow.", "text-hash": 2523894108122369766, "type": "paragraph" @@ -23043,25 +81864,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/115", "hash": 16505790528099785698, "orig": "4 | CLOUD DESIGN AND DEPLOYMENT", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/138" } ], + "sref": "#/texts/112", "text": "4 | CLOUD DESIGN AND DEPLOYMENT", "text-hash": 4262729847538649369, "type": "subtitle-level-1" @@ -23070,25 +81878,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/116", "hash": 14738723905055920039, "orig": "The primary deployment target for the CPS is a cloud environment orchestrated via Kubernetes. We package the full platform assets with a Helm chart for quick deployment on multiple setups. For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/139" } ], + "sref": "#/texts/113", "text": "The primary deployment target for the CPS is a cloud environment orchestrated via Kubernetes. We package the full platform assets with a Helm chart for quick deployment on multiple setups. For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes.", "text-hash": 1485721651435830494, "type": "paragraph" @@ -23097,25 +81892,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/117", "hash": 5699550326698755904, "orig": "In Figure 5, we show the high-level cloud design of the CPS. The platform allows to manage and instrument the corpus processing in a multitenant fashion, that is, it handles multiple knowledge ingestion pipelines and it serves multiple knowledge graphs. We call each unit a Knowledge Graph Space (KGS), which consists of a dedicated instance of the graph engine, a dedicated MongoDB database and a bucket on a cloud object store (COS). A dashboard allows each project owner to manage the access and the usage of resources. The KGS can be launched into multiple flavors to optimally balance the utilization of the cluster. These flavors range from a virtual machine with small amount of memory to a full dedicated node including hardware acceleration with GPUs. Once a KGS is created, it can be paused and rescaled without loss of data or downtime.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/140" } ], + "sref": "#/texts/114", "text": "In Figure 5, we show the high-level cloud design of the CPS. The platform allows to manage and instrument the corpus processing in a multitenant fashion, that is, it handles multiple knowledge ingestion pipelines and it serves multiple knowledge graphs. We call each unit a Knowledge Graph Space (KGS), which consists of a dedicated instance of the graph engine, a dedicated MongoDB database and a bucket on a cloud object store (COS). A dashboard allows each project owner to manage the access and the usage of resources. The KGS can be launched into multiple flavors to optimally balance the utilization of the cluster. These flavors range from a virtual machine with small amount of memory to a full dedicated node including hardware acceleration with GPUs. Once a KGS is created, it can be paused and rescaled without loss of data or downtime.", "text-hash": 10750023430231115131, "type": "paragraph" @@ -23124,25 +81906,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/118", "hash": 11609131422778723150, "orig": "For the KG creation pipeline, we implemented an asynchronous compute scheme we already use in our CCS solution. 1 The system is exposed to the user via an API frontend which communicates to the compute workers through a message broker and a result backend. The workers operate on the data, which is hosted on a NoSQL database and a cloud object store for data blobs. These workers are dynamically scaled by the cloud orchestrator to best match the current load of the platform.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/141" } ], + "sref": "#/texts/115", "text": "For the KG creation pipeline, we implemented an asynchronous compute scheme we already use in our CCS solution. 1 The system is exposed to the user via an API frontend which communicates to the compute workers through a message broker and a result backend. The workers operate on the data, which is hosted on a NoSQL database and a cloud object store for data blobs. These workers are dynamically scaled by the cloud orchestrator to best match the current load of the platform.", "text-hash": 9163968380151462261, "type": "paragraph" @@ -23151,25 +81920,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/119", "hash": 788128893109726279, "orig": "The processing of the KG creation typically starts with the user submitting the DF to the frontend API. The DAG of operations is then interpreted as described in the previous section and fine-grained tasks are submitted to the broker, for example, the whole corpus is split in many independent chunks. The user receives an overall status from the API and is notified when the DF processing has completed.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9100000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/142" } ], + "sref": "#/texts/116", "text": "The processing of the KG creation typically starts with the user submitting the DF to the frontend API. The DAG of operations is then interpreted as described in the previous section and fine-grained tasks are submitted to the broker, for example, the whole corpus is split in many independent chunks. The user receives an overall status from the API and is notified when the DF processing has completed.", "text-hash": 15724564631854553726, "type": "paragraph" @@ -23178,25 +81934,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/120", "hash": 7029344862946908483, "orig": "The KG data are distributed between three storage solutions: a NoSQL database, a cloud object storage (COS) and the KGS. Each node is represented as a document in a NoSQL database which contains all the properties attached to the node, for example, the text of a paragraph. If there is a binary object attached to the node, for example, the PDF document or an image, this is stored on the COS. The KGS contains only the minimal information needed to execute the queries, that is, the connectivity of the graph and the properties which are indexed for filtering and search.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/143" } ], + "sref": "#/texts/117", "text": "The KG data are distributed between three storage solutions: a NoSQL database, a cloud object storage (COS) and the KGS. Each node is represented as a document in a NoSQL database which contains all the properties attached to the node, for example, the text of a paragraph. If there is a binary object attached to the node, for example, the PDF document or an image, this is stored on the COS. The KGS contains only the minimal information needed to execute the queries, that is, the connectivity of the graph and the properties which are indexed for filtering and search.", "text-hash": 13806805648097199994, "type": "paragraph" @@ -23205,25 +81948,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/121", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/144" } ], + "sref": "#/texts/118", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -23232,25 +81962,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/122", "hash": 2144926686518491811, "orig": "11of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/146" } ], + "sref": "#/texts/119", "text": "11of15", "text-hash": 16380805707549272026, "type": "paragraph" @@ -23259,25 +81976,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/124", "hash": 18333396269095847693, "orig": "The KGS is exposed to the user via a REST API which is able to aggregate results collected from the different storage sources. To ensure decent performance when serving queries of multiple users, the graph engine can be dynamically scaled horizontally. Most workflow queries execute fast enough such that they can be responded from a synchronous request. Others, especially the graph analytics computations, are more expensive and return large amounts of data. Thus, these queries are executed through an asynchronous API and the results are paginated and streamed back to the user on completion.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/149" } ], + "sref": "#/texts/120", "text": "The KGS is exposed to the user via a REST API which is able to aggregate results collected from the different storage sources. To ensure decent performance when serving queries of multiple users, the graph engine can be dynamically scaled horizontally. Most workflow queries execute fast enough such that they can be responded from a synchronous request. Others, especially the graph analytics computations, are more expensive and return large amounts of data. Thus, these queries are executed through an asynchronous API and the results are paginated and streamed back to the user on completion.", "text-hash": 5024699355629880632, "type": "paragraph" @@ -23286,25 +81990,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/125", "hash": 4030998538427149966, "orig": "5 | CASE STUDY: OIL AND GAS EXPLORATION", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.7599999904632568 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/150" } ], + "sref": "#/texts/121", "text": "5 | CASE STUDY: OIL AND GAS EXPLORATION", "text-hash": 956984534850296757, "type": "subtitle-level-1" @@ -23313,25 +82004,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/126", "hash": 10295608624766759271, "orig": "Oil and gas exploration is a complex, technical field of expertise. Unfortunately, the data of many geological processes and entities is scattered across databases (public and proprietary) and corpora of documents, where it is often deeply embedded in text, tables, and figures. This is a serious impediment for efficient exploration of new oil and gas opportunities. For example, geographic information of geological structures can be found in NaturalEarthData, \u2021\u2021\u2021 while their history, evolution, and components (eg, formations with their age, rock-composition, and depth) are discussed in reports (governmental and proprietary) and scientific articles. As such, experts in oil and gas exploration often need to read many documents in order to find all the information of a certain geographic area and get a good understanding of its underlying geology.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/151" } ], + "sref": "#/texts/122", "text": "Oil and gas exploration is a complex, technical field of expertise. Unfortunately, the data of many geological processes and entities is scattered across databases (public and proprietary) and corpora of documents, where it is often deeply embedded in text, tables, and figures. This is a serious impediment for efficient exploration of new oil and gas opportunities. For example, geographic information of geological structures can be found in NaturalEarthData, \u2021\u2021\u2021 while their history, evolution, and components (eg, formations with their age, rock-composition, and depth) are discussed in reports (governmental and proprietary) and scientific articles. As such, experts in oil and gas exploration often need to read many documents in order to find all the information of a certain geographic area and get a good understanding of its underlying geology.", "text-hash": 6212506812498931614, "type": "paragraph" @@ -23340,25 +82018,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/127", "hash": 10633780781731536747, "orig": "The main tasks of the experts working in oil and gas exploration are to identify potential new exploration sites. This is typically done by describing a basin or one of its sub-regions. In practice, ' describing a basin ' boils down to identifying all geological formations with their properties in the basin and investigating if these formations constitute a petroleum system. 19 In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal. The source is the rock formation in which the oil or gas was created. Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas. In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal. Each one of these components is comprised of one or more formations, with a certain age and rock composition. To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints. For example, the reservoir", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/152" } ], + "sref": "#/texts/123", "text": "The main tasks of the experts working in oil and gas exploration are to identify potential new exploration sites. This is typically done by describing a basin or one of its sub-regions. In practice, ' describing a basin ' boils down to identifying all geological formations with their properties in the basin and investigating if these formations constitute a petroleum system. 19 In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal. The source is the rock formation in which the oil or gas was created. Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas. In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal. Each one of these components is comprised of one or more formations, with a certain age and rock composition. To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints. For example, the reservoir", "text-hash": 8189171326047604114, "type": "paragraph" @@ -23367,25 +82032,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/128", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/153" } ], + "sref": "#/texts/124", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -23394,25 +82046,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/129", "hash": 1080447728722590413, "orig": "12", - "properties": { - "data": [ - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/154" } ], + "sref": "#/texts/125", "text": "12", "text-hash": 15441160910541481976, "type": "paragraph" @@ -23421,25 +82060,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/130", "hash": 4361549257087816853, "orig": "of 15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8899999856948853 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/155" } ], + "sref": "#/texts/126", "text": "of 15", "text-hash": 329104161717916080, "type": "paragraph" @@ -23448,25 +82074,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/132", "hash": 10195664788154887804, "orig": "formation has to have a lower depth than the seal formation. Another example of such constraints is that the age of the seal and reservoir has to be older than the source.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/159" } ], + "sref": "#/texts/127", "text": "formation has to have a lower depth than the seal formation. Another example of such constraints is that the age of the seal and reservoir has to be older than the source.", "text-hash": 5965659969661688967, "type": "paragraph" @@ -23475,25 +82088,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/133", "hash": 7538054744015619336, "orig": "In order for the CPS platform to help the oil and gas explorationalists in their day-to-day job effectively, it needs to meet two objectives. On the one hand, it needs to create a consistent Knowledge Graph from a document corpus. This Knowledge Graph has to contain all geological formations with their respective properties (eg, geographical locations, depth, age, and rock composition). On the other hand, CPS needs to provide fast query responses, such that one can automatically retrieve potential components of petroleum systems and apply the constraints to filter out promising candidates.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/160" } ], + "sref": "#/texts/128", "text": "In order for the CPS platform to help the oil and gas explorationalists in their day-to-day job effectively, it needs to meet two objectives. On the one hand, it needs to create a consistent Knowledge Graph from a document corpus. This Knowledge Graph has to contain all geological formations with their respective properties (eg, geographical locations, depth, age, and rock composition). On the other hand, CPS needs to provide fast query responses, such that one can automatically retrieve potential components of petroleum systems and apply the constraints to filter out promising candidates.", "text-hash": 13307027925001159475, "type": "paragraph" @@ -23502,25 +82102,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/134", "hash": 12426662601736619109, "orig": "During the development and implementation of custom NLU annotators in CPS for oil and gas exploration, the client team worked hand in hand with the IBM Research team to set up a controlled accuracy benchmark in which the key capabilities of the CPS can be quantified. The goal of the benchmark was to test the entire pipeline depicted in Figure 6, that is, from PDF document ingestion to a final, queryable KG. The key components of this specific pipeline are,", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/161" } ], + "sref": "#/texts/129", "text": "During the development and implementation of custom NLU annotators in CPS for oil and gas exploration, the client team worked hand in hand with the IBM Research team to set up a controlled accuracy benchmark in which the key capabilities of the CPS can be quantified. The goal of the benchmark was to test the entire pipeline depicted in Figure 6, that is, from PDF document ingestion to a final, queryable KG. The key components of this specific pipeline are,", "text-hash": 8341863300316693152, "type": "paragraph" @@ -23529,25 +82116,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/135", "hash": 4162783521620221579, "orig": "1. the conversion of PDF documents into JSON through CCS,", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.46000000834465027 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/162" } ], + "sref": "#/texts/130", "text": "1. the conversion of PDF documents into JSON through CCS,", "text-hash": 527957687390948274, "type": "paragraph" @@ -23556,25 +82130,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/136", "hash": 5135259059216244866, "orig": "2. the creation of the KG in the CPS from the JSON documents, and", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.7599999904632568 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/163" } ], + "sref": "#/texts/131", "text": "2. the creation of the KG in the CPS from the JSON documents, and", "text-hash": 11300804242294087097, "type": "paragraph" @@ -23583,25 +82144,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/137", "hash": 16998817296948099535, "orig": "3. the querying of the KG served by CPS to identify petroleum systems elements with their properties.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8700000047683716 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/164" } ], + "sref": "#/texts/132", "text": "3. the querying of the KG served by CPS to identify petroleum systems elements with their properties.", "text-hash": 4121058581451712246, "type": "paragraph" @@ -23610,25 +82158,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/138", "hash": 1205649569241141618, "orig": "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset. The advantage of using this dataset for an accuracy benchmark is that each report includes two parts. One part is verbose text describing the history, evolution, and composition of the fields. The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline. The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties. Therefore, we ingest these reports into CCS and extract both text and tables. Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/165" } ], + "sref": "#/texts/133", "text": "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset. The advantage of using this dataset for an accuracy benchmark is that each report includes two parts. One part is verbose text describing the history, evolution, and composition of the fields. The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline. The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties. Therefore, we ingest these reports into CCS and extract both text and tables. Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark.", "text-hash": 17333577132913364873, "type": "paragraph" @@ -23637,25 +82172,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/139", "hash": 12257840490666828590, "orig": "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages. This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure. These numbers are in line with those reported in our previous works. 1 Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/166" } ], + "sref": "#/texts/134", "text": "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages. This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure. These numbers are in line with those reported in our previous works. 1 Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer.", "text-hash": 8803415231465414997, "type": "paragraph" @@ -23664,25 +82186,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/140", "hash": 7040847965650746591, "orig": "In step (2), we create the Knowledge Graph by executing a DF that will generate all the entities and relationships relevant to the geology domain. Our language annotator models trained for geology extract geographic areas, geological structures (eg, basins), formations, ages, rocks, petroleum systems, and their elements (PSE) (eg, seal, source, and reservoir). Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs). The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores. Eventually, the KG included 679 296 edges connecting 116 662 nodes.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/167" } ], + "sref": "#/texts/135", "text": "In step (2), we create the Knowledge Graph by executing a DF that will generate all the entities and relationships relevant to the geology domain. Our language annotator models trained for geology extract geographic areas, geological structures (eg, basins), formations, ages, rocks, petroleum systems, and their elements (PSE) (eg, seal, source, and reservoir). Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs). The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores. Eventually, the KG included 679 296 edges connecting 116 662 nodes.", "text-hash": 13799731378750663142, "type": "paragraph" @@ -23691,25 +82200,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/141", "hash": 7927601225025519287, "orig": "In step (3), we query the Knowledge Graph using a tailored evaluation workflow. This workflow allows us to identify PSEs and their connected properties in the Knowledge Graph, for example, their age, formation and rock", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/168" } ], + "sref": "#/texts/136", "text": "In step (3), we query the Knowledge Graph using a tailored evaluation workflow. This workflow allows us to identify PSEs and their connected properties in the Knowledge Graph, for example, their age, formation and rock", "text-hash": 13120217128072555470, "type": "paragraph" @@ -23718,25 +82214,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/142", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/169" } ], + "sref": "#/texts/137", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -23745,25 +82228,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/143", "hash": 1080447728722590402, "orig": "13", - "properties": { - "data": [ - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/171" } ], + "sref": "#/texts/138", "text": "13", "text-hash": 15441160910541481977, "type": "paragraph" @@ -23772,25 +82242,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/144", "hash": 4361549257087816853, "orig": "of 15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8899999856948853 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/172" } ], + "sref": "#/texts/139", "text": "of 15", "text-hash": 329104161717916080, "type": "paragraph" @@ -23799,25 +82256,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/147", "hash": 8207961846673301043, "orig": "composition. In Figure 7, we visualize the DAG of this workflow. The final node weights are accumulated throughout the branches on the workflow and represent the relevance score of each node.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/177" } ], + "sref": "#/texts/140", "text": "composition. In Figure 7, we visualize the DAG of this workflow. The final node weights are accumulated throughout the branches on the workflow and represent the relevance score of each node.", "text-hash": 14933956665806015562, "type": "paragraph" @@ -23826,25 +82270,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/148", "hash": 11998199584890640594, "orig": "To evaluate the correctness of the predicted PSE properties, we follow the standard practice of reporting the top-k accuracy. This is computed as the percentage in which any of the k highest ranked answers matches the expected answer, over all documents. In Table 1, we show the top-1, top-2, top-3, and top-5 accuracy for all properties of each petroleum system element. One can make two distinct observations. First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision). Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall). Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/178" } ], + "sref": "#/texts/141", "text": "To evaluate the correctness of the predicted PSE properties, we follow the standard practice of reporting the top-k accuracy. This is computed as the percentage in which any of the k highest ranked answers matches the expected answer, over all documents. In Table 1, we show the top-1, top-2, top-3, and top-5 accuracy for all properties of each petroleum system element. One can make two distinct observations. First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision). Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall). Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory.", "text-hash": 9121677663017059817, "type": "paragraph" @@ -23853,25 +82284,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/149", "hash": 16446129547721407877, "orig": "6 | CONCLUSIONS", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/179" } ], + "sref": "#/texts/142", "text": "6 | CONCLUSIONS", "text-hash": 4326952903809379008, "type": "subtitle-level-1" @@ -23880,25 +82298,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/150", "hash": 6720443978031524294, "orig": "With the introduction of the CPS platform, we demonstrate substantial benefit for domain experts and data scientists in exercising deep exploration of published knowledge in a fully integrated, yet modular cloud solution. CPS seamlessly connects to the CSS, complementing it with a highly scalable, automated pipeline to build consistent domain knowledge models and an intuitive, powerful approach to explorational queries and graph-scale analytics. This is accomplished through three fundamental design considerations: (1) We do not require manual data curation or annotation; (2) We built a scalable, efficient architecture to support the ingestion, processing and query workloads, all embedded in", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/180" } ], + "sref": "#/texts/143", "text": "With the introduction of the CPS platform, we demonstrate substantial benefit for domain experts and data scientists in exercising deep exploration of published knowledge in a fully integrated, yet modular cloud solution. CPS seamlessly connects to the CSS, complementing it with a highly scalable, automated pipeline to build consistent domain knowledge models and an intuitive, powerful approach to explorational queries and graph-scale analytics. This is accomplished through three fundamental design considerations: (1) We do not require manual data curation or annotation; (2) We built a scalable, efficient architecture to support the ingestion, processing and query workloads, all embedded in", "text-hash": 11733208797674542845, "type": "paragraph" @@ -23907,25 +82312,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/151", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/181" } ], + "sref": "#/texts/144", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -23934,25 +82326,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/152", "hash": 2144926730621142072, "orig": "14of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/182" } ], + "sref": "#/texts/145", "text": "14of15", "text-hash": 16380805732317250115, "type": "paragraph" @@ -23961,25 +82340,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/153", "hash": 14222671032550229818, "orig": "a single platform; and (3) We expose the capabilities through an intuitively consumable API and complementary UI tools.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.6000000238418579 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/184" } ], + "sref": "#/texts/146", "text": "a single platform; and (3) We expose the capabilities through an intuitively consumable API and complementary UI tools.", "text-hash": 1925144237473465665, "type": "paragraph" @@ -23988,25 +82354,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/154", "hash": 17486770941839589126, "orig": "In our oil and gas case study, we successfully verified our solution for a real-world application with the help of subject matter experts from a client team. Currently, CCS and CPS are actively used in more than five client engagements, most notably in the oil and gas industry as well as in the material science industry.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/185" } ], + "sref": "#/texts/147", "text": "In our oil and gas case study, we successfully verified our solution for a real-world application with the help of subject matter experts from a client team. Currently, CCS and CPS are actively used in more than five client engagements, most notably in the oil and gas industry as well as in the material science industry.", "text-hash": 5943448246547541309, "type": "paragraph" @@ -24015,25 +82368,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/155", "hash": 16574813224778118841, "orig": "Future work will focus on processing public repositories such as the arXiv.org library, USPTO, and PubMed in order to make their content available to deep data exploration.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/186" } ], + "sref": "#/texts/148", "text": "Future work will focus on processing public repositories such as the arXiv.org library, USPTO, and PubMed in order to make their content available to deep data exploration.", "text-hash": 4472913868502496196, "type": "paragraph" @@ -24042,25 +82382,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/156", "hash": 3356142343274371864, "orig": "DATA AVAILABILITY STATEMENT", - "properties": { - "data": [ - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/187" } ], + "sref": "#/texts/149", "text": "DATA AVAILABILITY STATEMENT", "text-hash": 17772737780533561635, "type": "subtitle-level-1" @@ -24069,25 +82396,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/157", "hash": 4778022085288441371, "orig": "Data subject to third party restrictions.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.6299999952316284 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/188" } ], + "sref": "#/texts/150", "text": "Data subject to third party restrictions.", "text-hash": 11662592888764396578, "type": "paragraph" @@ -24096,25 +82410,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/158", "hash": 4361549257598904601, "orig": "ORCID", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8500000238418579 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/189" } ], + "sref": "#/texts/151", "text": "ORCID", "text-hash": 329104162230294308, "type": "subtitle-level-1" @@ -24123,25 +82424,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/159", "hash": 3523281823889115814, "orig": "Peter W. J. Staar https://orcid.org/0000-0002-8088-0823 Michele Dolfi https://orcid.org/0000-0001-7216-8505 Christoph Auer https://orcid.org/0000-0001-5761-0422", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.5799999833106995 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/190" } ], + "sref": "#/texts/152", "text": "Peter W. J. Staar https://orcid.org/0000-0002-8088-0823 Michele Dolfi https://orcid.org/0000-0001-7216-8505 Christoph Auer https://orcid.org/0000-0001-5761-0422", "text-hash": 1167445296370300893, "type": "paragraph" @@ -24150,25 +82438,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/160", "hash": 8500729849894221215, "orig": "ENDNOTES", - "properties": { - "data": [ - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/191" } ], + "sref": "#/texts/153", "text": "ENDNOTES", "text-hash": 14650266124350583462, "type": "subtitle-level-1" @@ -24177,25 +82452,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/161", "hash": 7813503946963688644, "orig": "* For example, ElasticSearch (https://www.elastic.co) and ApacheLucene (https://lucene.apache.org).", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/192" } ], + "sref": "#/texts/154", "text": "* For example, ElasticSearch (https://www.elastic.co) and ApacheLucene (https://lucene.apache.org).", "text-hash": 12950565807350876671, "type": "paragraph" @@ -24204,25 +82466,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/162", "hash": 9230987401345399746, "orig": "\u2020 Most language entities from a technical field are typically represented in a very specific, rigorous way that can be easily captured by regular expressions. We found that in practice, regular expressions often outperform DL models, since we can simply encode these representations.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9100000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/193" } ], + "sref": "#/texts/155", "text": "\u2020 Most language entities from a technical field are typically represented in a very specific, rigorous way that can be easily captured by regular expressions. We found that in practice, regular expressions often outperform DL models, since we can simply encode these representations.", "text-hash": 6930355155738437881, "type": "paragraph" @@ -24231,25 +82480,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/163", "hash": 1997735398126013155, "orig": "\u2021 https://www.nltk.org", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/194" } ], + "sref": "#/texts/156", "text": "\u2021 https://www.nltk.org", "text-hash": 16829787344811603994, "type": "paragraph" @@ -24258,25 +82494,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/164", "hash": 13566764974477978642, "orig": "\u00a7 We follow the standard JSON-schema for references.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/195" } ], + "sref": "#/texts/157", "text": "\u00a7 We follow the standard JSON-schema for references.", "text-hash": 9498574747519310377, "type": "paragraph" @@ -24285,25 +82508,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/165", "hash": 4925537010788978399, "orig": "\u00b6 A rather simple similarity metric is to perform a fuzzy comparison of the names of the newly found entities (ie, the name field found in Listing 1). A more sophisticated approach is to use word embeddings to identify if two concepts are similar.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/196" } ], + "sref": "#/texts/158", "text": "\u00b6 A rather simple similarity metric is to perform a fuzzy comparison of the names of the newly found entities (ie, the name field found in Listing 1). A more sophisticated approach is to use word embeddings to identify if two concepts are similar.", "text-hash": 11235784383716113382, "type": "paragraph" @@ -24312,25 +82522,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/166", "hash": 16552665876195410077, "orig": "** For example Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/197" } ], + "sref": "#/texts/159", "text": "** For example Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb.", "text-hash": 4287966239864749480, "type": "paragraph" @@ -24339,25 +82536,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/167", "hash": 17579390613842440572, "orig": "\u2020\u2020 This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html).", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/198" } ], + "sref": "#/texts/160", "text": "\u2020\u2020 This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html).", "text-hash": 5855266272999108487, "type": "paragraph" @@ -24366,25 +82550,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/168", "hash": 722212543953276862, "orig": "\u2021\u2021 We chose Neo4J as a reference since it is currently the most popular graph database solution, see https://db-engines.com/en/ranking_ trend/graph+dbms", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/199" } ], + "sref": "#/texts/161", "text": "\u2021\u2021 We chose Neo4J as a reference since it is currently the most popular graph database solution, see https://db-engines.com/en/ranking_ trend/graph+dbms", "text-hash": 15713827668903361733, "type": "paragraph" @@ -24393,25 +82564,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/169", "hash": 11085577343317113173, "orig": "\u00a7\u00a7 http://graph500.org/", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8199999928474426 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/200" } ], + "sref": "#/texts/162", "text": "\u00a7\u00a7 http://graph500.org/", "text-hash": 7449211522826545008, "type": "paragraph" @@ -24420,25 +82578,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/170", "hash": 1792096630133661292, "orig": "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.6000000238418579 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/201" } ], + "sref": "#/texts/163", "text": "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", "text-hash": 16747146533825186967, "type": "paragraph" @@ -24447,25 +82592,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/171", "hash": 11462638369524745676, "orig": "*** We assume the weight can be represented by a float value.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/202" } ], + "sref": "#/texts/164", "text": "*** We assume the weight can be represented by a float value.", "text-hash": 7288340874592977655, "type": "paragraph" @@ -24474,25 +82606,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/172", "hash": 16611805225457383637, "orig": "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.4300000071525574 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/203" } ], + "sref": "#/texts/165", "text": "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", "text-hash": 4512570954370983408, "type": "paragraph" @@ -24501,25 +82620,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/173", "hash": 1531505125666754945, "orig": "\u2021\u2021\u2021 https://www.naturalearthdata.com/", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.6600000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/204" } ], + "sref": "#/texts/166", "text": "\u2021\u2021\u2021 https://www.naturalearthdata.com/", "text-hash": 16922240937803157180, "type": "paragraph" @@ -24528,25 +82634,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/174", "hash": 15684389308320953629, "orig": "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.6600000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/205" } ], + "sref": "#/texts/167", "text": "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/", "text-hash": 2845896203864732456, "type": "paragraph" @@ -24555,25 +82648,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/175", "hash": 14590754343934702701, "orig": "REFERENCES", - "properties": { - "data": [ - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/206" } ], + "sref": "#/texts/168", "text": "REFERENCES", "text-hash": 1858797456585454232, "type": "subtitle-level-1" @@ -24582,25 +82662,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/176", "hash": 10480452763767134455, "orig": "1. Staar Peter WJ, Michele D, Christoph A, Costas B. Corpus conversion service: a machine learning platform to ingest documents at scale. KDD '18. New York, NY: ACM; 2018:774-782.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.8299999833106995 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/207" } ], + "sref": "#/texts/169", "text": "1. Staar Peter WJ, Michele D, Christoph A, Costas B. Corpus conversion service: a machine learning platform to ingest documents at scale. KDD '18. New York, NY: ACM; 2018:774-782.", "text-hash": 7982224532612302350, "type": "paragraph" @@ -24609,25 +82676,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/177", "hash": 11866471329779366855, "orig": "2. Staar Peter WJ, Kl BP, Roxana I, et al. Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance. Chicago, IL: IEEE; 2016:812-821.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/208" } ], + "sref": "#/texts/170", "text": "2. Staar Peter WJ, Kl BP, Roxana I, et al. Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance. Chicago, IL: IEEE; 2016:812-821.", "text-hash": 8969674542364551422, "type": "paragraph" @@ -24636,25 +82690,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/178", "hash": 6016885898370676469, "orig": "3. Matteo M, Christoph A, Val'ery W, et al. An information extraction and knowledge graph platform for accelerating biochemical discoveries. ArXiv.abs/1907.08400; 2019.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.9200000166893005 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/209" } ], + "sref": "#/texts/171", "text": "3. Matteo M, Christoph A, Val'ery W, et al. An information extraction and knowledge graph platform for accelerating biochemical discoveries. ArXiv.abs/1907.08400; 2019.", "text-hash": 12797055744904705040, "type": "paragraph" @@ -24663,25 +82704,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/179", "hash": 13946275785662847920, "orig": "4. Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019). Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10. https://doi. org/10.2118/197610-MS.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.8199999928474426 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/210" } ], + "sref": "#/texts/172", "text": "4. Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019). Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10. https://doi. org/10.2118/197610-MS.", "text-hash": 2278118371277588683, "type": "paragraph" @@ -24690,25 +82718,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/180", "hash": 7693798302433367973, "orig": "5. Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.9300000071525574 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/211" } ], + "sref": "#/texts/173", "text": "5. Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016.", "text-hash": 13426003943449777376, "type": "paragraph" @@ -24717,25 +82732,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/181", "hash": 3109792572574236398, "orig": "6. Chiu Jason PC, Eric N. Named entity recognition with bidirectional LSTM-CNNs. TACL. 2016;4:357-370.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/212" } ], + "sref": "#/texts/174", "text": "6. Chiu Jason PC, Eric N. Named entity recognition with bidirectional LSTM-CNNs. TACL. 2016;4:357-370.", "text-hash": 17942512882695875605, "type": "paragraph" @@ -24744,25 +82746,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/182", "hash": 8111170387462350170, "orig": "7. Matthew H, Ines M. spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear. 2017.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.9200000166893005 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/213" } ], + "sref": "#/texts/175", "text": "7. Matthew H, Ines M. spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear. 2017.", "text-hash": 15035325662489879393, "type": "paragraph" @@ -24771,25 +82760,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/183", "hash": 14682702346227170925, "orig": "8. Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!), a significant petroleum system, northern Grand Banks area, offshore eastern Canada. Am Assoc Pet Geol Bull. 2005;89(9):1203-1237.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.8600000143051147 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/214" } ], + "sref": "#/texts/176", "text": "8. Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!), a significant petroleum system, northern Grand Banks area, offshore eastern Canada. Am Assoc Pet Geol Bull. 2005;89(9):1203-1237.", "text-hash": 1825488956803771544, "type": "paragraph" @@ -24798,25 +82774,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/184", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/215" } ], + "sref": "#/texts/177", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" @@ -24825,25 +82788,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/185", "hash": 11430385775112165283, "orig": "9. Estrada E. Subgraph centrality in complex networks. Phys Rev E. 2005;71(5):056103.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/218" } ], + "sref": "#/texts/178", "text": "9. Estrada E. Subgraph centrality in complex networks. Phys Rev E. 2005;71(5):056103.", "text-hash": 7383629567386653914, "type": "paragraph" @@ -24852,25 +82802,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/186", "hash": 5825495964576843004, "orig": "10. Estrada Ernesto, Higham Desmond J. (2010). Network Properties Revealed through Matrix Functions. SIAM Review, 52, (4), 696-714. http://dx.doi.org/10.1137/090761070.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.699999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/219" } ], + "sref": "#/texts/179", "text": "10. Estrada Ernesto, Higham Desmond J. (2010). Network Properties Revealed through Matrix Functions. SIAM Review, 52, (4), 696-714. http://dx.doi.org/10.1137/090761070.", "text-hash": 12713726337853489671, "type": "paragraph" @@ -24879,25 +82816,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/187", "hash": 5698421097735371040, "orig": "11. Labs Redis. Benchmarking RedisGraph 1.0. 2019.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.5899999737739563 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/220" } ], + "sref": "#/texts/180", "text": "11. Labs Redis. Benchmarking RedisGraph 1.0. 2019.", "text-hash": 10746649133789046619, "type": "paragraph" @@ -24906,25 +82830,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/188", "hash": 5870535063942256428, "orig": "12. TigerGraph. Real-Time Deep Link Analytics. 2018.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.550000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/221" } ], + "sref": "#/texts/181", "text": "12. TigerGraph. Real-Time Deep Link Analytics. 2018.", "text-hash": 12596629408176592215, "type": "paragraph" @@ -24933,25 +82844,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/189", "hash": 18196767266655606709, "orig": "13. Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra. Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/222" } ], + "sref": "#/texts/182", "text": "13. Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra. Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011.", "text-hash": 4940703957630358736, "type": "paragraph" @@ -24960,25 +82858,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/190", "hash": 3623403683642367845, "orig": "14. Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015). Graphs, Matrices, and the GraphBLAS: Seven Good Reasons. Procedia Computer Science, 51, 2453-2462. http://dx.doi.org/10.1016/j.procs.2015.05.353.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.7799999713897705 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/223" } ], + "sref": "#/texts/183", "text": "14. Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015). Graphs, Matrices, and the GraphBLAS: Seven Good Reasons. Procedia Computer Science, 51, 2453-2462. http://dx.doi.org/10.1016/j.procs.2015.05.353.", "text-hash": 1288017376570396064, "type": "paragraph" @@ -24987,25 +82872,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/191", "hash": 13936866850854297069, "orig": "15. Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications. Int J High Perform Comput Appl. 2011;25 (4):496-509.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/224" } ], + "sref": "#/texts/184", "text": "15. Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications. Int J High Perform Comput Appl. 2011;25 (4):496-509.", "text-hash": 2215522210708998936, "type": "paragraph" @@ -25014,25 +82886,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/192", "hash": 8497015665124263236, "orig": "16. Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS. 2016 IEEE HPEC. 2016; 1-9.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/225" } ], + "sref": "#/texts/185", "text": "16. Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS. 2016 IEEE HPEC. 2016; 1-9.", "text-hash": 14644960259055240063, "type": "paragraph" @@ -25041,25 +82900,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/193", "hash": 15947529491299956047, "orig": "17. Ariful A, Mathias J, Aydin B, Ng Esmond G. The reverse Cuthill-McKee algorithm in distributed-memory. 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2017: 22-31.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.7900000214576721 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/226" } ], + "sref": "#/texts/186", "text": "17. Ariful A, Mathias J, Aydin B, Ng Esmond G. The reverse Cuthill-McKee algorithm in distributed-memory. 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2017: 22-31.", "text-hash": 2515131343544103798, "type": "paragraph" @@ -25068,25 +82914,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/194", "hash": 14843401725435831033, "orig": "18. Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices. 2005 Pakistan Section Multitopic Conference. 2005 1-7.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.6600000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/227" } ], + "sref": "#/texts/187", "text": "18. Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices. 2005 Pakistan Section Multitopic Conference. 2005 1-7.", "text-hash": 1389998498969001988, "type": "paragraph" @@ -25095,25 +82928,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/195", "hash": 16676439669743530711, "orig": "19. Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.8899999856948853 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/228" } ], + "sref": "#/texts/188", "text": "19. Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997.", "text-hash": 4375808543141490670, "type": "paragraph" @@ -25122,25 +82942,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/196", "hash": 2986547206451163051, "orig": "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora. Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.699999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/229" } ], + "sref": "#/texts/189", "text": "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora. Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", "text-hash": 17781974298360978642, "type": "paragraph" @@ -25149,25 +82956,12 @@ "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/197", "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/230" } ], + "sref": "#/texts/190", "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" diff --git a/tests/data/texts/references.nlp.jsonl b/tests/data/texts/references.nlp.jsonl index b22a3472..e29c15e6 100644 --- a/tests/data/texts/references.nlp.jsonl +++ b/tests/data/texts/references.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, 18446744073709551615, 18446744073709551615, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, 18446744073709551615, 18446744073709551615, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, 18446744073709551615, 18446744073709551615, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, 18446744073709551615, 18446744073709551615, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, 18446744073709551615, 18446744073709551615, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, 18446744073709551615, 18446744073709551615, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "volume", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["reference", "pages", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", "reference", 0.89]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, 18446744073709551615, 18446744073709551615, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, 18446744073709551615, 18446744073709551615, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, 18446744073709551615, 18446744073709551615, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, 18446744073709551615, 18446744073709551615, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, 18446744073709551615, 18446744073709551615, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, 18446744073709551615, 18446744073709551615, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, 18446744073709551615, 18446744073709551615, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, 18446744073709551615, 18446744073709551615, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, 18446744073709551615, 18446744073709551615, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, 18446744073709551615, 18446744073709551615, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, 18446744073709551615, 18446744073709551615, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "volume", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["reference", "pages", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", "reference", 0.94]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 14523797031010145779, "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, 18446744073709551615, 18446744073709551615, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, 18446744073709551615, 18446744073709551615, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, 18446744073709551615, 18446744073709551615, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, 18446744073709551615, 18446744073709551615, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, 18446744073709551615, 18446744073709551615, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, 18446744073709551615, 18446744073709551615, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.89]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 4183773491823524238, "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, 18446744073709551615, 18446744073709551615, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, 18446744073709551615, 18446744073709551615, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, 18446744073709551615, 18446744073709551615, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, 18446744073709551615, 18446744073709551615, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, 18446744073709551615, 18446744073709551615, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, 18446744073709551615, 18446744073709551615, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, 18446744073709551615, 18446744073709551615, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, 18446744073709551615, 18446744073709551615, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, 18446744073709551615, 18446744073709551615, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, 18446744073709551615, 18446744073709551615, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, 18446744073709551615, 18446744073709551615, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.94]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/semantics.nlp.jsonl b/tests/data/texts/semantics.nlp.jsonl index 3e879094..198e9ed0 100644 --- a/tests/data/texts/semantics.nlp.jsonl +++ b/tests/data/texts/semantics.nlp.jsonl @@ -1,7 +1,7 @@ -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 7759316032128614217, "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", "header", 0.71]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14339562343989983509, "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", "meta-data", 0.8]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 18143996061359107703, "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", "meta-data", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} -{"applied-models": ["link", "numval"], "dloc": "", "hash": 11035282656876697300, "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", "meta-data", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14993488697470108654, "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, 18446744073709551615, 18446744073709551615, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, 18446744073709551615, 18446744073709551615, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", "text", 0.96]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, 18446744073709551615, 18446744073709551615, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", "reference", 0.89]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, 18446744073709551615, 18446744073709551615, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", "reference", 0.94]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 7759316032128614217, "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", 7759316032128614217, "TEXT", "#", "header", 0.71]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 14339562343989983509, "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", 14339562343989983509, "TEXT", "#", "meta-data", 0.8]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 18143996061359107703, "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", 18143996061359107703, "TEXT", "#", "meta-data", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} +{"applied-models": ["link", "numval"], "dloc": "#", "hash": 11035282656876697300, "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", 11035282656876697300, "TEXT", "#", "meta-data", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 14993488697470108654, "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, 18446744073709551615, 18446744073709551615, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, 18446744073709551615, 18446744073709551615, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", 14993488697470108654, "TEXT", "#", "text", 0.96]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 14523797031010145779, "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, 18446744073709551615, 18446744073709551615, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.89]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} +{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 4183773491823524238, "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, 18446744073709551615, 18446744073709551615, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.94]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/terms.nlp.jsonl b/tests/data/texts/terms.nlp.jsonl index 9c8b9ec3..de76c201 100644 --- a/tests/data/texts/terms.nlp.jsonl +++ b/tests/data/texts/terms.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, 18446744073709551615, 18446744073709551615, 0, 177, 0, 164, 0, 35, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, 18446744073709551615, 18446744073709551615, 7, 31, 7, 26, 1, 7, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, 18446744073709551615, 18446744073709551615, 27, 30, 24, 25, 5, 6, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, 18446744073709551615, 18446744073709551615, 48, 63, 43, 58, 10, 12, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, 18446744073709551615, 18446744073709551615, 64, 122, 59, 109, 12, 22, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, 18446744073709551615, 18446744073709551615, 73, 95, 68, 88, 15, 17, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, 18446744073709551615, 18446744073709551615, 96, 106, 89, 98, 17, 19, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, 18446744073709551615, 18446744073709551615, 107, 121, 99, 108, 19, 21, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, 18446744073709551615, 18446744073709551615, 123, 127, 110, 114, 23, 26, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, 18446744073709551615, 18446744073709551615, 124, 126, 111, 113, 24, 25, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, 18446744073709551615, 18446744073709551615, 128, 130, 115, 117, 26, 27, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, 18446744073709551615, 18446744073709551615, 133, 140, 120, 127, 28, 29, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, 18446744073709551615, 18446744073709551615, 141, 158, 128, 145, 29, 31, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, 18446744073709551615, 18446744073709551615, 159, 161, 146, 148, 31, 32, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, 18446744073709551615, 18446744073709551615, 162, 176, 149, 163, 32, 34, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, 18446744073709551615, 18446744073709551615, 170, 176, 157, 163, 33, 34, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, 18446744073709551615, 18446744073709551615, 178, 375, 165, 362, 35, 67, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, 18446744073709551615, 18446744073709551615, 186, 194, 173, 181, 37, 38, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, 18446744073709551615, 18446744073709551615, 195, 211, 182, 198, 38, 40, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, 18446744073709551615, 18446744073709551615, 204, 227, 191, 214, 39, 42, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, 18446744073709551615, 18446744073709551615, 216, 227, 203, 214, 41, 42, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, 18446744073709551615, 18446744073709551615, 228, 234, 215, 221, 42, 44, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, 18446744073709551615, 18446744073709551615, 235, 243, 222, 230, 44, 45, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16827399947339178045, 496128657873109341, 18446744073709551615, 18446744073709551615, 252, 293, 239, 280, 47, 53, true, "Atlantic, Pacific and Indian oceans,[XII]", "Atlantic, Pacific and Indian oceans,[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, 18446744073709551615, 18446744073709551615, 252, 260, 239, 247, 47, 48, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, 18446744073709551615, 18446744073709551615, 262, 269, 249, 256, 49, 50, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 3575373331357445963, 1702692810903063225, 18446744073709551615, 18446744073709551615, 274, 293, 261, 280, 51, 53, true, "Indian oceans,[XII]", "Indian oceans,[XII]"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, 18446744073709551615, 18446744073709551615, 281, 293, 268, 280, 52, 53, true, "oceans,[XII]", "oceans,[XII]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 16381206561323757770, 14007677850696664277, 18446744073709551615, 18446744073709551615, 294, 300, 281, 287, 53, 54, true, "giving", "giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, 18446744073709551615, 18446744073709551615, 308, 314, 295, 301, 56, 58, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, 18446744073709551615, 18446744073709551615, 315, 361, 302, 348, 58, 63, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, 18446744073709551615, 18446744073709551615, 362, 368, 349, 355, 63, 65, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, 18446744073709551615, 18446744073709551615, 369, 374, 356, 361, 65, 66, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, 18446744073709551615, 18446744073709551615, 376, 637, 363, 624, 67, 118, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, 18446744073709551615, 18446744073709551615, 376, 410, 363, 397, 67, 71, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, 18446744073709551615, 18446744073709551615, 389, 395, 376, 382, 68, 69, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, 18446744073709551615, 18446744073709551615, 411, 415, 398, 402, 71, 72, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, 18446744073709551615, 18446744073709551615, 416, 438, 403, 425, 72, 75, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, 18446744073709551615, 18446744073709551615, 439, 445, 426, 432, 75, 77, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, 18446744073709551615, 18446744073709551615, 446, 451, 433, 438, 77, 78, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, 18446744073709551615, 18446744073709551615, 461, 467, 448, 454, 80, 82, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, 18446744073709551615, 18446744073709551615, 492, 498, 479, 485, 86, 88, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, 18446744073709551615, 18446744073709551615, 505, 521, 492, 508, 90, 93, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, 18446744073709551615, 18446744073709551615, 515, 521, 502, 508, 92, 93, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, 18446744073709551615, 18446744073709551615, 522, 528, 509, 515, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, 18446744073709551615, 18446744073709551615, 541, 558, 528, 545, 98, 101, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, 18446744073709551615, 18446744073709551615, 559, 565, 546, 552, 101, 103, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, 18446744073709551615, 18446744073709551615, 566, 571, 553, 558, 103, 104, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, 18446744073709551615, 18446744073709551615, 579, 594, 566, 581, 107, 109, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, 18446744073709551615, 18446744073709551615, 595, 603, 582, 590, 109, 111, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, 18446744073709551615, 18446744073709551615, 619, 625, 606, 612, 113, 115, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, 18446744073709551615, 18446744073709551615, 626, 636, 613, 623, 115, 117, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, 18446744073709551615, 18446744073709551615, 638, 961, 625, 948, 118, 176, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, 18446744073709551615, 18446744073709551615, 642, 659, 629, 646, 119, 121, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, 18446744073709551615, 18446744073709551615, 660, 667, 647, 654, 121, 122, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, 18446744073709551615, 18446744073709551615, 668, 676, 655, 663, 122, 124, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, 18446744073709551615, 18446744073709551615, 677, 682, 664, 669, 124, 125, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, 18446744073709551615, 18446744073709551615, 683, 689, 670, 676, 125, 127, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, 18446744073709551615, 18446744073709551615, 709, 717, 696, 704, 130, 132, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, 18446744073709551615, 18446744073709551615, 736, 742, 723, 729, 134, 136, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, 18446744073709551615, 18446744073709551615, 778, 798, 765, 785, 143, 145, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, 18446744073709551615, 18446744073709551615, 799, 806, 786, 793, 145, 146, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, 18446744073709551615, 18446744073709551615, 807, 820, 794, 807, 146, 148, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, 18446744073709551615, 18446744073709551615, 821, 823, 808, 810, 148, 149, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, 18446744073709551615, 18446744073709551615, 824, 864, 811, 851, 149, 156, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, 18446744073709551615, 18446744073709551615, 839, 851, 826, 838, 152, 154, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, 18446744073709551615, 18446744073709551615, 856, 864, 843, 851, 155, 156, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, 18446744073709551615, 18446744073709551615, 865, 871, 852, 858, 156, 158, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, 18446744073709551615, 18446744073709551615, 872, 886, 859, 873, 158, 160, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, 18446744073709551615, 18446744073709551615, 892, 910, 879, 897, 162, 165, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, 18446744073709551615, 18446744073709551615, 916, 928, 903, 915, 167, 169, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, 18446744073709551615, 18446744073709551615, 929, 931, 916, 918, 169, 170, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, 18446744073709551615, 18446744073709551615, 962, 1384, 949, 1371, 176, 254, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, 18446744073709551615, 18446744073709551615, 966, 991, 953, 978, 177, 180, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, 18446744073709551615, 18446744073709551615, 992, 1020, 979, 1007, 180, 187, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, 18446744073709551615, 18446744073709551615, 998, 1000, 985, 987, 182, 183, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, 18446744073709551615, 18446744073709551615, 1007, 1019, 994, 1006, 184, 186, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, 18446744073709551615, 18446744073709551615, 1021, 1025, 1008, 1012, 187, 188, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, 18446744073709551615, 18446744073709551615, 1028, 1036, 1015, 1023, 189, 190, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, 18446744073709551615, 18446744073709551615, 1037, 1041, 1024, 1028, 190, 191, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, 18446744073709551615, 18446744073709551615, 1042, 1044, 1029, 1031, 191, 192, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, 18446744073709551615, 18446744073709551615, 1045, 1052, 1032, 1039, 192, 193, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, 18446744073709551615, 18446744073709551615, 1057, 1072, 1044, 1059, 194, 199, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, 18446744073709551615, 18446744073709551615, 1058, 1065, 1045, 1052, 195, 196, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, 18446744073709551615, 18446744073709551615, 1066, 1071, 1053, 1058, 196, 198, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, 18446744073709551615, 18446744073709551615, 1077, 1081, 1064, 1068, 200, 201, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, 18446744073709551615, 18446744073709551615, 1084, 1100, 1071, 1087, 202, 204, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, 18446744073709551615, 18446744073709551615, 1101, 1103, 1088, 1090, 204, 205, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, 18446744073709551615, 18446744073709551615, 1104, 1108, 1091, 1095, 205, 206, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, 18446744073709551615, 18446744073709551615, 1109, 1111, 1096, 1098, 206, 207, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, 18446744073709551615, 18446744073709551615, 1112, 1119, 1099, 1106, 207, 208, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, 18446744073709551615, 18446744073709551615, 1120, 1122, 1107, 1109, 208, 209, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, 18446744073709551615, 18446744073709551615, 1123, 1125, 1110, 1112, 209, 210, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, 18446744073709551615, 18446744073709551615, 1126, 1133, 1113, 1120, 210, 211, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, 18446744073709551615, 18446744073709551615, 1134, 1145, 1121, 1132, 211, 212, true, "2023.[5][8]", "2023.[5][8]"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, 18446744073709551615, 18446744073709551615, 1153, 1155, 1140, 1142, 213, 214, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 101756270285429158, 6309445736017161690, 18446744073709551615, 18446744073709551615, 1158, 1192, 1145, 1179, 215, 218, true, "unitary semi-presidential republic", "unitary semi-presidential republic"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, 18446744073709551615, 18446744073709551615, 1166, 1183, 1153, 1170, 216, 217, true, "semi-presidential", "semi-presidential"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, 18446744073709551615, 18446744073709551615, 1193, 1197, 1180, 1184, 218, 219, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, 18446744073709551615, 18446744073709551615, 1202, 1209, 1189, 1196, 220, 221, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, 18446744073709551615, 18446744073709551615, 1210, 1212, 1197, 1199, 221, 222, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, 18446744073709551615, 18446744073709551615, 1213, 1218, 1200, 1205, 222, 223, true, "Paris", "Paris"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7060736712546470087, 14254659311922306724, 18446744073709551615, 18446744073709551615, 1224, 1246, 1211, 1233, 225, 228, true, "countrys largest city", "country's largest city"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, 18446744073709551615, 18446744073709551615, 1224, 1233, 1211, 1220, 225, 226, true, "countrys", "country's"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, 18446744073709551615, 18446744073709551615, 1251, 1286, 1238, 1273, 229, 234, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, 18446744073709551615, 18446744073709551615, 1269, 1286, 1256, 1273, 232, 234, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, 18446744073709551615, 18446744073709551615, 1288, 1311, 1275, 1298, 235, 239, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, 18446744073709551615, 18446744073709551615, 1312, 1319, 1299, 1306, 239, 240, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, 18446744073709551615, 18446744073709551615, 1320, 1383, 1307, 1370, 240, 253, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, 18446744073709551615, 18446744073709551615, 1320, 1329, 1307, 1316, 240, 241, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, 18446744073709551615, 18446744073709551615, 1331, 1335, 1318, 1322, 242, 243, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, 18446744073709551615, 18446744073709551615, 1337, 1345, 1324, 1332, 244, 245, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, 18446744073709551615, 18446744073709551615, 1347, 1352, 1334, 1339, 246, 247, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, 18446744073709551615, 18446744073709551615, 1354, 1362, 1341, 1349, 248, 249, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, 18446744073709551615, 18446744073709551615, 1364, 1374, 1351, 1361, 250, 251, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, 18446744073709551615, 18446744073709551615, 1379, 1383, 1366, 1370, 252, 253, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", "en", 0.93], ["semantic", "text", 0.96]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, 18446744073709551615, 18446744073709551615, 0, 188, 0, 188, 0, 28, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, 18446744073709551615, 18446744073709551615, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, 18446744073709551615, 18446744073709551615, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004679635976, 18446744073709551615, 18446744073709551615, 24, 41, 24, 41, 5, 7, true, "interband pairing", "interband pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, 18446744073709551615, 18446744073709551615, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["term", "enum-term-mark-1", 4522339299074192207, "TEXT", "#", 1.0, 18178792033664231045, 5215905145529509301, 18446744073709551615, 18446744073709551615, 45, 87, 45, 87, 8, 13, true, "two-band s-wave and d-wave superconductors", "two-band s-wave and d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, 18446744073709551615, 18446744073709551615, 45, 53, 45, 53, 8, 9, true, "two-band", "two-band"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, 18446744073709551615, 18446744073709551615, 54, 60, 54, 60, 9, 10, true, "s-wave", "s-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15865120430118694837, 607662791561950043, 18446744073709551615, 18446744073709551615, 65, 87, 65, 87, 11, 13, true, "d-wave superconductors", "d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, 18446744073709551615, 18446744073709551615, 65, 71, 65, 71, 11, 12, true, "d-wave", "d-wave"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, 18446744073709551615, 18446744073709551615, 88, 92, 88, 92, 13, 14, true, "with", "with"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7066208506210013514, 1315102098090612032, 18446744073709551615, 18446744073709551615, 93, 105, 93, 105, 14, 16, true, "D4h symmetry", "D4h symmetry"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, 18446744073709551615, 18446744073709551615, 93, 96, 93, 96, 14, 15, true, "D4h", "D4h"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, 18446744073709551615, 18446744073709551615, 106, 113, 106, 113, 16, 18, true, "in both", "in both"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5172475826427571765, 16752879714615995236, 18446744073709551615, 18446744073709551615, 114, 137, 114, 137, 18, 20, true, "time-reversal invariant", "time-reversal invariant"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, 18446744073709551615, 18446744073709551615, 114, 127, 114, 127, 18, 19, true, "time-reversal", "time-reversal"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, 18446744073709551615, 18446744073709551615, 146, 148, 146, 148, 22, 23, true, "as", "as"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 10193294999568911218, 6331719907444433820, 18446744073709551615, 18446744073709551615, 149, 171, 149, 171, 23, 25, true, "time-reversal symmetry", "time-reversal symmetry"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, 18446744073709551615, 18446744073709551615, 149, 162, 149, 162, 23, 24, true, "time-reversal", "time-reversal"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 25, 26, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, 18446744073709551615, 18446744073709551615, 181, 187, 181, 187, 26, 27, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, 18446744073709551615, 18446744073709551615, 189, 384, 189, 384, 28, 58, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, 18446744073709551615, 18446744073709551615, 193, 201, 193, 201, 29, 30, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 30, 31, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, 18446744073709551615, 18446744073709551615, 205, 214, 205, 214, 31, 32, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, 18446744073709551615, 18446744073709551615, 215, 244, 215, 244, 32, 35, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, 18446744073709551615, 18446744073709551615, 249, 264, 249, 264, 36, 38, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, 18446744073709551615, 18446744073709551615, 265, 271, 265, 271, 38, 40, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, 18446744073709551615, 18446744073709551615, 272, 286, 272, 286, 40, 41, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, 18446744073709551615, 18446744073709551615, 288, 293, 288, 293, 42, 43, true, "nodes", "nodes"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 3766089650286616147, 5895288868427388531, 18446744073709551615, 18446744073709551615, 294, 309, 294, 309, 43, 45, true, "can (dis)appear", "can (dis)appear"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, 18446744073709551615, 18446744073709551615, 298, 309, 298, 309, 44, 45, true, "(dis)appear", "(dis)appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, 18446744073709551615, 18446744073709551615, 311, 316, 311, 316, 46, 47, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, 18446744073709551615, 18446744073709551615, 322, 327, 322, 327, 49, 50, true, "leave", "leave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106670696871780136, 17807492235586576248, 18446744073709551615, 18446744073709551615, 328, 351, 328, 351, 50, 52, true, "high-symmetry locations", "high-symmetry locations"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, 18446744073709551615, 18446744073709551615, 328, 341, 328, 341, 50, 51, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, 18446744073709551615, 18446744073709551615, 357, 374, 357, 374, 53, 55, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, 18446744073709551615, 18446744073709551615, 375, 383, 375, 383, 55, 57, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, 18446744073709551615, 18446744073709551615, 385, 594, 385, 594, 58, 93, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, 18446744073709551615, 18446744073709551615, 398, 404, 398, 404, 60, 62, true, "in the", "in the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15559186615879240368, 12910915472651789195, 18446744073709551615, 18446744073709551615, 405, 416, 405, 416, 62, 64, true, "d-wave case", "d-wave case"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, 18446744073709551615, 18446744073709551615, 405, 411, 405, 411, 62, 63, true, "d-wave", "d-wave"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, 18446744073709551615, 18446744073709551615, 421, 425, 421, 425, 66, 67, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, 18446744073709551615, 18446744073709551615, 426, 430, 426, 430, 67, 68, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, 18446744073709551615, 18446744073709551615, 440, 454, 440, 454, 70, 72, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, 18446744073709551615, 18446744073709551615, 455, 475, 455, 475, 72, 74, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, 18446744073709551615, 18446744073709551615, 481, 490, 481, 490, 75, 76, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, 18446744073709551615, 18446744073709551615, 491, 498, 491, 498, 76, 77, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, 18446744073709551615, 18446744073709551615, 499, 508, 499, 508, 77, 78, true, "increases", "increases"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 18352755674675419019, 8051640294707098683, 18446744073709551615, 18446744073709551615, 510, 547, 510, 547, 79, 84, true, "flat zero-energy Andreev bound states", "flat zero-energy Andreev bound states"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, 18446744073709551615, 18446744073709551615, 515, 526, 515, 526, 80, 81, true, "zero-energy", "zero-energy"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, 18446744073709551615, 18446744073709551615, 548, 555, 548, 555, 84, 86, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, 18446744073709551615, 18446744073709551615, 560, 570, 560, 570, 87, 88, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, 18446744073709551615, 18446744073709551615, 571, 573, 571, 573, 88, 89, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, 18446744073709551615, 18446744073709551615, 574, 593, 574, 593, 89, 92, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", "en", 0.88], ["semantic", "text", 0.99]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} +{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, 18446744073709551615, 18446744073709551615, 0, 177, 0, 164, 0, 35, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, 18446744073709551615, 18446744073709551615, 7, 31, 7, 26, 1, 7, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, 18446744073709551615, 18446744073709551615, 27, 30, 24, 25, 5, 6, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, 18446744073709551615, 18446744073709551615, 48, 63, 43, 58, 10, 12, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, 18446744073709551615, 18446744073709551615, 64, 122, 59, 109, 12, 22, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, 18446744073709551615, 18446744073709551615, 73, 95, 68, 88, 15, 17, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, 18446744073709551615, 18446744073709551615, 96, 106, 89, 98, 17, 19, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, 18446744073709551615, 18446744073709551615, 107, 121, 99, 108, 19, 21, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, 18446744073709551615, 18446744073709551615, 123, 127, 110, 114, 23, 26, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, 18446744073709551615, 18446744073709551615, 124, 126, 111, 113, 24, 25, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, 18446744073709551615, 18446744073709551615, 128, 130, 115, 117, 26, 27, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, 18446744073709551615, 18446744073709551615, 133, 140, 120, 127, 28, 29, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, 18446744073709551615, 18446744073709551615, 141, 158, 128, 145, 29, 31, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, 18446744073709551615, 18446744073709551615, 159, 161, 146, 148, 31, 32, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, 18446744073709551615, 18446744073709551615, 162, 176, 149, 163, 32, 34, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, 18446744073709551615, 18446744073709551615, 170, 176, 157, 163, 33, 34, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, 18446744073709551615, 18446744073709551615, 178, 375, 165, 362, 35, 67, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, 18446744073709551615, 18446744073709551615, 186, 194, 173, 181, 37, 38, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, 18446744073709551615, 18446744073709551615, 195, 211, 182, 198, 38, 40, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, 18446744073709551615, 18446744073709551615, 204, 227, 191, 214, 39, 42, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, 18446744073709551615, 18446744073709551615, 216, 227, 203, 214, 41, 42, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, 18446744073709551615, 18446744073709551615, 228, 234, 215, 221, 42, 44, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, 18446744073709551615, 18446744073709551615, 235, 243, 222, 230, 44, 45, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16827399947339178045, 496128657873109341, 18446744073709551615, 18446744073709551615, 252, 293, 239, 280, 47, 53, true, "Atlantic, Pacific and Indian oceans,[XII]", "Atlantic, Pacific and Indian oceans,[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, 18446744073709551615, 18446744073709551615, 252, 260, 239, 247, 47, 48, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, 18446744073709551615, 18446744073709551615, 262, 269, 249, 256, 49, 50, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 3575373331357445963, 1702692810903063225, 18446744073709551615, 18446744073709551615, 274, 293, 261, 280, 51, 53, true, "Indian oceans,[XII]", "Indian oceans,[XII]"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, 18446744073709551615, 18446744073709551615, 281, 293, 268, 280, 52, 53, true, "oceans,[XII]", "oceans,[XII]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 16381206561323757770, 14007677850696664277, 18446744073709551615, 18446744073709551615, 294, 300, 281, 287, 53, 54, true, "giving", "giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, 18446744073709551615, 18446744073709551615, 308, 314, 295, 301, 56, 58, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, 18446744073709551615, 18446744073709551615, 315, 361, 302, 348, 58, 63, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, 18446744073709551615, 18446744073709551615, 362, 368, 349, 355, 63, 65, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, 18446744073709551615, 18446744073709551615, 369, 374, 356, 361, 65, 66, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, 18446744073709551615, 18446744073709551615, 376, 637, 363, 624, 67, 118, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, 18446744073709551615, 18446744073709551615, 376, 410, 363, 397, 67, 71, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, 18446744073709551615, 18446744073709551615, 389, 395, 376, 382, 68, 69, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, 18446744073709551615, 18446744073709551615, 411, 415, 398, 402, 71, 72, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, 18446744073709551615, 18446744073709551615, 416, 438, 403, 425, 72, 75, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, 18446744073709551615, 18446744073709551615, 439, 445, 426, 432, 75, 77, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, 18446744073709551615, 18446744073709551615, 446, 451, 433, 438, 77, 78, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, 18446744073709551615, 18446744073709551615, 461, 467, 448, 454, 80, 82, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, 18446744073709551615, 18446744073709551615, 492, 498, 479, 485, 86, 88, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, 18446744073709551615, 18446744073709551615, 505, 521, 492, 508, 90, 93, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, 18446744073709551615, 18446744073709551615, 515, 521, 502, 508, 92, 93, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, 18446744073709551615, 18446744073709551615, 522, 528, 509, 515, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, 18446744073709551615, 18446744073709551615, 541, 558, 528, 545, 98, 101, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, 18446744073709551615, 18446744073709551615, 559, 565, 546, 552, 101, 103, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, 18446744073709551615, 18446744073709551615, 566, 571, 553, 558, 103, 104, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, 18446744073709551615, 18446744073709551615, 579, 594, 566, 581, 107, 109, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, 18446744073709551615, 18446744073709551615, 595, 603, 582, 590, 109, 111, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, 18446744073709551615, 18446744073709551615, 619, 625, 606, 612, 113, 115, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, 18446744073709551615, 18446744073709551615, 626, 636, 613, 623, 115, 117, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, 18446744073709551615, 18446744073709551615, 638, 961, 625, 948, 118, 176, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, 18446744073709551615, 18446744073709551615, 642, 659, 629, 646, 119, 121, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, 18446744073709551615, 18446744073709551615, 660, 667, 647, 654, 121, 122, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, 18446744073709551615, 18446744073709551615, 668, 676, 655, 663, 122, 124, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, 18446744073709551615, 18446744073709551615, 677, 682, 664, 669, 124, 125, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, 18446744073709551615, 18446744073709551615, 683, 689, 670, 676, 125, 127, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, 18446744073709551615, 18446744073709551615, 709, 717, 696, 704, 130, 132, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, 18446744073709551615, 18446744073709551615, 736, 742, 723, 729, 134, 136, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, 18446744073709551615, 18446744073709551615, 778, 798, 765, 785, 143, 145, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, 18446744073709551615, 18446744073709551615, 799, 806, 786, 793, 145, 146, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, 18446744073709551615, 18446744073709551615, 807, 820, 794, 807, 146, 148, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, 18446744073709551615, 18446744073709551615, 821, 823, 808, 810, 148, 149, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, 18446744073709551615, 18446744073709551615, 824, 864, 811, 851, 149, 156, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, 18446744073709551615, 18446744073709551615, 839, 851, 826, 838, 152, 154, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, 18446744073709551615, 18446744073709551615, 856, 864, 843, 851, 155, 156, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, 18446744073709551615, 18446744073709551615, 865, 871, 852, 858, 156, 158, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, 18446744073709551615, 18446744073709551615, 872, 886, 859, 873, 158, 160, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, 18446744073709551615, 18446744073709551615, 892, 910, 879, 897, 162, 165, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, 18446744073709551615, 18446744073709551615, 916, 928, 903, 915, 167, 169, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, 18446744073709551615, 18446744073709551615, 929, 931, 916, 918, 169, 170, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, 18446744073709551615, 18446744073709551615, 962, 1384, 949, 1371, 176, 254, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, 18446744073709551615, 18446744073709551615, 966, 991, 953, 978, 177, 180, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, 18446744073709551615, 18446744073709551615, 992, 1020, 979, 1007, 180, 187, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, 18446744073709551615, 18446744073709551615, 998, 1000, 985, 987, 182, 183, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, 18446744073709551615, 18446744073709551615, 1007, 1019, 994, 1006, 184, 186, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, 18446744073709551615, 18446744073709551615, 1021, 1025, 1008, 1012, 187, 188, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, 18446744073709551615, 18446744073709551615, 1028, 1036, 1015, 1023, 189, 190, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, 18446744073709551615, 18446744073709551615, 1037, 1041, 1024, 1028, 190, 191, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, 18446744073709551615, 18446744073709551615, 1042, 1044, 1029, 1031, 191, 192, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, 18446744073709551615, 18446744073709551615, 1045, 1052, 1032, 1039, 192, 193, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, 18446744073709551615, 18446744073709551615, 1057, 1072, 1044, 1059, 194, 199, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, 18446744073709551615, 18446744073709551615, 1058, 1065, 1045, 1052, 195, 196, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, 18446744073709551615, 18446744073709551615, 1066, 1071, 1053, 1058, 196, 198, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, 18446744073709551615, 18446744073709551615, 1077, 1081, 1064, 1068, 200, 201, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, 18446744073709551615, 18446744073709551615, 1084, 1100, 1071, 1087, 202, 204, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, 18446744073709551615, 18446744073709551615, 1101, 1103, 1088, 1090, 204, 205, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, 18446744073709551615, 18446744073709551615, 1104, 1108, 1091, 1095, 205, 206, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, 18446744073709551615, 18446744073709551615, 1109, 1111, 1096, 1098, 206, 207, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, 18446744073709551615, 18446744073709551615, 1112, 1119, 1099, 1106, 207, 208, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, 18446744073709551615, 18446744073709551615, 1120, 1122, 1107, 1109, 208, 209, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, 18446744073709551615, 18446744073709551615, 1123, 1125, 1110, 1112, 209, 210, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, 18446744073709551615, 18446744073709551615, 1126, 1133, 1113, 1120, 210, 211, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, 18446744073709551615, 18446744073709551615, 1134, 1145, 1121, 1132, 211, 212, true, "2023.[5][8]", "2023.[5][8]"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, 18446744073709551615, 18446744073709551615, 1153, 1155, 1140, 1142, 213, 214, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 101756270285429158, 6309445736017161690, 18446744073709551615, 18446744073709551615, 1158, 1192, 1145, 1179, 215, 218, true, "unitary semi-presidential republic", "unitary semi-presidential republic"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, 18446744073709551615, 18446744073709551615, 1166, 1183, 1153, 1170, 216, 217, true, "semi-presidential", "semi-presidential"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, 18446744073709551615, 18446744073709551615, 1193, 1197, 1180, 1184, 218, 219, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, 18446744073709551615, 18446744073709551615, 1202, 1209, 1189, 1196, 220, 221, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, 18446744073709551615, 18446744073709551615, 1210, 1212, 1197, 1199, 221, 222, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, 18446744073709551615, 18446744073709551615, 1213, 1218, 1200, 1205, 222, 223, true, "Paris", "Paris"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7060736712546470087, 14254659311922306724, 18446744073709551615, 18446744073709551615, 1224, 1246, 1211, 1233, 225, 228, true, "countrys largest city", "country's largest city"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, 18446744073709551615, 18446744073709551615, 1224, 1233, 1211, 1220, 225, 226, true, "countrys", "country's"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, 18446744073709551615, 18446744073709551615, 1251, 1286, 1238, 1273, 229, 234, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, 18446744073709551615, 18446744073709551615, 1269, 1286, 1256, 1273, 232, 234, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, 18446744073709551615, 18446744073709551615, 1288, 1311, 1275, 1298, 235, 239, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, 18446744073709551615, 18446744073709551615, 1312, 1319, 1299, 1306, 239, 240, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, 18446744073709551615, 18446744073709551615, 1320, 1383, 1307, 1370, 240, 253, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, 18446744073709551615, 18446744073709551615, 1320, 1329, 1307, 1316, 240, 241, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, 18446744073709551615, 18446744073709551615, 1331, 1335, 1318, 1322, 242, 243, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, 18446744073709551615, 18446744073709551615, 1337, 1345, 1324, 1332, 244, 245, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, 18446744073709551615, 18446744073709551615, 1347, 1352, 1334, 1339, 246, 247, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, 18446744073709551615, 18446744073709551615, 1354, 1362, 1341, 1349, 248, 249, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, 18446744073709551615, 18446744073709551615, 1364, 1374, 1351, 1361, 250, 251, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, 18446744073709551615, 18446744073709551615, 1379, 1383, 1366, 1370, 252, 253, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.96]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} +{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, 18446744073709551615, 18446744073709551615, 0, 188, 0, 188, 0, 28, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, 18446744073709551615, 18446744073709551615, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, 18446744073709551615, 18446744073709551615, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004679635976, 18446744073709551615, 18446744073709551615, 24, 41, 24, 41, 5, 7, true, "interband pairing", "interband pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, 18446744073709551615, 18446744073709551615, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["term", "enum-term-mark-1", 4522339299074192207, "TEXT", "#", 1.0, 18178792033664231045, 5215905145529509301, 18446744073709551615, 18446744073709551615, 45, 87, 45, 87, 8, 13, true, "two-band s-wave and d-wave superconductors", "two-band s-wave and d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, 18446744073709551615, 18446744073709551615, 45, 53, 45, 53, 8, 9, true, "two-band", "two-band"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, 18446744073709551615, 18446744073709551615, 54, 60, 54, 60, 9, 10, true, "s-wave", "s-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15865120430118694837, 607662791561950043, 18446744073709551615, 18446744073709551615, 65, 87, 65, 87, 11, 13, true, "d-wave superconductors", "d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, 18446744073709551615, 18446744073709551615, 65, 71, 65, 71, 11, 12, true, "d-wave", "d-wave"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, 18446744073709551615, 18446744073709551615, 88, 92, 88, 92, 13, 14, true, "with", "with"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7066208506210013514, 1315102098090612032, 18446744073709551615, 18446744073709551615, 93, 105, 93, 105, 14, 16, true, "D4h symmetry", "D4h symmetry"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, 18446744073709551615, 18446744073709551615, 93, 96, 93, 96, 14, 15, true, "D4h", "D4h"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, 18446744073709551615, 18446744073709551615, 106, 113, 106, 113, 16, 18, true, "in both", "in both"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5172475826427571765, 16752879714615995236, 18446744073709551615, 18446744073709551615, 114, 137, 114, 137, 18, 20, true, "time-reversal invariant", "time-reversal invariant"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, 18446744073709551615, 18446744073709551615, 114, 127, 114, 127, 18, 19, true, "time-reversal", "time-reversal"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, 18446744073709551615, 18446744073709551615, 146, 148, 146, 148, 22, 23, true, "as", "as"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 10193294999568911218, 6331719907444433820, 18446744073709551615, 18446744073709551615, 149, 171, 149, 171, 23, 25, true, "time-reversal symmetry", "time-reversal symmetry"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, 18446744073709551615, 18446744073709551615, 149, 162, 149, 162, 23, 24, true, "time-reversal", "time-reversal"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 25, 26, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, 18446744073709551615, 18446744073709551615, 181, 187, 181, 187, 26, 27, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, 18446744073709551615, 18446744073709551615, 189, 384, 189, 384, 28, 58, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, 18446744073709551615, 18446744073709551615, 193, 201, 193, 201, 29, 30, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 30, 31, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, 18446744073709551615, 18446744073709551615, 205, 214, 205, 214, 31, 32, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, 18446744073709551615, 18446744073709551615, 215, 244, 215, 244, 32, 35, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, 18446744073709551615, 18446744073709551615, 249, 264, 249, 264, 36, 38, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, 18446744073709551615, 18446744073709551615, 265, 271, 265, 271, 38, 40, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, 18446744073709551615, 18446744073709551615, 272, 286, 272, 286, 40, 41, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, 18446744073709551615, 18446744073709551615, 288, 293, 288, 293, 42, 43, true, "nodes", "nodes"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 3766089650286616147, 5895288868427388531, 18446744073709551615, 18446744073709551615, 294, 309, 294, 309, 43, 45, true, "can (dis)appear", "can (dis)appear"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, 18446744073709551615, 18446744073709551615, 298, 309, 298, 309, 44, 45, true, "(dis)appear", "(dis)appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, 18446744073709551615, 18446744073709551615, 311, 316, 311, 316, 46, 47, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, 18446744073709551615, 18446744073709551615, 322, 327, 322, 327, 49, 50, true, "leave", "leave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106670696871780136, 17807492235586576248, 18446744073709551615, 18446744073709551615, 328, 351, 328, 351, 50, 52, true, "high-symmetry locations", "high-symmetry locations"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, 18446744073709551615, 18446744073709551615, 328, 341, 328, 341, 50, 51, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, 18446744073709551615, 18446744073709551615, 357, 374, 357, 374, 53, 55, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, 18446744073709551615, 18446744073709551615, 375, 383, 375, 383, 55, 57, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, 18446744073709551615, 18446744073709551615, 385, 594, 385, 594, 58, 93, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, 18446744073709551615, 18446744073709551615, 398, 404, 398, 404, 60, 62, true, "in the", "in the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15559186615879240368, 12910915472651789195, 18446744073709551615, 18446744073709551615, 405, 416, 405, 416, 62, 64, true, "d-wave case", "d-wave case"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, 18446744073709551615, 18446744073709551615, 405, 411, 405, 411, 62, 63, true, "d-wave", "d-wave"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, 18446744073709551615, 18446744073709551615, 421, 425, 421, 425, 66, 67, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, 18446744073709551615, 18446744073709551615, 426, 430, 426, 430, 67, 68, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, 18446744073709551615, 18446744073709551615, 440, 454, 440, 454, 70, 72, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, 18446744073709551615, 18446744073709551615, 455, 475, 455, 475, 72, 74, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, 18446744073709551615, 18446744073709551615, 481, 490, 481, 490, 75, 76, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, 18446744073709551615, 18446744073709551615, 491, 498, 491, 498, 76, 77, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, 18446744073709551615, 18446744073709551615, 499, 508, 499, 508, 77, 78, true, "increases", "increases"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 18352755674675419019, 8051640294707098683, 18446744073709551615, 18446744073709551615, 510, 547, 510, 547, 79, 84, true, "flat zero-energy Andreev bound states", "flat zero-energy Andreev bound states"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, 18446744073709551615, 18446744073709551615, 515, 526, 515, 526, 80, 81, true, "zero-energy", "zero-energy"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, 18446744073709551615, 18446744073709551615, 548, 555, 548, 555, 84, 86, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, 18446744073709551615, 18446744073709551615, 560, 570, 560, 570, 87, 88, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, 18446744073709551615, 18446744073709551615, 571, 573, 571, 573, 88, 89, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, 18446744073709551615, 18446744073709551615, 574, 593, 574, 593, 89, 92, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.88], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} diff --git a/tests/data/texts/test_02A_text_01.jsonl b/tests/data/texts/test_02A_text_01.jsonl index deb0b90c..8b859d5c 100644 --- a/tests/data/texts/test_02A_text_01.jsonl +++ b/tests/data/texts/test_02A_text_01.jsonl @@ -1 +1 @@ -{"applied-models": ["cite", "expression", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "", "hash": 253473544312511038, "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, 18446744073709551615, 18446744073709551615, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, 18446744073709551615, 18446744073709551615, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", "en", 0.58]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"applied-models": ["cite", "expression", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "#", "hash": 253473544312511038, "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, 18446744073709551615, 18446744073709551615, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, 18446744073709551615, 18446744073709551615, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} diff --git a/tests/data/texts/test_02B_text_01.jsonl b/tests/data/texts/test_02B_text_01.jsonl index adbb6f57..7f30acab 100644 --- a/tests/data/texts/test_02B_text_01.jsonl +++ b/tests/data/texts/test_02B_text_01.jsonl @@ -1 +1 @@ -{"dloc": "", "hash": 253473544312511038, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", "en", 0.58]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"dloc": "#", "hash": 253473544312511038, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 5c522544..68abf697 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -10,7 +10,7 @@ from deepsearch_glm.nlp_train_semantic import train_semantic -GENERATE=False +GENERATE=True def round_floats(o): if isinstance(o, float): return round(o, 2) @@ -67,6 +67,9 @@ def test_02A_run_nlp_models_on_text(): for label in ["relations"]: assert label not in sres + print(tres["properties"]) + print(sres["properties"]) + assert tres==sres def test_02B_run_nlp_models_on_text(): @@ -313,7 +316,7 @@ def test_04C_references(): def test_05_to_legacy(): - model = init_nlp_model("reference") + model = init_nlp_model("reference;term") source = "./tests/data/docs/doc_01.old.json" @@ -330,10 +333,12 @@ def test_05_to_legacy(): with open(target_nlp, "w") as fw: fw.write(json.dumps(doc_j, indent=2)) + """ doc_i = to_legacy_document_format(doc_j, doc_i) with open(target_leg, "w") as fw: fw.write(json.dumps(doc_i, indent=2)) + """ else: with open(target_nlp, "r") as fr: doc_nlp = json.load(fr) @@ -342,7 +347,6 @@ def test_05_to_legacy(): with open(target_leg, "r") as fr: doc_leg = json.load(fr) doc_leg = round_floats(doc_leg) - doc_j = model.apply_on_doc(doc_i) doc_j = round_floats(doc_j) From 050de9a8d6f785ddd523c6256a6983e203e8e5e5 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Fri, 17 Nov 2023 15:46:35 +0100 Subject: [PATCH 10/22] updated the test_glm Signed-off-by: Peter Staar --- tests/data/glm/test_01A/glm_ref/topology.json | 354 +++++++++--------- tests/test_nlp.py | 2 +- 2 files changed, 178 insertions(+), 178 deletions(-) diff --git a/tests/data/glm/test_01A/glm_ref/topology.json b/tests/data/glm/test_01A/glm_ref/topology.json index c05dd0db..9d16cdf6 100644 --- a/tests/data/glm/test_01A/glm_ref/topology.json +++ b/tests/data/glm/test_01A/glm_ref/topology.json @@ -29,12 +29,12 @@ [ -1, "prev", - 8990 + 9209 ], [ 1, "next", - 9060 + 9283 ], [ 2, @@ -64,12 +64,12 @@ [ 32, "tax-dn", - 1119 + 1173 ], [ 33, "tax-up", - 1690 + 1768 ], [ 64, @@ -84,52 +84,52 @@ [ 66, "to-singular", - 304 + 310 ], [ 67, "to-plural", - 304 + 310 ], [ 96, "to-token", - 1116 + 1158 ], [ 97, "from-token", - 1116 + 1158 ], [ 98, "to-pos", - 1994 + 1891 ], [ 99, "from-pos", - 1994 + 1891 ], [ 100, "to-label", - 1432 + 1483 ], [ 101, "from-label", - 1432 + 1483 ], [ 102, "to-root", - 1072 + 1129 ], [ 103, "from-root", - 1057 + 1107 ], [ 128, @@ -144,32 +144,32 @@ [ 130, "to-text", - 1323 + 1405 ], [ 131, "from-text", - 1323 + 1405 ], [ 132, "to-table", - 5 + 0 ], [ 133, "from-table", - 5 + 0 ], [ 134, "to-doc", - 877 + 917 ], [ 135, "from-doc", - 877 + 917 ], [ 256, @@ -860,67 +860,67 @@ -1, "prev", 1, - 6625 + 6962 ], [ -1, "prev", 2, - 1382 + 1291 ], [ -1, "prev", 4, - 491 + 448 ], [ -1, "prev", 8, - 233 + 246 ], [ -1, "prev", 16, - 130 + 127 ], [ -1, "prev", 32, - 71 + 77 ], [ -1, "prev", 64, - 31 + 29 ], [ -1, "prev", 128, - 19 + 20 ], [ -1, "prev", 256, - 5 + 6 ], [ -1, "prev", 512, - 2 + 3 ], [ -1, "prev", 1024, - 1 + 0 ], [ -1, @@ -992,67 +992,67 @@ 1, "next", 1, - 6673 + 7010 ], [ 1, "next", 2, - 1389 + 1299 ], [ 1, "next", 4, - 496 + 455 ], [ 1, "next", 8, - 236 + 250 ], [ 1, "next", 16, - 135 + 132 ], [ 1, "next", 32, - 72 + 79 ], [ 1, "next", 64, - 32 + 29 ], [ 1, "next", 128, - 19 + 20 ], [ 1, "next", 256, - 5 + 6 ], [ 1, "next", 512, - 2 + 3 ], [ 1, "next", 1024, - 1 + 0 ], [ 1, @@ -1778,31 +1778,31 @@ 32, "tax-dn", 0, - 164 + 170 ], [ 32, "tax-dn", 1, - 299 + 312 ], [ 32, "tax-dn", 2, - 547 + 575 ], [ 32, "tax-dn", 4, - 77 + 75 ], [ 32, "tax-dn", 8, - 22 + 31 ], [ 32, @@ -1910,49 +1910,49 @@ 33, "tax-up", 0, - 164 + 170 ], [ 33, "tax-up", 1, - 637 + 659 ], [ 33, "tax-up", 2, - 652 + 684 ], [ 33, "tax-up", 4, - 128 + 135 ], [ 33, "tax-up", 8, - 65 + 75 ], [ 33, "tax-up", 16, - 38 + 35 ], [ 33, "tax-up", 32, - 4 + 7 ], [ 33, "tax-up", 64, - 2 + 3 ], [ 33, @@ -2312,7 +2312,7 @@ 66, "to-singular", 1, - 304 + 310 ], [ 66, @@ -2444,7 +2444,7 @@ 67, "to-plural", 1, - 304 + 310 ], [ 67, @@ -2576,31 +2576,31 @@ 96, "to-token", 1, - 855 + 881 ], [ 96, "to-token", 2, - 108 + 114 ], [ 96, "to-token", 4, - 91 + 100 ], [ 96, "to-token", 8, - 40 + 39 ], [ 96, "to-token", 16, - 16 + 18 ], [ 96, @@ -2708,31 +2708,31 @@ 97, "from-token", 1, - 855 + 881 ], [ 97, "from-token", 2, - 108 + 114 ], [ 97, "from-token", 4, - 91 + 100 ], [ 97, "from-token", 8, - 40 + 39 ], [ 97, "from-token", 16, - 16 + 18 ], [ 97, @@ -2840,37 +2840,37 @@ 98, "to-pos", 1, - 1196 + 1135 ], [ 98, "to-pos", 2, - 332 + 291 ], [ 98, "to-pos", 4, - 224 + 227 ], [ 98, "to-pos", 8, - 114 + 112 ], [ 98, "to-pos", 16, - 69 + 66 ], [ 98, "to-pos", 32, - 32 + 33 ], [ 98, @@ -2972,37 +2972,37 @@ 99, "from-pos", 1, - 1196 + 1135 ], [ 99, "from-pos", 2, - 332 + 291 ], [ 99, "from-pos", 4, - 224 + 227 ], [ 99, "from-pos", 8, - 114 + 112 ], [ 99, "from-pos", 16, - 69 + 66 ], [ 99, "from-pos", 32, - 32 + 33 ], [ 99, @@ -3104,43 +3104,43 @@ 100, "to-label", 1, - 890 + 913 ], [ 100, "to-label", 2, - 237 + 239 ], [ 100, "to-label", 4, - 139 + 159 ], [ 100, "to-label", 8, - 92 + 97 ], [ 100, "to-label", 16, - 58 + 55 ], [ 100, "to-label", 32, - 8 + 11 ], [ 100, "to-label", 64, - 7 + 8 ], [ 100, @@ -3236,43 +3236,43 @@ 101, "from-label", 1, - 890 + 913 ], [ 101, "from-label", 2, - 237 + 239 ], [ 101, "from-label", 4, - 139 + 159 ], [ 101, "from-label", 8, - 92 + 97 ], [ 101, "from-label", 16, - 58 + 55 ], [ 101, "from-label", 32, - 8 + 11 ], [ 101, "from-label", 64, - 7 + 8 ], [ 101, @@ -3362,25 +3362,25 @@ 102, "to-root", 0, - 167 + 173 ], [ 102, "to-root", 1, - 771 + 814 ], [ 102, "to-root", 2, - 105 + 103 ], [ 102, "to-root", 4, - 17 + 27 ], [ 102, @@ -3494,31 +3494,31 @@ 103, "from-root", 0, - 105 + 107 ], [ 103, "from-root", 1, - 510 + 534 ], [ 103, "from-root", 2, - 339 + 345 ], [ 103, "from-root", 4, - 69 + 82 ], [ 103, "from-root", 8, - 24 + 29 ], [ 103, @@ -3896,19 +3896,19 @@ 130, "to-text", 1, - 1220 + 1289 ], [ 130, "to-text", 2, - 66 + 77 ], [ 130, "to-text", 4, - 33 + 35 ], [ 130, @@ -4028,19 +4028,19 @@ 131, "from-text", 1, - 1220 + 1289 ], [ 131, "from-text", 2, - 66 + 77 ], [ 131, "from-text", 4, - 33 + 35 ], [ 131, @@ -4160,13 +4160,13 @@ 132, "to-table", 1, - 1 + 0 ], [ 132, "to-table", 2, - 3 + 0 ], [ 132, @@ -4178,7 +4178,7 @@ 132, "to-table", 8, - 1 + 0 ], [ 132, @@ -4292,13 +4292,13 @@ 133, "from-table", 1, - 1 + 0 ], [ 133, "from-table", 2, - 3 + 0 ], [ 133, @@ -4310,7 +4310,7 @@ 133, "from-table", 8, - 1 + 0 ], [ 133, @@ -4424,31 +4424,31 @@ 134, "to-doc", 1, - 681 + 706 ], [ 134, "to-doc", 2, - 92 + 96 ], [ 134, "to-doc", 4, - 58 + 68 ], [ 134, "to-doc", 8, - 31 + 30 ], [ 134, "to-doc", 16, - 11 + 13 ], [ 134, @@ -4556,31 +4556,31 @@ 135, "from-doc", 1, - 681 + 706 ], [ 135, "from-doc", 2, - 92 + 96 ], [ 135, "from-doc", 4, - 58 + 68 ], [ 135, "from-doc", 8, - 31 + 30 ], [ 135, "from-doc", 16, - 11 + 13 ], [ 135, @@ -5112,7 +5112,7 @@ [ 1, "syntax", - 36 + 37 ], [ 2, @@ -5127,17 +5127,17 @@ [ 9, "conn", - 179 + 185 ], [ 10, "term", - 985 + 1028 ], [ 11, "verb", - 447 + 468 ], [ 16, @@ -5389,7 +5389,7 @@ 1, "syntax", 1, - 0 + 1 ], [ 1, @@ -5401,7 +5401,7 @@ 1, "syntax", 4, - 4 + 2 ], [ 1, @@ -5413,37 +5413,37 @@ 1, "syntax", 16, - 5 + 7 ], [ 1, "syntax", 32, - 1 + 0 ], [ 1, "syntax", 64, - 4 + 6 ], [ 1, "syntax", 128, - 7 + 4 ], [ 1, "syntax", 256, - 10 + 9 ], [ 1, "syntax", 512, - 2 + 5 ], [ 1, @@ -5779,7 +5779,7 @@ 9, "conn", 0, - 0 + 6 ], [ 9, @@ -5911,7 +5911,7 @@ 10, "term", 0, - 110 + 153 ], [ 10, @@ -6043,7 +6043,7 @@ 11, "verb", 0, - 0 + 21 ], [ 11, @@ -7253,7 +7253,7 @@ 1, "syntax", 2, - 0 + 1 ], [ 1, @@ -7277,7 +7277,7 @@ 1, "syntax", 32, - 4 + 3 ], [ 1, @@ -7289,7 +7289,7 @@ 1, "syntax", 128, - 10 + 11 ], [ 1, @@ -7649,19 +7649,19 @@ 9, "conn", 2, - 106 + 107 ], [ 9, "conn", 4, - 31 + 34 ], [ 9, "conn", 8, - 22 + 23 ], [ 9, @@ -7673,7 +7673,7 @@ 9, "conn", 32, - 7 + 8 ], [ 9, @@ -7769,7 +7769,7 @@ 10, "term", 0, - 108 + 111 ], [ 10, @@ -7781,25 +7781,25 @@ 10, "term", 2, - 705 + 734 ], [ 10, "term", 4, - 120 + 129 ], [ 10, "term", 8, - 39 + 40 ], [ 10, "term", 16, - 11 + 12 ], [ 10, @@ -7913,13 +7913,13 @@ 11, "verb", 2, - 358 + 376 ], [ 11, "verb", 4, - 67 + 70 ], [ 11, @@ -7931,13 +7931,13 @@ 11, "verb", 16, - 4 + 3 ], [ 11, "verb", 32, - 1 + 2 ], [ 11, @@ -9123,19 +9123,19 @@ 1, "syntax", 8, - 3 + 1 ], [ 1, "syntax", 16, - 5 + 7 ], [ 1, "syntax", 32, - 0 + 1 ], [ 1, @@ -9147,13 +9147,13 @@ 1, "syntax", 128, - 3 + 1 ], [ 1, "syntax", 256, - 7 + 9 ], [ 1, @@ -9165,13 +9165,13 @@ 1, "syntax", 1024, - 5 + 4 ], [ 1, "syntax", 2048, - 0 + 1 ], [ 1, @@ -9501,37 +9501,37 @@ 9, "conn", 1, - 105 + 106 ], [ 9, "conn", 2, - 17 + 21 ], [ 9, "conn", 4, - 21 + 22 ], [ 9, "conn", 8, - 17 + 14 ], [ 9, "conn", 16, - 9 + 10 ], [ 9, "conn", 32, - 5 + 7 ], [ 9, @@ -9627,37 +9627,37 @@ 10, "term", 0, - 108 + 111 ], [ 10, "term", 1, - 681 + 706 ], [ 10, "term", 2, - 92 + 96 ], [ 10, "term", 4, - 58 + 68 ], [ 10, "term", 8, - 31 + 30 ], [ 10, "term", 16, - 11 + 13 ], [ 10, @@ -9765,19 +9765,19 @@ 11, "verb", 1, - 352 + 369 ], [ 11, "verb", 2, - 52 + 51 ], [ 11, "verb", 4, - 25 + 30 ], [ 11, diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 68abf697..96b45768 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -10,7 +10,7 @@ from deepsearch_glm.nlp_train_semantic import train_semantic -GENERATE=True +GENERATE=False def round_floats(o): if isinstance(o, float): return round(o, 2) From 1022752ee43b9dc3cde6324a4a0e958ccc47b5a8 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Fri, 17 Nov 2023 15:47:17 +0100 Subject: [PATCH 11/22] bumped version to 0.7.0 Signed-off-by: Peter Staar --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 62d3f245..92a84cef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "deepsearch-glm" -version = "0.6.4" +version = "0.7.0" description = "Graph Language Models" authors = ["Peter Staar "] license = "MIT" From 44cb878703d0ebb7ac85e9ed5d9073203efb453d Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Fri, 17 Nov 2023 16:11:38 +0100 Subject: [PATCH 12/22] removed unnecessary functions Signed-off-by: Peter Staar --- src/andromeda/tooling/structs/subjects/base.h | 2 -- src/andromeda/tooling/structs/subjects/document.h | 15 ++++++++++----- src/andromeda/tooling/structs/subjects/figure.h | 7 +++++-- src/andromeda/tooling/structs/subjects/table.h | 7 +++++-- src/andromeda/tooling/structs/subjects/text.h | 3 ++- tests/test_nlp.py | 3 ++- 6 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/andromeda/tooling/structs/subjects/base.h b/src/andromeda/tooling/structs/subjects/base.h index 551e2946..57f0d92c 100644 --- a/src/andromeda/tooling/structs/subjects/base.h +++ b/src/andromeda/tooling/structs/subjects/base.h @@ -117,8 +117,6 @@ namespace andromeda hash_type hash; // hash of the item hash_type dhash; // hash of the document of the item - - protected: std::string dloc; // location of item in the document # std::string sref; diff --git a/src/andromeda/tooling/structs/subjects/document.h b/src/andromeda/tooling/structs/subjects/document.h index f46a9c1a..94d39d68 100644 --- a/src/andromeda/tooling/structs/subjects/document.h +++ b/src/andromeda/tooling/structs/subjects/document.h @@ -522,7 +522,8 @@ namespace andromeda { instances.emplace_back(subj->get_hash(), subj->get_name(), - subj->get_path(), + //subj->get_path(), + subj->get_self_ref(), ent); } } @@ -534,7 +535,8 @@ namespace andromeda { instances.emplace_back(subj->get_hash(), subj->get_name(), - subj->get_path(), + //subj->get_path(), + subj->get_self_ref(), ent); } @@ -544,7 +546,8 @@ namespace andromeda { instances.emplace_back(capt->get_hash(), capt->get_name(), - capt->get_path(), + //capt->get_path(), + capt->get_self_ref(), ent); } } @@ -556,7 +559,8 @@ namespace andromeda { instances.emplace_back(subj->get_hash(), subj->get_name(), - subj->get_path(), + //subj->get_path(), + subj->get_self_ref(), ent); } @@ -566,7 +570,8 @@ namespace andromeda { instances.emplace_back(capt->get_hash(), capt->get_name(), - capt->get_path(), + //capt->get_path(), + capt->get_self_ref(), ent); } } diff --git a/src/andromeda/tooling/structs/subjects/figure.h b/src/andromeda/tooling/structs/subjects/figure.h index 07ae2321..7f4cf143 100644 --- a/src/andromeda/tooling/structs/subjects/figure.h +++ b/src/andromeda/tooling/structs/subjects/figure.h @@ -26,7 +26,8 @@ namespace andromeda virtual bool from_json(const nlohmann::json& item, const std::vector >& doc_provs); - std::string get_path() const { return (provs.size()>0? (provs.at(0)->get_item_ref()):"#"); } + //std::string get_path() const { return (provs.size()>0? (provs.at(0)->get_item_ref()):"#"); } + bool is_valid() { return (base_subject::valid); } bool set_data(const nlohmann::json& data); @@ -38,10 +39,12 @@ namespace andromeda void set_hash(); - public: + private: sval_type conf; std::string created_by; + + public: std::vector > provs; diff --git a/src/andromeda/tooling/structs/subjects/table.h b/src/andromeda/tooling/structs/subjects/table.h index 841fbd79..478f384c 100644 --- a/src/andromeda/tooling/structs/subjects/table.h +++ b/src/andromeda/tooling/structs/subjects/table.h @@ -24,7 +24,8 @@ namespace andromeda void clear(); - std::string get_path() const { return (provs.size()>0? (provs.at(0)->get_item_ref()):"#"); } + //std::string get_path() const { return (provs.size()>0? (provs.at(0)->get_item_ref()):"#"); } + bool is_valid() { return (base_subject::valid); } virtual nlohmann::json to_json(const std::set& filters); @@ -64,10 +65,12 @@ namespace andromeda bool is_legacy(const nlohmann::json& grid); - public: + private: sval_type conf; std::string created_by; + + public: std::vector > provs; diff --git a/src/andromeda/tooling/structs/subjects/text.h b/src/andromeda/tooling/structs/subjects/text.h index 2e707a61..f5013376 100644 --- a/src/andromeda/tooling/structs/subjects/text.h +++ b/src/andromeda/tooling/structs/subjects/text.h @@ -21,7 +21,8 @@ namespace andromeda void finalise(); void clear(); - std::string get_path() const { return (provs.size()>0? (provs.at(0)->get_item_ref()):"#"); } + //std::string get_path() const { return (provs.size()>0? (provs.at(0)->get_item_ref()):"#"); } + bool is_valid() { return (base_subject::valid and text_element::text_valid); } virtual nlohmann::json to_json(const std::set& filters); diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 96b45768..b0224a17 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -329,7 +329,8 @@ def test_05_to_legacy(): if GENERATE: doc_j = model.apply_on_doc(doc_i) - + doc_j = round_floats(doc_j) + with open(target_nlp, "w") as fw: fw.write(json.dumps(doc_j, indent=2)) From dc26289ebc95dd29bb5aaf93c37b7acd24d92b5e Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Fri, 17 Nov 2023 17:17:29 +0100 Subject: [PATCH 13/22] updated the instances Signed-off-by: Peter Staar --- src/andromeda/nlp/ent/cite.h | 2 +- src/andromeda/nlp/ent/expression.h | 14 ++++---- src/andromeda/nlp/ent/geoloc.h | 2 +- src/andromeda/nlp/ent/link.h | 2 +- src/andromeda/nlp/ent/name.h | 2 +- src/andromeda/nlp/ent/numval.h | 4 +-- src/andromeda/nlp/ent/parenthesis.h | 14 ++++---- src/andromeda/nlp/ent/pos_pattern.h | 10 +++--- src/andromeda/nlp/ent/quote.h | 2 +- src/andromeda/nlp/ent/reference.h | 2 +- src/andromeda/nlp/ent/sentence.h | 2 +- src/andromeda/nlp/rel/abbreviation.h | 2 +- src/andromeda/tooling/models/base_rgx_model.h | 2 +- .../tooling/structs/items/ent/instance.h | 35 +++++++++++-------- .../tooling/structs/subjects/document.h | 15 ++++++++ .../tooling/structs/subjects/table.h | 7 ++-- src/andromeda/tooling/structs/subjects/text.h | 6 ++-- 17 files changed, 73 insertions(+), 50 deletions(-) diff --git a/src/andromeda/nlp/ent/cite.h b/src/andromeda/nlp/ent/cite.h index c9f91903..a79f249c 100644 --- a/src/andromeda/nlp/ent/cite.h +++ b/src/andromeda/nlp/ent/cite.h @@ -120,7 +120,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), CITE, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/ent/expression.h b/src/andromeda/nlp/ent/expression.h index 0f2a3c21..a2b0b79e 100644 --- a/src/andromeda/nlp/ent/expression.h +++ b/src/andromeda/nlp/ent/expression.h @@ -413,7 +413,7 @@ namespace andromeda } //LOG_S(INFO) << __FUNCTION__ << " " << l << ": " << orig; - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); @@ -456,7 +456,7 @@ namespace andromeda orig = subj.from_ctok_range(ctok_range); name = utils::replace(orig, "'", ""); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); @@ -497,7 +497,7 @@ namespace andromeda orig = subj.from_ctok_range(ctok_range); name = utils::replace(orig, ".", ""); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); @@ -569,7 +569,7 @@ namespace andromeda if(keep) { - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); @@ -631,7 +631,7 @@ namespace andromeda if(keep) { - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, expr.get_subtype(), name, orig, subj(i,j).get_coor(), @@ -694,7 +694,7 @@ namespace andromeda orig = subj.from_ctok_range(ctok_range); name = normalise(orig); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); @@ -830,7 +830,7 @@ namespace andromeda { //std::size_t max_id = subj.get_max_ent_hash(); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, "wtoken-concatenation", name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/ent/geoloc.h b/src/andromeda/nlp/ent/geoloc.h index defb36da..024669a4 100644 --- a/src/andromeda/nlp/ent/geoloc.h +++ b/src/andromeda/nlp/ent/geoloc.h @@ -189,7 +189,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), GEOLOC, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/ent/link.h b/src/andromeda/nlp/ent/link.h index a5c9d904..f4cff781 100644 --- a/src/andromeda/nlp/ent/link.h +++ b/src/andromeda/nlp/ent/link.h @@ -138,7 +138,7 @@ namespace andromeda // remove spaces name = utils::replace(name, " ", ""); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), LINK, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/ent/name.h b/src/andromeda/nlp/ent/name.h index 08f7a7b1..4d1c3e98 100644 --- a/src/andromeda/nlp/ent/name.h +++ b/src/andromeda/nlp/ent/name.h @@ -189,7 +189,7 @@ namespace andromeda if(keep) { - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), NAME, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/ent/numval.h b/src/andromeda/nlp/ent/numval.h index 270a0790..d258404c 100644 --- a/src/andromeda/nlp/ent/numval.h +++ b/src/andromeda/nlp/ent/numval.h @@ -173,7 +173,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), TEXT, subj.get_self_ref(), NUMVAL, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); @@ -234,7 +234,7 @@ namespace andromeda std::string orig = subj(i,j).from_char_range(char_range); std::string name = subj(i,j).from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), NUMVAL, expr.get_subtype(), name, orig, subj(i,j).get_coor(), diff --git a/src/andromeda/nlp/ent/parenthesis.h b/src/andromeda/nlp/ent/parenthesis.h index 7b777ca4..283fcb70 100644 --- a/src/andromeda/nlp/ent/parenthesis.h +++ b/src/andromeda/nlp/ent/parenthesis.h @@ -119,7 +119,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), PARENTHESIS, expr.get_subtype(), name, orig, char_range, @@ -172,13 +172,13 @@ namespace andromeda auto row_span = subj(i,j).get_row_span(); auto col_span = subj(i,j).get_col_span(); - subj.instances.emplace_back(subj.get_hash(), - PARENTHESIS, expr.get_subtype(), - name, orig, + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), + PARENTHESIS, expr.get_subtype(), + name, orig, coor, row_span, col_span, - char_range, - ctok_range, - wtok_range); + char_range, + ctok_range, + wtok_range); utils::mask(text, char_range); } diff --git a/src/andromeda/nlp/ent/pos_pattern.h b/src/andromeda/nlp/ent/pos_pattern.h index 44b47164..c96e376a 100644 --- a/src/andromeda/nlp/ent/pos_pattern.h +++ b/src/andromeda/nlp/ent/pos_pattern.h @@ -199,7 +199,7 @@ namespace andromeda not contains(char_range, ranges_02) and (char_range[1]-char_range[0])>1) { - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), name, subtype, text, orig, char_range, ctok_range, wtok_range); @@ -255,11 +255,11 @@ namespace andromeda not contains(char_range, ranges_02) and (char_range[1]-char_range[0])>1) { - subj.instances.emplace_back(subj.get_hash(), - name, subtype, - text, orig, + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), + name, subtype, + text, orig, coor, row_span, col_span, - char_range, ctok_range, wtok_range); + char_range, ctok_range, wtok_range); } } } diff --git a/src/andromeda/nlp/ent/quote.h b/src/andromeda/nlp/ent/quote.h index 9a54bfcb..9e9c6567 100644 --- a/src/andromeda/nlp/ent/quote.h +++ b/src/andromeda/nlp/ent/quote.h @@ -112,7 +112,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), QUOTE, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/ent/reference.h b/src/andromeda/nlp/ent/reference.h index 03950ab4..4904a2bd 100644 --- a/src/andromeda/nlp/ent/reference.h +++ b/src/andromeda/nlp/ent/reference.h @@ -261,7 +261,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), REFERENCE, label, name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/ent/sentence.h b/src/andromeda/nlp/ent/sentence.h index be861cd3..6c5f17aa 100644 --- a/src/andromeda/nlp/ent/sentence.h +++ b/src/andromeda/nlp/ent/sentence.h @@ -108,7 +108,7 @@ namespace andromeda std::string sent = orig.substr(char_range[0], char_range[1]-char_range[0]); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), SENTENCE, "", sent, sent, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/rel/abbreviation.h b/src/andromeda/nlp/rel/abbreviation.h index 378243f9..bfd92a40 100644 --- a/src/andromeda/nlp/rel/abbreviation.h +++ b/src/andromeda/nlp/rel/abbreviation.h @@ -96,7 +96,7 @@ namespace andromeda (not filter_01.match(orig)) and // no all lower-case words (not filter_02.match(orig))) // no numbers { - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), ABBREVIATION, ent_j.get_subtype(), //ent_j.get_name(), ent_j.get_orig(), name, orig, crng, ctok_rng, wtok_rng); diff --git a/src/andromeda/tooling/models/base_rgx_model.h b/src/andromeda/tooling/models/base_rgx_model.h index 9213c19e..64547704 100644 --- a/src/andromeda/tooling/models/base_rgx_model.h +++ b/src/andromeda/tooling/models/base_rgx_model.h @@ -140,7 +140,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), TEXT, subj.get_self_ref(), this->get_name(), expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/tooling/structs/items/ent/instance.h b/src/andromeda/tooling/structs/items/ent/instance.h index 4d3ccdbf..04b31353 100644 --- a/src/andromeda/tooling/structs/items/ent/instance.h +++ b/src/andromeda/tooling/structs/items/ent/instance.h @@ -82,14 +82,14 @@ namespace andromeda base_instance(); - base_instance(hash_type subj_hash, + base_instance(hash_type subj_hash, subject_name subj_name, std::string subj_path, model_name type, range_type char_range, range_type ctok_range, range_type wtok_range); // Paragraph entity - base_instance(hash_type subj_hash, + base_instance(hash_type subj_hash, subject_name subj_name, std::string subj_path, model_name type, std::string subtype, std::string name, std::string orig, range_type char_range, @@ -97,7 +97,7 @@ namespace andromeda range_type wtok_range); // Table entity - base_instance(hash_type subj_hash, + base_instance(hash_type subj_hash, subject_name subj_name, std::string subj_path, model_name type, std::string subtype, std::string name, std::string orig, table_range_type coor, @@ -107,10 +107,12 @@ namespace andromeda range_type ctok_range, range_type wtok_range); + /* // Document entity base_instance(hash_type subj_hash, subject_name subj_name, std::string subj_path, const base_instance& other); - + */ + bool is_wtok_range_match() { return wtok_range_match; } bool verify_wtok_range_match(std::vector& wtokens); @@ -206,14 +208,14 @@ namespace andromeda base_instance::base_instance() {} - base_instance::base_instance(hash_type subj_hash, + base_instance::base_instance(hash_type subj_hash, subject_name subj_name, std::string subj_path, model_name type, range_type char_range, range_type ctok_range, range_type wtok_range): subj_hash(subj_hash), - subj_name(TEXT), - subj_path("#"), + subj_name(subj_name), + subj_path(subj_path), ehash(DEFAULT_HASH), ihash(DEFAULT_HASH), @@ -245,16 +247,15 @@ namespace andromeda wtok_range_match = (wtok_range[0]instances) { + /* instances.emplace_back(subj->get_hash(), subj->get_name(), //subj->get_path(), subj->get_self_ref(), ent); + */ + instances.push_back(ent); } } //LOG_S(INFO) << "total #-insts: " << instances.size(); @@ -533,22 +536,28 @@ namespace andromeda { for(auto& ent:subj->instances) { + /* instances.emplace_back(subj->get_hash(), subj->get_name(), //subj->get_path(), subj->get_self_ref(), ent); + */ + instances.push_back(ent); } for(auto& capt:subj->captions) { for(auto& ent:capt->instances) { + /* instances.emplace_back(capt->get_hash(), capt->get_name(), //capt->get_path(), capt->get_self_ref(), ent); + */ + instances.push_back(ent); } } } @@ -557,22 +566,28 @@ namespace andromeda { for(auto& ent:subj->instances) { + /* instances.emplace_back(subj->get_hash(), subj->get_name(), //subj->get_path(), subj->get_self_ref(), ent); + */ + instances.push_back(ent); } for(auto& capt:subj->captions) { for(auto& ent:capt->instances) { + /* instances.emplace_back(capt->get_hash(), capt->get_name(), //capt->get_path(), capt->get_self_ref(), ent); + */ + instances.push_back(ent); } } } diff --git a/src/andromeda/tooling/structs/subjects/table.h b/src/andromeda/tooling/structs/subjects/table.h index 478f384c..4b320547 100644 --- a/src/andromeda/tooling/structs/subjects/table.h +++ b/src/andromeda/tooling/structs/subjects/table.h @@ -431,8 +431,8 @@ namespace andromeda range_type min_range = {0, 0}; table_range_type table_min_range = {0, 0}; - base_instance fake(base_subject::hash, NULL_MODEL, - "fake", "fake", "fake", + base_instance fake(base_subject::hash, TABLE, get_self_ref(), + NULL_MODEL, "fake", "fake", "fake", coor, table_min_range, table_min_range, min_range, min_range, min_range); @@ -449,7 +449,8 @@ namespace andromeda std::numeric_limits::max(), std::numeric_limits::max()}; - base_instance fake(base_subject::hash, NULL_MODEL, "fake", "fake", "fake", + base_instance fake(base_subject::hash, TABLE, get_self_ref(), + NULL_MODEL, "fake", "fake", "fake", coor, table_max_range, table_max_range, max_range, max_range, max_range); diff --git a/src/andromeda/tooling/structs/subjects/text.h b/src/andromeda/tooling/structs/subjects/text.h index f5013376..c9b6f37b 100644 --- a/src/andromeda/tooling/structs/subjects/text.h +++ b/src/andromeda/tooling/structs/subjects/text.h @@ -208,7 +208,8 @@ namespace andromeda typename std::vector::iterator subject::insts_beg(std::array char_rng) { - base_instance fake(base_subject::hash, NULL_MODEL, "fake", "fake", "fake", + base_instance fake(base_subject::hash, TEXT, get_self_ref(), + NULL_MODEL, "fake", "fake", "fake", char_rng, {0,0}, {0,0}); return std::lower_bound(instances.begin(), instances.end(), fake); @@ -216,7 +217,8 @@ namespace andromeda typename std::vector::iterator subject::insts_end(std::array char_rng) { - base_instance fake(base_subject::hash, NULL_MODEL, "fake", "fake", "fake", + base_instance fake(base_subject::hash, TEXT, get_self_ref(), + NULL_MODEL, "fake", "fake", "fake", char_rng, {0,0}, {0,0}); return std::upper_bound(instances.begin(), instances.end(), fake); From bad3a865a99e53a9b788c72c841b17883537b37e Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Mon, 20 Nov 2023 12:46:11 +0100 Subject: [PATCH 14/22] working on reproducibility Signed-off-by: Peter Staar --- deepsearch_glm/nlp_utils.py | 51 +- src/andromeda/nlp/cls/semantic.h | 6 +- .../tooling/producers/impl/document.h | 2 +- .../tooling/structs/elements/table_element.h | 44 +- .../tooling/structs/elements/text_element.h | 10 +- .../tooling/structs/items/cls/base.h | 30 +- .../tooling/structs/items/ent/instance.h | 14 +- .../tooling/structs/items/rel/base.h | 35 +- .../tooling/structs/subjects/document.h | 307 +- .../structs/subjects/document/doc_captions.h | 21 +- .../subjects/document/doc_normalisation.h | 39 +- .../structs/subjects/document/doc_order.h | 12 +- .../tooling/structs/subjects/figure.h | 11 +- .../tooling/structs/subjects/table.h | 18 +- src/andromeda/tooling/structs/subjects/text.h | 2 - tests/data/docs/1806.02284.nlp.json | 68282 ++++++++-------- tests/data/docs/doc_01.nlp.json | 8010 +- tests/test_nlp.py | 75 +- 18 files changed, 40565 insertions(+), 36404 deletions(-) diff --git a/deepsearch_glm/nlp_utils.py b/deepsearch_glm/nlp_utils.py index ec1d6aae..fdd04b37 100644 --- a/deepsearch_glm/nlp_utils.py +++ b/deepsearch_glm/nlp_utils.py @@ -101,33 +101,38 @@ def print_on_shell(text, result): def extract_references_from_doc(doc, verbose=False): - texts = doc["texts"] + texts = pd.DataFrame.from_records(doc["texts"]) - df = pd.DataFrame(doc["instances"]["data"], - columns=doc["instances"]["headers"]) + props = pd.DataFrame(doc["properties"]["data"], + columns=doc["properties"]["headers"]) + + insts = pd.DataFrame(doc["instances"]["data"], + columns=doc["instances"]["headers"]) + + refs = props[props["label"]=="reference"] + + #print("references: \n") + #print(refs) wrapper = textwrap.TextWrapper(width=70) - result=[] - for i,item in enumerate(doc["texts"]): + results=[] + + for i,ref in refs.iterrows(): + + text = texts[texts["hash"]==ref["subj_hash"]] + refc = insts[insts["subj_hash"]==ref["subj_hash"]] - path = f"#/texts/{i}" + #print(text["text"]) + #print(refc) + + results.append( + { + "text": text["text"], + "path": text["sref"], + "instances": refc.to_records() + } + ) - labels=[] - for row in item["properties"]["data"]: - labels.append(row[item["properties"]["headers"].index("label")]) - - if "reference" in labels: - - refs = df[df["subj_path"]==path] - result.append({"text": item["text"], "instances": refs}) - - if verbose: - print(f"text: ") #{type_}, labels: ", ",".join(labels)) - for line in wrapper.wrap(text=item["text"]): - print(f"\t{line}") - - print(refs[["type", "subtype", "original"]]) - - return result + return results diff --git a/src/andromeda/nlp/cls/semantic.h b/src/andromeda/nlp/cls/semantic.h index b60d43ac..67195a26 100644 --- a/src/andromeda/nlp/cls/semantic.h +++ b/src/andromeda/nlp/cls/semantic.h @@ -312,6 +312,7 @@ namespace andromeda return false; } + /* uint64_t abs_ind=-1, intro_ind=-1, ref_ind=-1; for(uint64_t ind=0; ind::write(doc_type& subj) { - std::filesystem::path filepath = subj.filepath; + std::filesystem::path filepath = subj.get_filepath(); std::filesystem::path filename = filepath.filename(); //std::filesystem::path filedir = filepath.dirname(); diff --git a/src/andromeda/tooling/structs/elements/table_element.h b/src/andromeda/tooling/structs/elements/table_element.h index 89791534..d9b8786f 100644 --- a/src/andromeda/tooling/structs/elements/table_element.h +++ b/src/andromeda/tooling/structs/elements/table_element.h @@ -58,30 +58,9 @@ namespace andromeda table_element::table_element(nlohmann::json& json_cell) { - from_json(json_cell); - - text_element::set(orig, NULL, NULL); + from_json(json_cell); } - /* - table_element::table_element(uint64_t i, - uint64_t j, - std::string orig): - text_element(), - i(i), j(j), - - row_span({i,i+1}), - col_span({j,j+1}), - - row_header(false), - col_header(false), - - numeric(false) - { - text_element::set(orig, NULL, NULL); - } - */ - table_element::table_element(uint64_t i, uint64_t j, std::array row_span, std::array col_span, @@ -100,7 +79,8 @@ namespace andromeda numeric(false) { - text_element::set(orig, NULL, NULL); + //text_element::set(orig, NULL, NULL); + text_element::set_text(orig); } nlohmann::json table_element::to_json() @@ -149,12 +129,22 @@ namespace andromeda i = json_cell.at("row").get(); j = json_cell.at("col").get(); - row_span = json_cell.at("row-span").get>(); - col_span = json_cell.at("col-span").get>(); + row_span = json_cell.at("row-span").get >(); + col_span = json_cell.at("col-span").get >(); + + std::string ctext = json_cell.at("text").get(); + text_element::set_text(ctext); - text = json_cell.at("text").get(); type = json_cell.at("type").get(); - bbox = json_cell.at("bbox").get >(); + + if(json_cell.at("bbox").is_array()) + { + bbox = json_cell.at("bbox").get >(); + } + else + { + bbox = {0.0, 0.0, 0.0, 0.0}; + } row_header = json_cell.at("row-header").get(); col_header = json_cell.at("col-header").get(); diff --git a/src/andromeda/tooling/structs/elements/text_element.h b/src/andromeda/tooling/structs/elements/text_element.h index b04f22f2..34771c0c 100644 --- a/src/andromeda/tooling/structs/elements/text_element.h +++ b/src/andromeda/tooling/structs/elements/text_element.h @@ -198,17 +198,21 @@ namespace andromeda bool text_element::set_text(const std::string& ctext) { clear(); - + + //LOG_S(INFO) << ctext << " -> " << orig << " -> " << text; + orig = utils::strip(ctext); text = orig; + //LOG_S(INFO) << ctext << " -> " << orig << " -> " << text; + if(orig.size()==0) { return false; } - + len = orig.size(); - + text_valid = utf8::is_valid(orig.c_str(), orig.c_str()+len); text_hash = utils::to_reproducible_hash(orig); diff --git a/src/andromeda/tooling/structs/items/cls/base.h b/src/andromeda/tooling/structs/items/cls/base.h index 366db299..6fcf687e 100644 --- a/src/andromeda/tooling/structs/items/cls/base.h +++ b/src/andromeda/tooling/structs/items/cls/base.h @@ -149,13 +149,37 @@ namespace andromeda bool operator<(const base_property& lhs, const base_property& rhs) { - if(lhs.model==rhs.model) + if(lhs.subj_path==rhs.subj_path) { - return lhs.conf>rhs.conf; + if(lhs.model==rhs.model) + { + return lhs.conf>rhs.conf; + } + else + { + return (lhs.model()); subj_path = row.at(4).get(); - conf = (row.at(5).get())/100.0; - //conf = (row.at(5).get())/100.0; + conf = (row.at(5).get()); ehash = row.at(6).get(); ihash = row.at(7).get(); @@ -554,13 +553,6 @@ namespace andromeda return SHORT_TABLE_HEADERS; } - /* - std::string base_instance::get_name() const - { - return name; - } - */ - std::string base_instance::get_reference() const { std::string ref = subj_path; @@ -579,6 +571,10 @@ namespace andromeda { nlohmann::json result = nlohmann::json::object(); { + result["subj_hash"] = subj_hash; + result["subj_name"] = to_string(subj_name); + result["subj_path"] = subj_path; + result["ehash"] = ehash; result["ihash"] = ihash; diff --git a/src/andromeda/tooling/structs/items/rel/base.h b/src/andromeda/tooling/structs/items/rel/base.h index b08ce182..f3d62626 100644 --- a/src/andromeda/tooling/structs/items/rel/base.h +++ b/src/andromeda/tooling/structs/items/rel/base.h @@ -32,6 +32,8 @@ namespace andromeda const base_instance& inst_i, const base_instance& inst_j); + friend bool operator<(const base_relation& lhs, const base_relation& rhs); + nlohmann::json to_json_row(); bool from_json_row(const nlohmann::json& row); @@ -41,9 +43,6 @@ namespace andromeda hash_type get_hash_i() { return hash_i; } hash_type get_hash_j() { return hash_j; } - - //hash_type get_ihash_i() { return ihash_i; } - //hash_type get_ihash_j() { return ihash_j; } private: @@ -51,9 +50,6 @@ namespace andromeda val_type conf; hash_type hash_i, hash_j; - //hash_type hash_i, ihash_i; - //hash_type hash_j, ihash_j; - std::string name_i, name_j; }; @@ -127,15 +123,34 @@ namespace andromeda flvr(to_flvr(name)), conf(conf), - hash_i(inst_i.get_ehash()), - //ihash_i(inst_i.ihash), + //hash_i(inst_i.get_ehash()), + hash_i(inst_i.get_ihash()), - hash_j(inst_j.get_ehash()), - //ihash_j(inst_j.ihash), + //hash_j(inst_j.get_ehash()), + hash_j(inst_j.get_ihash()), name_i(inst_i.get_name()), name_j(inst_j.get_name()) {} + + bool operator<(const base_relation& lhs, const base_relation& rhs) + { + if(lhs.flvr==rhs.flvr) + { + if(lhs.hash_i==rhs.hash_i) + { + return (rhs.hash_i >& get_pages() { return pages; } + std::vector >& get_provs() { return provs; } + + + void show(bool txt=true, bool mdls=false, bool ctokens=false, bool wtokens=true, bool prps=true, bool insts=true, bool rels=true); @@ -87,11 +92,8 @@ namespace andromeda void init_provs(); void show_provs(); - void clear_properties_from_texts(); - void clear_properties_from_tables(); - - void join_properties_with_texts(); - void join_properties_with_tables(); + void join_properties(); + void join_instances(); private: @@ -112,14 +114,14 @@ namespace andromeda bool finalise_instances(); bool finalise_relations(); - public: + private: std::filesystem::path filepath; uint64_t doc_hash; std::string doc_name; - nlohmann::json orig, dscr; + nlohmann::json orig, dscr, info; std::vector > pages; std::vector > provs; @@ -127,6 +129,8 @@ namespace andromeda std::vector > body; std::vector > meta; + public: + std::vector > > texts; std::vector > > tables; std::vector > > figures; @@ -142,6 +146,10 @@ namespace andromeda doc_hash(-1), doc_name(""), + orig(nlohmann::json::value_t::null), + dscr(nlohmann::json::value_t::null), + info(nlohmann::json::value_t::null), + pages(), provs(), @@ -165,16 +173,30 @@ namespace andromeda nlohmann::json subject::to_json(const std::set& filters) { nlohmann::json result = base_subject::_to_json(filters); + + result["description"] = nlohmann::json::object({}); + if(dscr!=nlohmann::json::value_t::null) + { + result["description"] = dscr; + } + + result["file-info"] = nlohmann::json::object({}); + if(info!=nlohmann::json::value_t::null) + { + result["file-info"] = info; + } - if(orig.count("description")) + /* + if(orig.count("description")) { - result["description"] = orig["description"]; + result["description"] = orig["description"]; } - else + else { - result["description"] = nlohmann::json::object({}); + result["description"] = nlohmann::json::object({}); } - + */ + // updated the description with predefined labels in schema { auto& desc = result.at("description"); @@ -231,8 +253,7 @@ namespace andromeda std::set doc_filters = { "hash", "dloc", "prov", "text", "data", - "captions", "footnotes", "mentions", - "properties"}; + "captions", "footnotes", "mentions"}; base_subject::to_json(result, texts_lbl, texts, doc_filters); base_subject::to_json(result, tables_lbl, tables, doc_filters); @@ -250,19 +271,22 @@ namespace andromeda bool subject::from_json(const nlohmann::json& doc) { base_subject::_from_json(doc); - + base_subject::from_json(doc, pages_lbl, pages); base_subject::from_json(doc, provs_lbl, provs); - base_subject::from_json(doc, provs, texts_lbl , texts ); - base_subject::from_json(doc, provs, tables_lbl , tables ); + base_subject::from_json(doc, provs, texts_lbl, texts); + base_subject::from_json(doc, provs, tables_lbl, tables); base_subject::from_json(doc, provs, figures_lbl, figures); - + base_subject::from_json(doc, provs, page_headers_lbl, page_headers); base_subject::from_json(doc, provs, page_footers_lbl, page_footers); base_subject::from_json(doc, provs, footnotes_lbl, footnotes); base_subject::from_json(doc, provs, other_lbl, other); + + join_properties(); + join_instances(); return true; } @@ -277,9 +301,10 @@ namespace andromeda { base_subject::clear(); - dscr = nlohmann::json::object({}); - orig = nlohmann::json::object({}); - + orig = nlohmann::json::value_t::null; + dscr = nlohmann::json::value_t::null; + info = nlohmann::json::value_t::null; + body.clear(); meta.clear(); @@ -362,6 +387,11 @@ namespace andromeda dscr = data.at("description"); } + if(data.count("file-info")) + { + info = data.at("file-info"); + } + base_subject::dloc = doc_name + "#"; } @@ -460,7 +490,6 @@ namespace andromeda bool subject::finalise_properties() { // only keep document global properties - std::set > doc_properties={}; for(auto& prop:properties) @@ -506,7 +535,9 @@ namespace andromeda } figure->properties.clear(); } - + + std::sort(properties.begin(), properties.end()); + return true; } @@ -516,33 +547,16 @@ namespace andromeda for(auto& subj:texts) { - //LOG_S(INFO) << __FUNCTION__ << ": " << subj.instances.size(); - for(auto& ent:subj->instances) { - /* - instances.emplace_back(subj->get_hash(), - subj->get_name(), - //subj->get_path(), - subj->get_self_ref(), - ent); - */ instances.push_back(ent); } } - //LOG_S(INFO) << "total #-insts: " << instances.size(); for(auto& subj:tables) { for(auto& ent:subj->instances) { - /* - instances.emplace_back(subj->get_hash(), - subj->get_name(), - //subj->get_path(), - subj->get_self_ref(), - ent); - */ instances.push_back(ent); } @@ -550,13 +564,6 @@ namespace andromeda { for(auto& ent:capt->instances) { - /* - instances.emplace_back(capt->get_hash(), - capt->get_name(), - //capt->get_path(), - capt->get_self_ref(), - ent); - */ instances.push_back(ent); } } @@ -566,13 +573,6 @@ namespace andromeda { for(auto& ent:subj->instances) { - /* - instances.emplace_back(subj->get_hash(), - subj->get_name(), - //subj->get_path(), - subj->get_self_ref(), - ent); - */ instances.push_back(ent); } @@ -580,18 +580,13 @@ namespace andromeda { for(auto& ent:capt->instances) { - /* - instances.emplace_back(capt->get_hash(), - capt->get_name(), - //capt->get_path(), - capt->get_self_ref(), - ent); - */ instances.push_back(ent); } } } + std::sort(instances.begin(), instances.end()); + return true; } @@ -613,86 +608,170 @@ namespace andromeda { relations.push_back(rel); } + + for(auto& capt:table->captions) + { + for(auto& rel:capt->relations) + { + relations.push_back(rel); + } + } } + for(auto& figure:figures) + { + for(auto& rel:figure->relations) + { + relations.push_back(rel); + } + + for(auto& capt:figure->captions) + { + for(auto& rel:capt->relations) + { + relations.push_back(rel); + } + } + } + + std::sort(relations.begin(), relations.end()); + return true; } - void subject::clear_properties_from_texts() + void subject::join_properties() { - for(auto& text:texts) - { - text->properties.clear(); - } - } - - void subject::join_properties_with_texts() - { - clear_properties_from_texts(); + for(auto& text:texts) { text->properties.clear(); } + for(auto& table:tables) { table->properties.clear(); } + for(auto& figure:figures) { figure->properties.clear(); } for(auto& prop:this->properties) { std::string path = prop.get_subj_path(); - LOG_S(INFO) << path; - + auto parts = utils::split(path, "/"); - if(parts.size()<3) - { - continue; - } - - int ind = std::stoi(parts.at(2)); - LOG_S(INFO) << " -> " << ind; - - if(parts.at(1)==texts_lbl and indget_hash()==prop.get_subj_hash()); texts.at(ind)->properties.push_back(prop); + } + else if(parts.size()==3 and parts.at(1)==tables_lbl) + { + int ind = std::stoi(parts.at(2)); + + assert(tables.at(ind)->get_hash()==prop.get_subj_hash()); + tables.at(ind)->properties.push_back(prop); } + else if(parts.size()==3 and parts.at(1)==figures_lbl) + { + int ind = std::stoi(parts.at(2)); + + assert(figures.at(ind)->get_hash()==prop.get_subj_hash()); + figures.at(ind)->properties.push_back(prop); + } + else if(parts.size()==5 and parts.at(1)==tables_lbl and parts.at(3)==captions_lbl) + { + int ti = std::stoi(parts.at(2)); + int ci = std::stoi(parts.at(4)); + + assert(tables.at(ti)->get_hash()==prop.get_subj_hash()); + tables.at(ti)->captions.at(ci)->properties.push_back(prop); + } + else if(parts.size()==5 and parts.at(1)==figures_lbl and parts.at(3)==captions_lbl) + { + int fi = std::stoi(parts.at(2)); + int ci = std::stoi(parts.at(4)); + + assert(figures.at(fi)->get_hash()==prop.get_subj_hash()); + figures.at(fi)->captions.at(ci)->properties.push_back(prop); + } else - {} + { + LOG_S(WARNING) << "ignoring properties with subj-path: " << path; + } } } - void subject::clear_properties_from_tables() - { - for(auto& table:tables) - { - table->properties.clear(); - } - } - - void subject::join_properties_with_tables() + void subject::join_instances() { - for(auto& table:tables) - { - table->properties.clear(); - } + for(auto& text:texts) { text->instances.clear(); } + for(auto& table:tables) { table->instances.clear(); } + for(auto& figure:figures) { figure->instances.clear(); } - for(auto& prop:this->properties) + for(auto& inst:this->instances) { - std::string path = prop.get_subj_path(); - LOG_S(INFO) << path; + std::string path = inst.get_subj_path(); auto parts = utils::split(path, "/"); - if(parts.size()<3) + + if(parts.size()==1) // document instances, nothing to be done ... + {} + else if(parts.size()==3 and parts.at(1)==texts_lbl) { - continue; + int ind = std::stoi(parts.at(2)); + + assert(texts.at(ind)->get_hash()==inst.get_subj_hash()); + texts.at(ind)->instances.push_back(inst); + } + else if(parts.size()==3 and parts.at(1)==tables_lbl) + { + int ind = std::stoi(parts.at(2)); + + //assert(tables.at(ind)->get_hash()==inst.get_subj_hash()); + if(tables.at(ind)->get_hash()==inst.get_subj_hash()) + { + tables.at(ind)->instances.push_back(inst); + } + else + { + LOG_S(INFO) << tables.at(ind)->get_hash() << "\t" << inst.get_subj_hash(); + LOG_S(INFO) << inst.to_json().dump(2); + } } - - int ind = std::stoi(parts.at(2)); - LOG_S(INFO) << " -> " << ind; - - if(parts.at(1)==tables_lbl and indget_hash()==inst.get_subj_hash()); + //figures.at(ind)->instances.push_back(inst); + + if(figures.at(ind)->get_hash()==inst.get_subj_hash()) + { + figures.at(ind)->instances.push_back(inst); + } + else + { + LOG_S(INFO) << figures.at(ind)->get_hash() << "\t" << inst.get_subj_hash(); + LOG_S(INFO) << inst.to_json().dump(2); + } + } + else if(parts.size()==5 and parts.at(1)==tables_lbl and parts.at(3)==captions_lbl) { - assert(tables.at(ind)->get_hash()==prop.get_subj_hash()); - tables.at(ind)->properties.push_back(prop); + int ti = std::stoi(parts.at(2)); + int ci = std::stoi(parts.at(4)); + + assert(tables.at(ti)->captions.at(ci)->get_hash()==inst.get_subj_hash()); + tables.at(ti)->captions.at(ci)->instances.push_back(inst); } + else if(parts.size()==5 and parts.at(1)==figures_lbl and parts.at(3)==captions_lbl) + { + int fi = std::stoi(parts.at(2)); + int ci = std::stoi(parts.at(4)); + + assert(figures.at(fi)->captions.at(ci)->get_hash()==inst.get_subj_hash()); + figures.at(fi)->captions.at(ci)->instances.push_back(inst); + } else - {} + { + LOG_S(WARNING) << "ignoring instances with subj-path: " << path; + } } - } + } } diff --git a/src/andromeda/tooling/structs/subjects/document/doc_captions.h b/src/andromeda/tooling/structs/subjects/document/doc_captions.h index d0f37483..3f0e6a45 100644 --- a/src/andromeda/tooling/structs/subjects/document/doc_captions.h +++ b/src/andromeda/tooling/structs/subjects/document/doc_captions.h @@ -71,7 +71,7 @@ namespace andromeda obj_to_caption={}; obj_to_notes={}; - auto& provs = doc.provs; + auto& provs = doc.get_provs(); page_nums={}; is_assigned={}; @@ -275,7 +275,7 @@ namespace andromeda template void doc_captions::assign_captions(doc_type& doc) { - auto& provs = doc.provs; + auto& provs = doc.get_provs(); for(auto itr=obj_to_caption.begin(); itr!=obj_to_caption.end(); itr++) { @@ -300,20 +300,11 @@ namespace andromeda { auto& table = prov_to_table.at(prov_i); - - //LOG_S(WARNING) << "table: " - //<< prov_i->maintext_ind; - for(ind_type j:itr->second) { auto& prov_j = provs.at(j); auto& caption = prov_to_text.at(prov_j); - //LOG_S(WARNING) << "\tassigning caption " - //<< prov_i->maintext_ind - //<< " to table " - //<< prov_j->maintext_ind; - table->captions.push_back(caption); } } @@ -321,18 +312,10 @@ namespace andromeda { auto& figure = prov_to_figure.at(prov_i); - //LOG_S(WARNING) << "figure: " - //<< prov_i->maintext_ind; - for(ind_type j:itr->second) { auto& prov_j = provs.at(j); auto& caption = prov_to_text.at(prov_j); - - //LOG_S(WARNING) << "\tassigning caption " - //<< prov_i->maintext_ind - //<< " to figure " - //<< prov_j->maintext_ind; figure->captions.push_back(caption); } diff --git a/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h b/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h index 182ea3b7..9cac97b3 100644 --- a/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h +++ b/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h @@ -13,7 +13,6 @@ namespace andromeda const static inline std::set is_text = { "title", "subtitle-level-1", "paragraph", "list-item", - //"footnote", "caption", "formula", "equation" }; @@ -84,7 +83,7 @@ namespace andromeda template void doc_normalisation::set_pdforder() { - auto& orig = doc.orig; + auto& orig = doc.get_orig(); if(orig.count(doc_type::maintext_lbl)==0) { @@ -102,9 +101,9 @@ namespace andromeda template void doc_normalisation::init_pages() { - auto& orig = doc.orig; - - auto& pages = doc.pages; + auto& orig = doc.get_orig(); + + auto& pages = doc.get_pages(); pages.clear(); for(ind_type l=0; l void doc_normalisation::unroll_provs() { - auto& orig = doc.orig; + auto& orig = doc.get_orig(); nlohmann::json& old_maintext = orig.at(doc_type::maintext_lbl); nlohmann::json new_maintext = nlohmann::json::array({}); @@ -191,10 +190,11 @@ namespace andromeda template void doc_normalisation::init_provs() { - std::string doc_name = doc.doc_name; + //std::string doc_name = doc.doc_name; + std::string doc_name = doc.get_name(); - auto& orig = doc.orig; - auto& provs = doc.provs; + auto& orig = doc.get_orig(); + auto& provs = doc.get_provs(); provs.clear(); @@ -273,10 +273,11 @@ namespace andromeda template void doc_normalisation::init_items() { - std::string doc_name = doc.doc_name; + //std::string doc_name = doc.doc_name; + std::string doc_name = doc.get_name(); - auto& orig = doc.orig; - auto& provs = doc.provs; + auto& orig = doc.get_orig(); + auto& provs = doc.get_provs(); auto& texts = doc.texts; auto& tables = doc.tables; @@ -325,7 +326,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); if(valid) @@ -344,7 +345,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); tables.push_back(subj); @@ -365,7 +366,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); figures.push_back(subj); @@ -382,7 +383,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); if(valid) @@ -401,7 +402,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); if(valid) @@ -420,7 +421,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); if(valid) @@ -445,7 +446,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); if(valid) diff --git a/src/andromeda/tooling/structs/subjects/document/doc_order.h b/src/andromeda/tooling/structs/subjects/document/doc_order.h index d135e0d0..de95f8dc 100644 --- a/src/andromeda/tooling/structs/subjects/document/doc_order.h +++ b/src/andromeda/tooling/structs/subjects/document/doc_order.h @@ -84,10 +84,10 @@ namespace andromeda template void doc_order::order_maintext(doc_type& doc) - { + { // make a deep-copy ! prov_vec_type provs={}; - for(auto& prov:doc.provs) + for(auto& prov:doc.get_provs()) { provs.push_back(*prov); } @@ -100,17 +100,19 @@ namespace andromeda template void doc_order::update_document(doc_type& doc, prov_vec_type& provs) { + nlohmann::json& orig = doc.get_orig(); + // copy ... - nlohmann::json maintext = doc.orig["main-text"]; + nlohmann::json maintext = orig["main-text"]; // re-order for(std::size_t l=0; l::from_json(const nlohmann::json& json_figure) { - base_subject::valid = true; - + LOG_S(INFO) << __FUNCTION__; + + { + base_subject::valid = true; + base_subject::_from_json(json_figure); + } + { conf = json_figure.value(base_subject::confidence_lbl, conf); created_by = json_figure.value(base_subject::created_by_lbl, created_by); @@ -151,6 +156,8 @@ namespace andromeda bool subject
::from_json(const nlohmann::json& json_figure, const std::vector >& doc_provs) { + LOG_S(INFO) << __FUNCTION__; + bool init_prov = base_subject::set_prov_refs(json_figure, doc_provs, provs); bool init_figure = this->from_json(json_figure); diff --git a/src/andromeda/tooling/structs/subjects/table.h b/src/andromeda/tooling/structs/subjects/table.h index 4b320547..b9c5b4c9 100644 --- a/src/andromeda/tooling/structs/subjects/table.h +++ b/src/andromeda/tooling/structs/subjects/table.h @@ -185,7 +185,6 @@ namespace andromeda json_table.push_back(row); } - result[base_subject::table_data_lbl] = json_table; } @@ -209,12 +208,22 @@ namespace andromeda bool subject
::from_json(const nlohmann::json& json_table) { + //LOG_S(INFO) << __FUNCTION__; + + { + base_subject::valid = true; + base_subject::_from_json(json_table); + } + { conf = json_table.value(base_subject::confidence_lbl, conf); created_by = json_table.value(base_subject::created_by_lbl, created_by); } - { + { + nrows = json_table.at("#-rows"); + ncols = json_table.at("#-cols"); + nlohmann::json grid = json_table.at("data"); for(ind_type i=0; i::set_data(const nlohmann::json& item) { + LOG_S(INFO) << __FUNCTION__; + base_subject::clear_models(); + data.clear(); { @@ -407,7 +419,7 @@ namespace andromeda valid = (valid and cell.set_tokens(char_normaliser, text_normaliser)); } } - + return valid; } diff --git a/src/andromeda/tooling/structs/subjects/text.h b/src/andromeda/tooling/structs/subjects/text.h index c9b6f37b..301a7300 100644 --- a/src/andromeda/tooling/structs/subjects/text.h +++ b/src/andromeda/tooling/structs/subjects/text.h @@ -21,8 +21,6 @@ namespace andromeda void finalise(); void clear(); - //std::string get_path() const { return (provs.size()>0? (provs.at(0)->get_item_ref()):"#"); } - bool is_valid() { return (base_subject::valid and text_element::text_valid); } virtual nlohmann::json to_json(const std::set& filters); diff --git a/tests/data/docs/1806.02284.nlp.json b/tests/data/docs/1806.02284.nlp.json index dfd19629..965b1c55 100644 --- a/tests/data/docs/1806.02284.nlp.json +++ b/tests/data/docs/1806.02284.nlp.json @@ -420,6 +420,7 @@ "$ref": "#/page-elements/21" } ], + "sref": "#/figures/0/captions/0", "text": "Figure 1: A diagram of the conversion pipeline in the Corpus Conversion Service platform. It consists of 5 components: (1) Parsing of the document and its contained bitmap images, (2) Annotating the text of the parsed documents with layout semantic labels, (3) Training models based on the ground-truth acquired by the annotations, (4) Applying machine learned models on the parsed documents to determine the layout semantic label of each cell and finally (5) Assembling the document into a structured data format (e. g. JSON). The main conversion pipeline is depicted in blue and allows you to process and convert documents at scale into a structured data format. The green and orange sections can be used optionally, in order to process scanned documents (green) or train new models based on human annotation (orange).", "text-hash": 9615465947839001361, "type": "caption" @@ -436,6 +437,7 @@ "$ref": "#/page-elements/20" } ], + "sref": "#/figures/0", "type": "figure" }, { @@ -449,6 +451,7 @@ "$ref": "#/page-elements/43" } ], + "sref": "#/figures/1/captions/0", "text": "Figure 3: The labelled cells annotated on the title page of a poster abstract about the CCS [11]. Here, the title, authors, affiliation, subtitle, main-text, caption and picture labels are represented respectively as red, green, purple, dark-red, yellow, orange and ivory.", "text-hash": 17324714532994059892, "type": "caption" @@ -465,6 +468,7 @@ "$ref": "#/page-elements/36" } ], + "sref": "#/figures/1", "type": "figure" }, { @@ -478,6 +482,7 @@ "$ref": "#/page-elements/38" } ], + "sref": "#/figures/2/captions/0", "text": "Figure 2: The cells obtained for the title page of a poster abstract about the CCS [11] after the parsing stage. During the parsing, we extract all bounding boxes of the text (or cells) in such a way that they all have: (1) a maximum width, (2) are only single line and (3) split into multiple cells in case of listidentifiers, multi-columns or crossing vertical lines (such as in tables).", "text-hash": 6754994759646241897, "type": "caption" @@ -494,6 +499,7 @@ "$ref": "#/page-elements/37" } ], + "sref": "#/figures/2", "type": "figure" }, { @@ -507,6 +513,7 @@ "$ref": "#/page-elements/59" } ], + "sref": "#/figures/3/captions/0", "text": "Figure 4: The annotation rate of pages for two different collections (Physical Review B and Elsevier papers) as a function of the number of annotated pages. As one can observe, the mean annotation rate is increasing after each training (depicted by a vertical dashed red line). After the first training, the human annotator is presented a pre-annotated page, using the predictions from the latest model. As the predictions become better with increasing size of the ground-truth, less corrections need to be made and hence more pages can be annotated in similar time intervals.", "text-hash": 504280783932681152, "type": "caption" @@ -523,6 +530,7 @@ "$ref": "#/page-elements/58" } ], + "sref": "#/figures/3", "type": "figure" }, { @@ -536,6 +544,7 @@ "$ref": "#/page-elements/65" } ], + "sref": "#/figures/4/captions/0", "text": "Figure 5: A typical image of a parsed PDF page that is fed to the default models. In red, we show the detection of the tables combined with the confidence of the model. The results displayed here originate from the YOLOv2 model.", "text-hash": 8628591081653072559, "type": "caption" @@ -552,6 +561,7 @@ "$ref": "#/page-elements/64" } ], + "sref": "#/figures/4", "type": "figure" }, { @@ -565,6 +575,7 @@ "$ref": "#/page-elements/105" } ], + "sref": "#/figures/5/captions/0", "text": "Figure 6: Diagram of the architecture of our platform. The architecture is composed from 4 layers: an interface layer with REST-API and frontend, an orchestration layer with a message broker and results backend, a compute layer consisting out of a variable number of asynchronous workers and finally a storage layer providing a NoSQL database and an object store. The NoSQL database stores the queryable meta-data of each file that is stored in the object store.", "text-hash": 4488590919374042342, "type": "paragraph" @@ -581,6 +592,7 @@ "$ref": "#/page-elements/104" } ], + "sref": "#/figures/5", "type": "figure" }, { @@ -594,6 +606,7 @@ "$ref": "#/page-elements/115" } ], + "sref": "#/figures/6/captions/0", "text": "Figure 7: Evolution of number of users and number of PDF pages on the platform. The jumps in the number of pages originates from big ingestions of documents performed by some users. This proves that the CCS platform is also able to accomodate these short burst of extreme activity.", "text-hash": 14863303056159196785, "type": "caption" @@ -610,6 +623,7 @@ "$ref": "#/page-elements/114" } ], + "sref": "#/figures/6", "type": "figure" }, { @@ -623,6 +637,7 @@ "$ref": "#/page-elements/122" } ], + "sref": "#/figures/7/captions/0", "text": "Figure 8: Speedup in the pipeline components as a function of the number of worker nodes (each with four cores, running four local worker processes).", "text-hash": 9976536719025941296, "type": "caption" @@ -639,9 +654,62 @@ "$ref": "#/page-elements/121" } ], + "sref": "#/figures/7", "type": "figure" } ], + "file-info": { + "#-pages": 9, + "document-hash": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be", + "filename": "1806.02284.pdf", + "page-hashes": [ + { + "hash": "5b2d0ee2a817876778b0007469f94e45d2d9ea591f45711215455a5f2fbec39a", + "model": "model", + "page": 1 + }, + { + "hash": "8d72db8e5854862f5472de9385457c1e4251d9a3e8a4797a687e102d96a701ba", + "model": "model", + "page": 2 + }, + { + "hash": "a1f460f273f25e1dd9e42ad2cd76faf0345f690c06c6686557a9fd2accd556d5", + "model": "model", + "page": 3 + }, + { + "hash": "1f93b41e4044e9957387fe7193c99331da63feec2437996a935c5935c5f20e7d", + "model": "model", + "page": 4 + }, + { + "hash": "80b62c8452fc364b0ccfc8afb5bb48742f6702588fdb478ee2f440f2e822183d", + "model": "model", + "page": 5 + }, + { + "hash": "d8bd307e4430a810bd71d2bb7dfb4eac41e34a4debbde60c08d446e78fe2d804", + "model": "model", + "page": 6 + }, + { + "hash": "da962246354e6f35eb1d4a837c0e9959a341947ec04653caa1f80d0214623653", + "model": "model", + "page": 7 + }, + { + "hash": "69be4180ff9be035e1123a026f3e6b8b00a99461a66ceecf8ec048285d3127e0", + "model": "model", + "page": 8 + }, + { + "hash": "78f72b9373b3bde393eaf09aae4d5b080f33d062ada168d51747278edf72acc7", + "model": "model", + "page": 9 + } + ] + }, "footnotes": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/0", @@ -652,6 +720,7 @@ "$ref": "#/page-elements/11" } ], + "sref": "#/footnotes/0", "text": "Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for components of this work owned by others than the author(s) must be honored. Abstracting with credit is permitted. To copy otherwise, or republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee. Request permissions from permissions@acm.org.", "text-hash": 13032800243621120549, "type": "footnote" @@ -665,6 +734,7 @@ "$ref": "#/page-elements/12" } ], + "sref": "#/footnotes/1", "text": "KDD \u201918, August 19-23, 2018, London, United Kingdom", "text-hash": 15473297532078357059, "type": "footnote" @@ -678,6 +748,7 @@ "$ref": "#/page-elements/13" } ], + "sref": "#/footnotes/2", "text": "\u00a9 2018 Copyright held by the owner/author(s). Publication rights licensed to ACM. ACM ISBN 978-1-4503-5552-0/18/08...$15.00", "text-hash": 3001373187661149606, "type": "footnote" @@ -691,6 +762,7 @@ "$ref": "#/page-elements/14" } ], + "sref": "#/footnotes/3", "text": "https://doi.org/10.1145/3219819.3219834", "text-hash": 3547103316902677392, "type": "footnote" @@ -704,6 +776,7 @@ "$ref": "#/page-elements/18" } ], + "sref": "#/footnotes/4", "text": "$^{1}$This number originates from a keynote talk by Phil Ydens, Adobe\u2019s VP Engineering for Document Cloud. A video of the presentation can be found here: https://youtu.be/ 5Axw6OGPYHw", "text-hash": 14549584251446631343, "type": "footnote" @@ -717,6 +790,7 @@ "$ref": "#/page-elements/19" } ], + "sref": "#/footnotes/5", "text": "$^{2}$This is clearly the case on the popular arXiv scientific online repository: https://arxiv. org/help/stats/2012_by_area/index", "text-hash": 7221931865252575858, "type": "footnote" @@ -730,6 +804,7 @@ "$ref": "#/page-elements/26" } ], + "sref": "#/footnotes/6", "text": "$^{3}$https://www.xpdfreader.com", "text-hash": 104933780092600391, "type": "footnote" @@ -743,6 +818,7 @@ "$ref": "#/page-elements/27" } ], + "sref": "#/footnotes/7", "text": "$^{4}$http://tabula.technology/", "text-hash": 11894228156061308002, "type": "footnote" @@ -756,6 +832,7 @@ "$ref": "#/page-elements/28" } ], + "sref": "#/footnotes/8", "text": "$^{5}$https://www.abbyy.com/", "text-hash": 3391629868238619420, "type": "footnote" @@ -769,6 +846,7 @@ "$ref": "#/page-elements/29" } ], + "sref": "#/footnotes/9", "text": "$^{6}$https://www.nuance.com/", "text-hash": 1693441792396921860, "type": "footnote" @@ -782,6 +860,7 @@ "$ref": "#/page-elements/30" } ], + "sref": "#/footnotes/10", "text": "$^{7}$https://www.ibm.com/us-en/marketplace/data-capture-and-imaging", "text-hash": 11939931591922575256, "type": "footnote" @@ -795,6 +874,7 @@ "$ref": "#/page-elements/49" } ], + "sref": "#/footnotes/11", "text": "$^{8}$a line of text might be printed character-by-character, word-by-word or the entire text snippet.", "text-hash": 14551310605717713161, "type": "footnote" @@ -808,6 +888,7 @@ "$ref": "#/page-elements/50" } ], + "sref": "#/footnotes/12", "text": "$^{9}$http://qpdf.sourceforge.net/", "text-hash": 17478669388996915759, "type": "footnote" @@ -821,6 +902,7 @@ "$ref": "#/page-elements/57" } ], + "sref": "#/footnotes/13", "text": "$^{10}$It is important to notice that there is no restriction on the number of labels nor the semantic meaning of these labels. The only limitation one has is that the set of semantic labels needs to be consistent across the dataset, but this is evidently true for any type of ML algorithm.", "text-hash": 13266614683838167520, "type": "footnote" @@ -834,6 +916,7 @@ "$ref": "#/page-elements/73" } ], + "sref": "#/footnotes/14", "text": "$^{11}$All the data is coming from the bulk data download https://arxiv.org/help/bulk_data_s3", "text-hash": 10131428201408538445, "type": "footnote" @@ -847,6 +930,7 @@ "$ref": "#/page-elements/95" } ], + "sref": "#/footnotes/15", "text": "$^{12}$https://journals.aps.org/prb", "text-hash": 9846388834475228858, "type": "footnote" @@ -860,6 +944,7 @@ "$ref": "#/page-elements/110" } ], + "sref": "#/footnotes/16", "text": "$^{13}$https://www.openapis.org/", "text-hash": 831347610428179229, "type": "footnote" @@ -873,6 +958,7 @@ "$ref": "#/page-elements/111" } ], + "sref": "#/footnotes/17", "text": "$^{14}$https://www.rabbitmq.com/", "text-hash": 15235037228412732729, "type": "footnote" @@ -886,6 +972,7 @@ "$ref": "#/page-elements/112" } ], + "sref": "#/footnotes/18", "text": "$^{15}$https://www.redis.io/", "text-hash": 782710111840296691, "type": "footnote" @@ -899,6 +986,7 @@ "$ref": "#/page-elements/113" } ], + "sref": "#/footnotes/19", "text": "$^{16}$http://www.celeryproject.org/", "text-hash": 1778492971410642442, "type": "footnote" @@ -912,6 +1000,7 @@ "$ref": "#/page-elements/120" } ], + "sref": "#/footnotes/20", "text": "$^{17}$https://www.mongodb.com/", "text-hash": 3489272016069066385, "type": "footnote" @@ -925,6 +1014,7 @@ "$ref": "#/page-elements/131" } ], + "sref": "#/footnotes/21", "text": "$^{18}$https://kubernetes.io/", "text-hash": 5145030134774826221, "type": "footnote" @@ -938,6 +1028,7 @@ "$ref": "#/page-elements/132" } ], + "sref": "#/footnotes/22", "text": "$^{19}$ibm.biz/privatecloud", "text-hash": 4585077909629360588, "type": "footnote" @@ -951,6 +1042,7 @@ "$ref": "#/page-elements/139" } ], + "sref": "#/footnotes/23", "text": "$^{20}$We don\u2019t show the number of documents, since the number of pages in a document can range from 1 to well above 1000. Consequently, the number of pages is a more robust metric to measure the scaling with regard to the corpus size.", "text-hash": 14814952417700014875, "type": "footnote" @@ -960,1831 +1052,1852 @@ "instances": { "data": [ [ - "numval", - "year", - 7377574370756688828, + "sentence", + "", + 18259197018396996238, "TEXT", - "#/texts/0", + "#/texts/51", 1.0, - 389609625548777054, - 1345153950666588077, + 11214795667451364706, + 15381220353542038442, 18446744073709551615, 18446744073709551615, - 34, - 38, - 34, - 38, - 4, - 5, + 0, + 83, + 0, + 83, + 0, + 14, true, - "2018", - "2018" + "Let us now discuss both deep neural network training microservices on the platform.", + "Let us now discuss both deep neural network training microservices on the platform." ], [ - "numval", - "ival", - 7377574370756688828, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/0", + "#/texts/51", 1.0, - 15441160910541481790, - 218889966910406464, + 12178341415896275389, + 4652821010771256286, 18446744073709551615, 18446744073709551615, - 27, - 29, - 27, - 29, - 2, + 0, + 3, + 0, 3, + 0, + 1, true, - "24", - "24" + "Let", + "Let" ], [ - "parenthesis", - "square brackets", - 7377574370756688828, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/0", + "#/texts/51", 1.0, - 8106340136782143757, - 305332543809292699, + 8106397868479560363, + 5980952610294528544, 18446744073709551615, 18446744073709551615, - 19, - 26, - 19, - 26, - 1, - 2, + 11, + 18, + 11, + 18, + 3, + 4, true, - "[cs.DL]", - "[cs.DL]" + "discuss", + "discuss" ], [ - "expression", - "wtoken-concatenation", - 7377574370756688828, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/0", + "#/texts/51", 1.0, - 5564484558542728887, - 6260400721402515593, + 13848731310568719727, + 15095939915134652393, 18446744073709551615, 18446744073709551615, - 0, - 18, - 0, - 18, - 0, - 1, + 24, + 66, + 24, + 66, + 5, + 10, true, - "arXiv:1806.02284v1", - "arXiv:1806.02284v1" + "deep neural network training microservices", + "deep neural network training microservices" ], [ - "expression", - "wtoken-concatenation", - 7377574370756688828, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/0", + "#/texts/51", 1.0, - 8106340136782143757, - 305332543809292699, + 16381206566339127348, + 7523956295610612753, 18446744073709551615, 18446744073709551615, - 19, - 26, - 19, - 26, - 1, - 2, + 67, + 73, + 67, + 73, + 10, + 12, true, - "[cs.DL]", - "[cs.DL]" + "on the", + "on the" + ], + [ + "term", + "single-term", + 18259197018396996238, + "TEXT", + "#/texts/51", + 1.0, + 14814125365076808131, + 10453527503990612347, + 18446744073709551615, + 18446744073709551615, + 74, + 82, + 74, + 82, + 12, + 13, + true, + "platform", + "platform" ], [ "sentence", "", - 10227328696767902037, + 18259197018396996238, "TEXT", - "#/texts/1", + "#/texts/51", 1.0, - 11303007895399162817, - 11350976242507888924, + 17449560956934989976, + 12526021364899620960, 18446744073709551615, 18446744073709551615, - 0, 84, - 0, + 227, 84, - 0, + 227, 14, + 41, true, - "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", - "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale." + "In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision.", + "In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision." ], [ - "term", - "single-term", - 10227328696767902037, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/1", + "#/texts/51", 1.0, - 12638008641667971393, - 2808934749433980912, + 15441160910541480354, + 2599356225275492892, 18446744073709551615, 18446744073709551615, - 0, - 25, - 0, - 25, - 0, - 3, + 84, + 86, + 84, + 86, + 14, + 15, true, - "Corpus Conversion Service", - "Corpus Conversion Service" + "In", + "In" ], [ - "term", - "single-term", - 10227328696767902037, + "numval", + "ival", + 18259197018396996238, "TEXT", - "#/texts/1", + "#/texts/51", 1.0, - 3953336115302703444, - 3908089371773344302, + 17767354399704235161, + 12733743888687180225, 18446744073709551615, 18446744073709551615, - 29, - 54, - 29, - 54, - 5, - 8, + 93, + 94, + 93, + 94, + 16, + 17, true, - "Machine Learning Platform", - "Machine Learning Platform" + "1", + "1" ], [ - "term", - "single-term", - 10227328696767902037, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/1", + "#/texts/51", 1.0, - 2543543638813814383, - 14974042820297549065, + 389609625741152123, + 11698558665309690548, 18446744073709551615, 18446744073709551615, - 58, - 74, - 58, - 74, - 9, - 11, + 99, + 103, + 99, + 103, + 19, + 20, true, - "Ingest Documents", - "Ingest Documents" + "show", + "show" ], [ - "term", - "single-term", - 10227328696767902037, + "expression", + "word-concatenation", + 18259197018396996238, "TEXT", - "#/texts/1", + "#/texts/51", 1.0, - 329104162321612062, - 9665794625919571011, + 6285955549867796622, + 12901492066051428715, 18446744073709551615, 18446744073709551615, - 78, - 83, - 78, - 83, - 12, - 13, + 108, + 124, + 108, + 124, + 21, + 22, true, - "Scale", - "Scale" + "time-to-solution", + "time-to-solution" ], [ - "conn", - "single-conn", - 10227328696767902037, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/1", + "#/texts/51", 1.0, - 15441160910541487054, - 1862666054904793840, + 6285955549867796622, + 12901492066051428715, 18446744073709551615, 18446744073709551615, - 75, - 77, - 75, - 77, - 11, - 12, + 108, + 124, + 108, + 124, + 21, + 22, true, - "at", - "at" + "time-to-solution", + "time-to-solution" ], [ "conn", "single-conn", - 10227328696767902037, + 18259197018396996238, "TEXT", - "#/texts/1", + "#/texts/51", 1.0, - 15441160910541485865, - 1862717525379277583, + 12178341415895625940, + 4653059449996398372, 18446744073709551615, 18446744073709551615, - 55, - 57, - 55, - 57, - 8, - 9, + 125, + 128, + 125, + 128, + 22, + 23, true, - "to", - "to" + "for", + "for" ], [ - "link", - "email", - 18258237174351515285, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/3", + "#/texts/51", 1.0, - 7883794643982446593, - 9473083479424942219, + 14634153919632515335, + 365322755488345032, 18446744073709551615, 18446744073709551615, - 0, - 30, - 0, - 30, - 0, - 11, + 129, + 137, + 129, + 137, + 23, + 24, true, - "taa,dol,cau,bek@zurich.ibm.com", - "taa,dol,cau,bek@zurich.ibm.com" + "training", + "training" ], [ - "geoloc", - "country", - 11056873211244709904, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/5", + "#/texts/51", 1.0, - 2664439525053388608, - 16906723856094244091, + 14103651237077222912, + 1262912573528208063, 18446744073709551615, 18446744073709551615, - 13, - 24, - 13, - 24, - 2, - 3, + 142, + 152, + 142, + 152, + 25, + 26, true, - "Switzerland", - "Switzerland" + "predicting", + "predicting" ], [ - "numval", - "ival", - 3624246356859711021, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/7", + "#/texts/51", 1.0, - 17767354399704235161, - 12573472761345255474, + 1353284443403550494, + 17158735888603064564, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 155, + 166, + 155, + 166, + 27, + 29, true, - "1", - "1" + "single page", + "single page" ], [ - "numval", - "ival", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 12178341415896436703, - 12968333296314215347, + 16381206568455155979, + 8062169836442615762, 18446744073709551615, 18446744073709551615, - 1491, - 1494, - 1491, - 1494, - 249, - 250, + 175, + 181, + 175, + 181, + 31, + 33, true, - "250", - "250" + "as the", + "as the" ], [ - "parenthesis", - "round brackets", - 17999848460847860039, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 8624098978506921550, - 8067551676911300261, + 5731695876385560379, + 1758035992340926235, 18446744073709551615, 18446744073709551615, - 309, - 347, - 309, - 347, - 51, - 60, + 182, + 193, + 182, + 193, + 33, + 34, true, - "(e.g. the PDF format or bitmap images)", - "(e.g. the PDF format or bitmap images)" + "performance", + "performance" ], [ - "parenthesis", - "round brackets", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 4552190965366435023, - 5994729969442454976, - 18446744073709551615, + 15441160910541486538, + 2599358879133688732, 18446744073709551615, - 388, - 409, - 388, - 409, - 68, - 73, + 18446744073709551615, + 194, + 196, + 194, + 196, + 34, + 35, true, - "(e.g. complex tables)", - "(e.g. complex tables)" + "in", + "in" ], [ - "parenthesis", - "round brackets", - 17999848460847860039, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 329104053210116957, - 3393895258272698836, + 329104159246284497, + 8646809584775625185, 18446744073709551615, 18446744073709551615, - 628, - 633, - 628, - 633, - 109, - 112, + 197, + 202, + 197, + 202, + 35, + 36, true, - "(CCS)", - "(CCS)" + "terms", + "terms" ], [ - "parenthesis", - "round brackets", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 8912272716224106832, - 12227152516026650269, + 15441160910541485670, + 2599358870315263905, 18446744073709551615, 18446744073709551615, - 708, - 735, - 708, - 735, - 124, - 131, + 203, + 205, + 203, + 205, + 36, + 37, true, - "(i.e. collect ground-truth)", - "(i.e. collect ground-truth)" + "of", + "of" ], [ - "expression", - "common", - 17999848460847860039, + "term", + "enum-term-mark-2", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 15441160910541486545, - 11606670743807693522, + 11037453576911667853, + 14703723871622436608, 18446744073709551615, 18446744073709551615, - 709, - 713, - 709, - 713, - 125, - 126, + 206, + 226, + 206, + 226, + 37, + 40, true, - "ie", - "i.e." + "recall and precision", + "recall and precision" ], [ - "expression", - "common", - 17999848460847860039, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 15441160910541487324, - 11606670863251774055, + 16381206521531485437, + 11024740562177031234, 18446744073709551615, 18446744073709551615, - 310, - 314, - 310, - 314, - 52, - 53, + 206, + 212, + 206, + 212, + 37, + 38, true, - "eg", - "e.g." + "recall", + "recall" ], [ - "expression", - "common", - 17999848460847860039, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 15441160910541487324, - 11606670863251791461, + 6184954595655792282, + 2740680839011190488, 18446744073709551615, 18446744073709551615, - 389, - 393, - 389, - 393, - 69, - 70, + 217, + 226, + 217, + 226, + 39, + 40, true, - "eg", - "e.g." + "precision", + "precision" ], [ - "expression", - "word-concatenation", - 17999848460847860039, + "sentence", + "", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 15169931585135175826, - 17270979630715224833, + 13058222401901188325, + 14090621328054154871, 18446744073709551615, 18446744073709551615, - 525, - 536, - 525, - 536, - 93, - 94, + 228, + 364, + 228, + 364, + 41, + 69, true, - "cloud-based", - "cloud-based" + "In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times.", + "In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times." ], [ - "expression", - "word-concatenation", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 6307689511527468252, - 12199545311202481186, + 16380809977974811061, + 11732651135400697626, 18446744073709551615, 18446744073709551615, - 743, - 759, - 743, - 759, - 133, - 134, + 228, + 234, + 228, + 234, + 41, + 43, true, - "machine-learning", - "machine-learning" + "In the", + "In the" ], [ - "expression", - "word-concatenation", - 17999848460847860039, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 3932662928795581219, - 3325076288347729928, + 12141441254112579393, + 8271858979549873106, 18446744073709551615, 18446744073709551615, - 828, - 844, - 828, - 844, - 144, - 145, + 235, + 249, + 235, + 249, + 43, + 45, true, - "bitmap-documents", - "bitmap-documents" + "training phase", + "training phase" ], [ - "expression", - "word-concatenation", - 17999848460847860039, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 3753411203337468488, - 16756051673090395246, + 16381206564578053366, + 7676681725158730412, 18446744073709551615, 18446744073709551615, - 1102, - 1114, - 1102, - 1114, - 187, - 188, + 254, + 260, + 254, + 260, + 47, + 48, true, - "ground-truth", - "ground-truth" + "ensure", + "ensure" ], [ - "expression", - "word-concatenation", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 6307689511527468252, - 12199545311202523423, + 3504047303032829403, + 14383519537824238604, 18446744073709551615, 18446744073709551615, - 1133, - 1149, - 1133, - 1149, - 191, - 192, + 261, + 270, + 261, + 270, + 48, + 50, true, - "machine-learning", - "machine-learning" + "that both", + "that both" ], [ - "expression", - "word-concatenation", - 17999848460847860039, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 3753411203337468488, - 16756051673090420119, + 15359670209433732834, + 11505488180295702106, 18446744073709551615, 18446744073709551615, - 1244, - 1256, - 1244, - 1256, - 210, - 211, + 271, + 281, + 271, + 281, + 50, + 51, true, - "ground-truth", - "ground-truth" + "algorithms", + "algorithms" ], [ - "expression", - "word-concatenation", - 17999848460847860039, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 10391722136816057200, - 4465071482523967093, + 12178341415895649364, + 4652781883350111182, 18446744073709551615, 18446744073709551615, - 1512, - 1533, - 1512, - 1533, - 253, - 254, + 282, + 285, + 282, + 285, + 51, + 52, true, - "knowledge-engineering", - "knowledge-engineering" + "ran", + "ran" ], [ - "expression", - "word-concatenation", - 17999848460847860039, + "numval", + "ival", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 11355983594424639335, - 375612941360355674, + 12178341415896426714, + 4652804192217870476, 18446744073709551615, 18446744073709551615, - 1298, - 1314, - 1298, - 1314, - 219, - 220, + 291, + 294, + 291, + 294, + 53, + 54, true, - "precision/recall", - "precision/recall" + "100", + "100" ], [ - "expression", - "wtoken-concatenation", - 17999848460847860039, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 12178341415896195376, - 12963254028349616217, + 16381206565270919865, + 7578403846550666862, 18446744073709551615, 18446744073709551615, - 1339, - 1342, - 1339, - 1342, - 225, - 226, + 295, + 301, + 295, + 301, + 54, + 55, true, - "99%", - "99%" + "epochs", + "epochs" ], [ - "sentence", - "", - 17999848460847860039, + "expression", + "common", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 8311273775079009361, - 18234444390399509646, + 15441160910541486545, + 2599358878961543341, 18446744073709551615, 18446744073709551615, - 0, - 122, - 0, - 122, - 0, - 20, + 303, + 307, + 303, + 307, + 56, + 57, true, - "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size.", - "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size." + "ie", + "i.e." ], [ - "sentence", - "", - 17999848460847860039, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 8652887973149281574, - 1544181945594032747, + 15441160910541486545, + 2599358878961543341, 18446744073709551615, 18446744073709551615, - 123, - 258, - 123, - 258, - 20, - 43, + 303, + 307, + 303, + 307, + 56, + 57, true, - "Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable.", - "Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable." + "ie", + "i.e." ], [ - "sentence", - "", - 17999848460847860039, + "numval", + "ival", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 5682935857557389413, - 3518340224243798686, + 329104147765109382, + 8033726402022826926, 18446744073709551615, 18446744073709551615, - 259, - 487, - 259, - 487, - 43, - 84, + 312, + 317, + 312, + 317, + 58, + 59, true, - "Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging.", - "Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging." + "25000", + "25000" ], [ - "sentence", - "", - 17999848460847860039, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 18403546089192870947, - 3375274648488008071, + 18169256434676190331, + 11634553033353850813, 18446744073709551615, 18446744073709551615, - 488, - 575, - 488, - 575, - 84, - 101, + 318, + 329, + 318, + 329, + 59, + 61, true, - "In this paper, we present a modular, cloud-based platform to ingest documents at scale.", - "In this paper, we present a modular, cloud-based platform to ingest documents at scale." + "page images", + "page images" ], [ - "sentence", - "", - 17999848460847860039, + "verb", + "compound-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 15870780009666831983, - 2120332988466055117, + 8526860058636487735, + 15955870111469140752, 18446744073709551615, 18446744073709551615, - 576, - 891, - 576, - 891, - 101, - 152, + 330, + 341, + 330, + 341, + 61, + 64, true, - "This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format.", - "This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format." + "were fed to", + "were fed to" ], [ - "sentence", - "", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 10285604264132694933, - 1782145150804012891, + 16381206519425733256, + 7379223398534589543, 18446744073709551615, 18446744073709551615, - 892, - 1045, - 892, - 1045, - 152, - 177, + 339, + 345, + 339, + 345, + 63, + 65, true, - "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents.", - "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents." + "to the", + "to the" ], [ - "sentence", - "", - 17999848460847860039, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 696858082777940132, - 6587401266180559184, + 8106342689863369930, + 11135817727321581998, 18446744073709551615, 18446744073709551615, - 1046, - 1196, - 1046, - 1196, - 177, - 201, + 346, + 353, + 346, + 353, + 65, + 66, true, - "Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude.", - "Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude." + "network", + "network" ], [ - "sentence", - "", - 17999848460847860039, + "numval", + "ival", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 11949985654620491247, - 6433012828858116708, + 12178341415896426714, + 4652804192217923506, 18446744073709551615, 18446744073709551615, - 1197, - 1398, - 1197, - 1398, - 201, - 235, + 354, + 357, + 354, + 357, + 66, + 67, true, - "This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output.", - "This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output." + "100", + "100" ], [ - "sentence", - "", - 17999848460847860039, + "term", + "single-term", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 11602122462230219692, - 9062878903616548976, + 329104159219994925, + 8640251348534211245, 18446744073709551615, 18446744073709551615, - 1399, - 1554, - 1399, - 1554, - 235, - 257, + 358, + 363, + 358, + 363, + 67, + 68, true, - "The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", - "The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements." + "times", + "times" ], [ - "term", - "enum-term-mark-1", - 17999848460847860039, + "sentence", + "", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 9845754748010686003, - 13443808248487347009, + 16675190523738339061, + 7202929718160933759, 18446744073709551615, 18446744073709551615, - 433, - 464, - 433, - 464, - 77, - 81, + 365, + 587, + 365, + 587, + 69, + 107, true, - "qualitative and quantitive data", - "qualitative and quantitive data" + "We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied.", + "We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied." ], [ - "term", - "enum-term-mark-2", - 17999848460847860039, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 14506873166110432521, - 11857803489572599054, + 8106342033696543838, + 10720166011679309151, 18446744073709551615, 18446744073709551615, - 323, - 339, - 323, - 339, - 55, - 58, + 368, + 375, + 368, + 375, + 70, + 71, true, - "format or bitmap", - "format or bitmap" + "observe", + "observe" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 16807436920751143074, - 14986987871760575963, + 14634130761162415388, + 10901511361886185107, 18446744073709551615, 18446744073709551615, - 9, - 25, - 9, - 25, - 2, - 5, + 376, + 384, + 376, + 384, + 71, + 73, true, - "past few decades", - "past few decades" + "that the", + "that the" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 7863808487922385366, - 2936430672705644663, + 1151653930094198889, + 6279210758650536115, 18446744073709551615, 18446744073709551615, - 41, - 60, - 41, - 60, - 9, - 11, + 385, + 411, + 385, + 411, + 73, + 76, true, - "scientific articles", - "scientific articles" + "out-ofthe-box Faster R-CNN", + "out-ofthe-box Faster R-CNN" ], [ - "term", - "single-term", - 17999848460847860039, + "expression", + "word-concatenation", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 7143078508811650826, - 1305762834470469664, + 15656590191683919916, + 3502038016915722737, 18446744073709551615, 18446744073709551615, - 65, - 85, - 65, - 85, - 12, - 14, + 385, + 398, + 385, + 398, + 73, + 74, true, - "technical literature", - "technical literature" + "out-ofthe-box", + "out-ofthe-box" ], [ - "term", - "single-term", - 17999848460847860039, + "expression", + "word-concatenation", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 2831583870146744553, - 1311385802074388264, + 329104162326555074, + 12378649640990487310, 18446744073709551615, 18446744073709551615, - 148, - 158, - 148, - 158, - 25, - 27, + 406, + 411, + 406, + 411, + 75, + 76, true, - "great need", - "great need" + "R-CNN", + "R-CNN" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 1602384110795404989, - 1921537330407092158, + 389609625697843734, + 11702137981936100184, 18446744073709551615, 18446744073709551615, - 319, - 329, - 319, - 329, - 54, - 56, + 412, + 416, + 412, + 416, + 76, + 77, true, - "PDF format", - "PDF format" + "from", + "from" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 7850715239909526655, - 8028877058422980465, + 2455254482033220466, + 11766388440552122471, 18446744073709551615, 18446744073709551615, - 333, - 346, - 333, - 346, - 57, - 59, + 417, + 427, + 417, + 427, + 77, + 78, true, - "bitmap images", - "bitmap images" + "Tensorflow", + "Tensorflow" ], [ - "term", - "single-term", - 17999848460847860039, + "verb", + "compound-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 1806804053579249155, - 8335167387144157878, + 436128332273723128, + 12647681645588449593, 18446744073709551615, 18446744073709551615, - 389, - 408, - 389, - 408, - 69, - 72, + 428, + 446, + 428, + 446, + 78, + 81, true, - "eg complex tables", - "e.g. complex tables" + "does not implement", + "does not implement" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 13450540556572295481, - 4139295332657747437, + 14652257119591248677, + 16033503133782517052, 18446744073709551615, 18446744073709551615, - 449, - 464, - 449, - 464, - 79, - 81, + 451, + 459, + 451, + 459, + 82, + 83, true, - "quantitive data", - "quantitive data" + "batching", + "batching" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 12206009578906402256, - 12092500979427102718, + 2511937742856062086, + 2355253536228937084, 18446744073709551615, 18446744073709551615, - 525, - 545, - 525, - 545, - 93, - 95, + 460, + 470, + 460, + 470, + 83, + 85, true, - "cloud-based platform", - "cloud-based platform" + "during the", + "during the" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 12638008641667971393, - 6722150771778728224, + 12141441254112579393, + 8271858979549955993, 18446744073709551615, 18446744073709551615, - 602, - 627, - 602, - 627, - 106, - 109, + 471, + 485, + 471, + 485, + 85, + 87, true, - "Corpus Conversion Service", - "Corpus Conversion Service" + "training phase", + "training phase" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 3735444463619010795, - 10473776487201094119, + 329104161580427521, + 12357508218241612915, 18446744073709551615, 18446744073709551615, - 709, - 728, - 709, - 728, - 125, - 128, + 487, + 492, + 487, + 492, + 88, + 89, true, - "ie collect ground", - "i.e. collect ground" + "while", + "while" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 3416039644310333922, - 4934158934704280837, + 2503288761659507641, + 9743919505994936922, 18446744073709551615, 18446744073709551615, - 737, - 785, - 737, - 785, - 132, - 136, + 493, + 507, + 493, + 507, + 89, + 91, true, - "train machine-learning classification algorithms", - "train machine-learning classification algorithms" + "YOLOv2 batches", + "YOLOv2 batches" ], [ - "term", - "single-term", - 17999848460847860039, + "expression", + "wtoken-concatenation", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 2954625771153872709, - 4652514773317300232, + 16381206533950151485, + 7463375822213972642, 18446744073709551615, 18446744073709551615, - 850, - 890, - 850, - 890, - 147, - 151, + 493, + 499, + 493, + 499, + 89, + 90, true, - "structured content representation format", - "structured content representation format" + "YOLOv2", + "YOLOv2" ], [ - "term", - "single-term", - 17999848460847860039, + "numval", + "ival", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 7838671148811051201, - 3585713728473930092, + 17767354399704235152, + 12733743887789901018, 18446744073709551615, 18446744073709551615, - 952, - 990, - 952, - 990, - 165, - 168, + 508, + 509, + 508, + 509, + 91, + 92, true, - "asynchronous microservice architecture", - "asynchronous microservice architecture" + "8", + "8" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 11942859038914222878, - 6623027391573465220, + 16381206560620045048, + 7774432132927566429, 18446744073709551615, 18446744073709551615, - 1016, - 1031, - 1016, - 1031, - 172, - 174, + 510, + 516, + 510, + 516, + 92, + 93, true, - "massive amounts", - "massive amounts" + "images", + "images" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 5415884051047601374, - 4355778428986290778, + 389609625700792947, + 11701923673037716898, 18446744073709551615, 18446744073709551615, - 1133, - 1160, - 1133, - 1160, - 191, - 193, + 517, + 521, + 517, + 521, + 93, + 95, true, - "machine-learning algorithms", - "machine-learning algorithms" + "at a", + "at a" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 11805639520798919476, - 8476511316725219115, + 389609625631241985, + 11701890325058806343, 18446744073709551615, 18446744073709551615, - 1227, - 1240, - 1227, - 1240, - 207, - 209, + 522, + 526, + 522, + 526, + 95, + 96, true, - "large amounts", - "large amounts" + "time", + "time" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 5928632445065269445, - 14217942914367810037, + 16381206519429140242, + 7379520217990130218, 18446744073709551615, 18446744073709551615, - 1265, - 1276, - 1265, - 1276, - 213, - 215, + 528, + 534, + 528, + 534, + 97, + 98, true, - "little time", - "little time" + "thanks", + "thanks" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 10100743957883477761, - 17954790962075745659, + 329104159243175056, + 8638673086732548345, 18446744073709551615, 18446744073709551615, - 1293, - 1322, - 1293, - 1322, - 218, - 221, + 535, + 540, + 535, + 540, + 98, + 100, true, - "good precision/recall metrics", - "good precision/recall metrics" + "to an", + "to an" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 14630472445500347050, - 6260595242788033664, + 329104161828335551, + 12350292282878253456, 18446744073709551615, 18446744073709551615, - 1380, - 1397, - 1380, - 1397, - 232, - 234, + 541, + 546, + 541, + 546, + 100, + 101, true, - "structured output", - "structured output" + "image", + "image" ], [ - "term", - "single-term", - 17999848460847860039, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 10465443055056631368, - 58866334284871721, + 14634109260174176887, + 3059970276159290973, 18446744073709551615, 18446744073709551615, - 1403, - 1415, - 1403, - 1415, - 236, - 238, + 547, + 555, + 547, + 555, + 101, + 102, true, - "CCS platform", - "CCS platform" + "resizing", + "resizing" ], [ - "term", - "single-term", - 17999848460847860039, + "verb", + "compound-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 168078114375663109, - 12852846298920524296, + 2778023241922598008, + 5238034027547162597, 18446744073709551615, 18446744073709551615, - 1441, - 1468, - 1441, - 1468, - 242, - 245, + 562, + 586, + 562, + 586, + 103, + 106, true, - "IBM internal infrastructure", - "IBM internal infrastructure" + "is automatically applied", + "is automatically applied" ], [ - "term", - "single-term", - 17999848460847860039, + "sentence", + "", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 8462871836886525200, - 10493121872431814801, + 10235041227958384786, + 9628423971346406996, 18446744073709551615, 18446744073709551615, - 1495, - 1507, - 1495, - 1507, - 250, - 252, + 588, + 691, + 588, + 691, + 107, + 125, true, - "active users", - "active users" + "We believe that this is the main origin for the discrepancy of time-to-solution for the training phase.", + "We believe that this is the main origin for the discrepancy of time-to-solution for the training phase." ], [ - "term", - "single-term", - 17999848460847860039, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 12360325703059227080, - 15341633962216548312, + 8106397860663428876, + 2379893300042418437, 18446744073709551615, 18446744073709551615, - 1512, - 1553, - 1512, - 1553, - 253, - 256, + 591, + 598, + 591, + 598, + 108, + 109, true, - "knowledge-engineering project engagements", - "knowledge-engineering project engagements" + "believe", + "believe" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 16381206569333693762, - 10666930667336151813, + 3504047303127782210, + 14386938221778026486, 18446744073709551615, 18446744073709551615, - 31, - 37, - 31, - 37, - 7, - 8, + 599, + 608, + 599, + 608, + 109, + 111, true, - "amount", - "amount" + "that this", + "that this" ], [ - "term", - "single-term", - 17999848460847860039, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 389609625741058932, - 1609635956783744714, + 15441160910541486535, + 2599358878751709903, 18446744073709551615, 18446744073709551615, - 117, - 121, - 117, - 121, - 18, - 19, + 609, + 611, + 609, + 611, + 111, + 112, true, - "size", - "size" + "is", + "is" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 8106478573663085763, - 2644249028750571186, + 16269569307198368878, + 14888617347479270783, 18446744073709551615, 18446744073709551615, - 163, - 170, - 163, - 170, - 28, - 29, + 616, + 627, + 616, + 627, + 113, + 115, true, - "systems", - "systems" + "main origin", + "main origin" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 6167933651658664291, - 11942237281037682166, + 8106397727991264470, + 4625930078648415204, 18446744073709551615, 18446744073709551615, - 193, - 202, - 193, - 202, - 33, - 34, + 628, + 635, + 628, + 635, + 115, + 117, true, - "documents", - "documents" + "for the", + "for the" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 329104161785194305, - 772802872201272523, + 1478855739373258073, + 16768663803468661998, 18446744073709551615, 18446744073709551615, - 206, - 211, - 206, - 211, - 35, - 36, + 636, + 647, + 636, + 647, + 117, + 118, true, - "scale", - "scale" + "discrepancy", + "discrepancy" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 6184122545182835014, - 10915241214874887145, + 15441160910541485670, + 2599358870315233503, 18446744073709551615, 18446744073709551615, - 235, - 244, - 235, - 244, - 40, - 41, + 648, + 650, + 648, + 650, + 118, + 119, true, - "knowledge", - "knowledge" + "of", + "of" ], [ - "term", - "single-term", - 17999848460847860039, + "expression", + "word-concatenation", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 16381206548538896813, - 17191059727726770924, + 6285955549867796622, + 12901492066051459793, 18446744073709551615, 18446744073709551615, - 283, - 289, - 283, - 289, - 47, - 48, - true, - "format", - "format" + 651, + 667, + 651, + 667, + 119, + 120, + true, + "time-to-solution", + "time-to-solution" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 6167933651658664291, - 11942237281037615868, + 6285955549867796622, + 12901492066051459793, 18446744073709551615, 18446744073709551615, - 299, - 308, - 299, - 308, - 50, - 51, + 651, + 667, + 651, + 667, + 119, + 120, true, - "documents", - "documents" + "time-to-solution", + "time-to-solution" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 15493249494625550468, - 17136530455551824273, + 8106397727991264470, + 4625930078648412606, 18446744073709551615, 18446744073709551615, - 363, - 375, - 363, - 375, - 64, - 65, + 668, + 675, + 668, + 675, + 120, + 122, true, - "presentation", - "presentation" + "for the", + "for the" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 389609625696431489, - 1272382058296184235, + 12141441254112579393, + 8271858979549787104, 18446744073709551615, 18446744073709551615, - 383, - 387, - 383, - 387, - 67, - 68, + 676, + 690, + 676, + 690, + 122, + 124, true, - "data", - "data" + "training phase", + "training phase" ], [ - "term", - "single-term", - 17999848460847860039, + "sentence", + "", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 5303544497514782120, - 263131364412872028, + 11909429825414533491, + 7916582600131240808, 18446744073709551615, 18446744073709551615, - 419, - 429, - 419, - 429, - 75, - 76, + 692, + 731, + 692, + 731, + 125, + 133, true, - "extraction", - "extraction" + "The same holds true for the prediction.", + "The same holds true for the prediction." ], [ - "term", - "single-term", - 17999848460847860039, + "verb", + "single-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 329104161668023890, - 773695676617294129, + 329104161533598953, + 11928511646589428500, 18446744073709551615, 18446744073709551615, - 496, - 501, - 496, - 501, - 86, - 87, + 701, + 706, + 701, + 706, + 127, + 128, true, - "paper", - "paper" + "holds", + "holds" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 6167933651658664291, - 11942237281037632251, + 14634153888224917429, + 9004783391296823986, 18446744073709551615, 18446744073709551615, - 556, - 565, - 556, - 565, - 97, - 98, + 707, + 715, + 707, + 715, + 128, + 130, true, - "documents", - "documents" + "true for", + "true for" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 329104161785194305, - 772802872201252868, + 14103651237077221583, + 1262912962491166125, 18446744073709551615, 18446744073709551615, - 569, - 574, - 569, - 574, - 99, - 100, + 720, + 730, + 720, + 730, + 131, + 132, true, - "scale", - "scale" + "prediction", + "prediction" ], [ - "term", - "single-term", - 17999848460847860039, + "sentence", + "", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 14814125365076808131, - 9647025272576644413, + 7447987213947934224, + 363147361352019607, 18446744073709551615, 18446744073709551615, - 581, - 589, - 581, - 589, - 102, - 103, + 732, + 913, + 732, + 911, + 133, + 172, true, - "platform", - "platform" + "Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", + "Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node)." ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 12178341415896221596, - 12963251184768892790, + 14637917359887717745, + 11341143089950838331, 18446744073709551615, 18446744073709551615, - 629, - 632, - 629, - 632, - 110, - 111, + 743, + 751, + 743, + 751, + 135, + 137, true, - "CCS", - "CCS" + "from the", + "from the" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 14814125852840540191, - 2945478222614419396, + 329104161594416377, + 12352174572142722555, 18446744073709551615, 18446744073709551615, - 648, - 656, - 648, - 656, - 115, - 116, + 752, + 757, + 752, + 757, + 137, + 138, true, - "pipeline", - "pipeline" + "point", + "point" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 329104159157820437, - 995383834556884589, + 15441160910541485670, + 2599358870315209500, 18446744073709551615, 18446744073709551615, - 670, - 675, - 670, - 675, - 118, - 119, + 758, + 760, + 758, + 760, + 138, + 139, true, - "users", - "users" + "of", + "of" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 6167933651658664291, - 11942237281037582534, + 389609625619349298, + 11674445135708463101, 18446744073709551615, 18446744073709551615, - 698, - 707, - 698, - 707, - 123, - 124, + 761, + 765, + 761, + 765, + 139, + 140, true, - "documents", - "documents" + "view", + "view" ], [ - "term", - "single-term", - 17999848460847860039, + "conn", + "single-conn", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 329104159241711235, - 991946153785165058, + 16381206565712212855, + 7825456364758516667, 18446744073709551615, 18446744073709551615, - 729, - 734, - 729, - 734, - 129, - 130, + 766, + 772, + 766, + 772, + 140, + 142, true, - "truth", - "truth" + "of the", + "of the" ], [ "term", @@ -2810,86 +2923,86 @@ [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 12178341415896289890, - 12968333890042400352, + 14814125365076808131, + 10453527503990666008, 18446744073709551615, 18446744073709551615, - 821, - 824, - 821, - 824, + 773, + 781, + 773, + 781, 142, 143, true, - "PDF", - "PDF" + "platform", + "platform" ], [ "term", "single-term", - 17999848460847860039, + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 3932662928795581219, - 3325076288347729928, + 4237078182846444452, + 7428907322213125011, 18446744073709551615, 18446744073709551615, - 828, - 844, - 828, - 844, - 144, + 787, + 806, + 787, + 806, 145, + 147, true, - "bitmap-documents", - "bitmap-documents" + "YOLOv2 architecture", + "YOLOv2 architecture" ], [ - "term", - "single-term", - 17999848460847860039, + "expression", + "wtoken-concatenation", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 8106464525640940249, - 12084772193525026048, + 16381206533950151485, + 7463375822214056128, 18446744073709551615, 18446744073709551615, - 922, - 929, - 922, - 929, - 159, - 160, + 787, + 793, + 787, + 793, + 145, + 146, true, - "modules", - "modules" + "YOLOv2", + "YOLOv2" ], [ - "term", - "single-term", - 17999848460847860039, + "verb", + "compound-verb", + 18259197018396996238, "TEXT", - "#/texts/8", + "#/texts/51", 1.0, - 6167933651658664291, - 11942237281037800116, + 18110906195041757747, + 18325478196446152715, 18446744073709551615, 18446744073709551615, - 1035, - 1044, - 1035, - 1044, - 175, - 176, + 807, + 826, + 807, + 826, + 147, + 150, true, - "documents", - "documents" + "seems better suited", + "seems better suited" ], [ "term", @@ -2913,88 +3026,88 @@ "capability" ], [ - "term", - "single-term", - 17999848460847860039, + "link", + "email", + 7377574370756688828, "TEXT", - "#/texts/8", + "#/texts/0", 1.0, - 329104161571401725, - 741255023938407211, + 5663610854084581987, + 12665388994729576179, 18446744073709551615, 18446744073709551615, - 1177, - 1182, - 1177, - 1182, - 197, - 198, + 0, + 38, + 0, + 38, + 0, + 5, true, - "order", - "order" + "arXiv:1806.02284v1[cs.DL]24May2018", + "arXiv:1806.02284v1 [cs.DL] 24 May 2018" ], [ - "term", - "single-term", - 17999848460847860039, + "expression", + "wtoken-concatenation", + 7377574370756688828, "TEXT", - "#/texts/8", + "#/texts/0", 1.0, - 6179392101937111178, - 13132284913272968426, + 5564484558542728887, + 6260400721402515593, 18446744073709551615, 18446744073709551615, - 1186, - 1195, - 1186, - 1195, - 199, - 200, + 0, + 18, + 0, + 18, + 0, + 1, true, - "magnitude", - "magnitude" + "arXiv:1806.02284v1", + "arXiv:1806.02284v1" ], [ - "term", - "single-term", - 17999848460847860039, + "parenthesis", + "square brackets", + 7377574370756688828, "TEXT", - "#/texts/8", + "#/texts/0", 1.0, - 3753411203337468488, - 16756051673090420119, + 8106340136782143757, + 305332543809292699, 18446744073709551615, 18446744073709551615, - 1244, - 1256, - 1244, - 1256, - 210, - 211, + 19, + 26, + 19, + 26, + 1, + 2, true, - "ground-truth", - "ground-truth" + "[cs.DL]", + "[cs.DL]" ], [ - "term", - "single-term", - 17999848460847860039, + "expression", + "wtoken-concatenation", + 7377574370756688828, "TEXT", - "#/texts/8", + "#/texts/0", 1.0, - 329104161634702433, - 739201814026917115, + 8106340136782143757, + 305332543809292699, 18446744073709551615, 18446744073709551615, - 1330, - 1335, - 1330, - 1335, - 223, - 224, + 19, + 26, + 19, + 26, + 1, + 2, true, - "range", - "range" + "[cs.DL]", + "[cs.DL]" ], [ "term", @@ -3018,487 +3131,487 @@ "regard" ], [ - "term", - "single-term", - 17999848460847860039, + "sentence", + "", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 2703018679320364082, - 15916371892854536925, + 7429795002768371766, + 12580216355924388710, 18446744073709551615, 18446744073709551615, - 1366, - 1376, - 1366, - 1376, - 230, - 231, + 0, + 136, + 0, + 136, + 0, + 21, true, - "conversion", - "conversion" + "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously.", + "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously." ], [ - "verb", - "compound-verb", - 17999848460847860039, + "conn", + "single-conn", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 11956062550033090038, - 9437126490011979695, + 8106351438779293396, + 7036921387199751321, 18446744073709551615, 18446744073709551615, - 86, - 113, - 86, - 113, - 14, - 17, + 0, + 7, + 0, + 7, + 0, + 2, true, - "has increased exponentially", - "has increased exponentially" + "For the", + "For the" ], [ - "verb", - "compound-verb", - 17999848460847860039, + "term", + "single-term", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 5690225847229166303, - 18320034715902341983, + 4471200074237295914, + 1456466697102274833, 18446744073709551615, 18446744073709551615, - 1115, - 1129, - 1115, - 1129, - 188, - 190, + 8, + 28, + 8, + 28, + 2, + 4, true, - "is accelerated", - "is accelerated" + "performance analysis", + "performance analysis" ], [ "verb", - "compound-verb", - 17999848460847860039, + "single-verb", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 9791407429604398000, - 14740221032007164243, + 12178341415895617983, + 6222927924466837926, 18446744073709551615, 18446744073709551615, - 1281, - 1292, - 1281, - 1292, - 216, - 218, + 30, + 33, + 30, + 33, + 5, + 6, true, - "obtain very", - "obtain very" + "let", + "let" ], [ "verb", - "compound-verb", - 17999848460847860039, + "single-verb", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 2604368229451749231, - 5954729608874990660, + 8106342536055423396, + 1623603363237275433, 18446744073709551615, 18446744073709551615, - 1416, - 1437, - 1416, - 1437, - 238, - 241, + 37, + 44, + 37, + 44, + 7, + 8, true, - "is currently deployed", - "is currently deployed" + "outline", + "outline" ], [ - "verb", - "single-verb", - 17999848460847860039, + "term", + "single-term", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 15441160910541486535, - 11606670739881444005, + 4048925549312111393, + 15542194947650577050, 18446744073709551615, 18446744073709551615, - 143, - 145, - 143, - 145, - 23, - 24, + 49, + 69, + 49, + 69, + 9, + 11, true, - "is", - "is" + "pre-processing stage", + "pre-processing stage" ], [ - "verb", - "single-verb", - 17999848460847860039, + "expression", + "word-concatenation", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 2873440693780286732, - 16242747501520400497, + 3002943871017471876, + 6314608314970297277, 18446744073709551615, 18446744073709551615, - 176, - 186, - 176, - 186, - 30, - 32, + 49, + 63, + 49, + 63, + 9, + 10, true, - "can ingest", - "can ingest" + "pre-processing", + "pre-processing" ], [ "verb", - "single-verb", - 17999848460847860039, + "compound-verb", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 389609625618412480, - 1610868918855298631, + 6181919773618307675, + 13087072183397009947, 18446744073709551615, 18446744073709551615, - 216, - 220, - 216, - 220, - 37, - 38, + 76, + 85, + 76, + 85, + 12, + 14, true, - "make", - "make" + "is needed", + "is needed" ], [ - "verb", - "single-verb", - 17999848460847860039, + "conn", + "single-conn", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 5947879769709188533, - 15628690943209790850, + 16381206569837301772, + 829894264837423586, 18446744073709551615, 18446744073709551615, - 225, - 234, - 225, - 234, - 39, - 40, + 86, + 92, + 86, + 92, + 14, + 15, true, - "contained", - "contained" + "before", + "before" ], [ "verb", "single-verb", - 17999848460847860039, + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 389609625618412480, - 1610868918855286250, + 5947879507992892292, + 3137884750946432419, 18446744073709551615, 18446744073709551615, - 410, - 414, - 410, - 414, - 73, - 74, + 93, + 102, + 93, + 102, + 15, + 16, true, - "make", - "make" + "computing", + "computing" ], [ - "verb", - "single-verb", - 17999848460847860039, + "term", + "single-term", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 8106476016677076976, - 2082360003734177772, + 8106464574171450434, + 15318495777273702751, 18446744073709551615, 18446744073709551615, - 506, - 513, - 506, - 513, - 89, - 90, + 107, + 114, + 107, + 114, + 17, + 18, true, - "present", - "present" + "metrics", + "metrics" ], [ "verb", - "single-verb", - 17999848460847860039, + "compound-verb", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 16381206560503286032, - 18414709282119286416, + 3312537848285575572, + 3682069485478563076, 18446744073709551615, 18446744073709551615, - 549, - 555, - 549, - 555, - 96, - 97, + 115, + 135, + 115, + 135, + 18, + 20, true, - "ingest", - "ingest" + "described previously", + "described previously" ], [ - "verb", - "single-verb", - 17999848460847860039, + "sentence", + "", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 16381206563350835754, - 16668546032725707234, + 16291040095568243120, + 1594236025068685140, 18446744073709551615, 18446744073709551615, - 591, - 597, - 591, - 597, - 104, - 105, + 137, + 239, + 137, + 239, + 21, + 39, true, - "called", - "called" + "The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1.", + "The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1." ], [ - "verb", - "single-verb", - 17999848460847860039, + "term", + "single-term", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 5584174880054122043, - 1259340301497714443, + 15479850329146856745, + 787461524154987429, 18446744073709551615, 18446744073709551615, - 635, - 645, - 635, - 645, - 113, - 114, + 141, + 166, + 141, + 166, + 22, + 24, true, - "implements", - "implements" + "object-detection networks", + "object-detection networks" ], [ - "verb", - "single-verb", - 17999848460847860039, + "expression", + "word-concatenation", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 16381206569317834029, - 10666754365487817153, + 3458523808570659318, + 9975991896240937817, 18446744073709551615, 18446744073709551615, - 663, - 669, - 663, - 669, - 117, - 118, + 141, + 157, + 141, + 157, + 22, + 23, true, - "allows", - "allows" + "object-detection", + "object-detection" ], [ "verb", "single-verb", - 17999848460847860039, + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 329104161667983915, - 773700989878712775, + 8106476016678293182, + 8897474810961070939, 18446744073709551615, 18446744073709551615, - 679, - 684, - 679, - 684, - 120, - 121, + 167, + 174, + 167, + 174, + 24, + 25, true, - "parse", - "parse" + "predict", + "predict" ], [ - "verb", - "single-verb", - 17999848460847860039, + "term", + "single-term", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 14650452911780017077, - 11510513167121376409, + 12178341415895638602, + 6222934568051327791, 18446744073709551615, 18446744073709551615, - 689, - 697, - 689, - 697, - 122, - 123, + 177, + 180, + 177, + 180, + 26, + 27, true, - "annotate", - "annotate" + "set", + "set" ], [ - "verb", - "single-verb", - 17999848460847860039, - "TEXT", - "#/texts/8", + "conn", + "single-conn", + 14663676516964431047, + "TEXT", + "#/texts/52", 1.0, - 8106398484416229602, - 5707746526356454429, + 15441160910541485670, + 15053982237527373603, 18446744073709551615, 18446744073709551615, - 801, - 808, - 801, - 808, - 138, - 139, + 181, + 183, + 181, + 183, + 27, + 28, true, - "convert", - "convert" + "of", + "of" ], [ "verb", "single-verb", - 17999848460847860039, + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 3534225588934870450, - 17328851096576172964, + 14652253380850532610, + 15688350870772298580, 18446744073709551615, 18446744073709551615, - 895, - 904, - 895, - 904, - 153, - 155, + 184, + 192, + 184, + 192, + 28, + 29, true, - "will show", - "will show" + "bounding", + "bounding" ], [ - "verb", - "single-verb", - 17999848460847860039, + "term", + "single-term", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 15441160910541486535, - 11606670739883745478, + 329104159325617355, + 15838640579331060931, 18446744073709551615, 18446744073709551615, - 930, - 932, - 930, - 932, - 160, - 161, + 193, + 198, + 193, + 198, + 29, + 30, true, - "is", - "is" + "boxes", + "boxes" ], [ - "verb", - "single-verb", - 17999848460847860039, + "conn", + "single-conn", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 16381206485955868973, - 16260582896355405879, + 16381206557726458966, + 4275353707798328089, 18446744073709551615, 18446744073709551615, - 1009, - 1015, - 1009, - 1015, - 171, - 172, + 199, + 205, + 199, + 205, + 30, + 32, true, - "handle", - "handle" + "with a", + "with a" ], [ - "verb", - "single-verb", - 17999848460847860039, + "term", + "single-term", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 3534225588934870450, - 17328851096575956236, + 4874473477449861741, + 3504061852580538950, 18446744073709551615, 18446744073709551615, - 1062, - 1071, - 1062, - 1071, - 180, - 182, + 206, + 222, + 206, + 222, + 32, + 34, true, - "will show", - "will show" + "confidence level", + "confidence level" ], [ - "verb", - "single-verb", - 17999848460847860039, + "conn", + "single-conn", + 14663676516964431047, "TEXT", - "#/texts/8", + "#/texts/52", 1.0, - 16381206562264646932, - 18168705856416964271, + 8106397860038858133, + 2367955007216749470, 18446744073709551615, 18446744073709551615, - 1095, - 1101, - 1095, - 1101, - 186, - 187, + 223, + 230, + 223, + 230, + 34, + 35, true, - "gather", - "gather" + "between", + "between" ], [ "verb", @@ -3522,46 +3635,25 @@ "allows" ], [ - "verb", - "single-verb", - 17999848460847860039, - "TEXT", - "#/texts/8", - 1.0, - 8106398484416916345, - 5707744688882101082, - 18446744073709551615, - 18446744073709551615, - 1358, - 1365, - 1358, - 1365, - 229, - 230, - true, - "content", - "content" - ], - [ - "verb", - "single-verb", + "sentence", + "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 8106478708506631920, - 17126853238947237410, + 8311273775079009361, + 18234444390399509646, 18446744073709551615, 18446744073709551615, - 1473, - 1480, - 1473, - 1480, - 246, - 247, + 0, + 122, + 0, + 122, + 0, + 20, true, - "serving", - "serving" + "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size.", + "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size." ], [ "conn", @@ -3585,46 +3677,46 @@ "Over the" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541485670, - 11606670832821546960, + 16807436920751143074, + 14986987871760575963, 18446744073709551615, 18446744073709551615, - 38, - 40, - 38, - 40, - 8, 9, + 25, + 9, + 25, + 2, + 5, true, - "of", - "of" + "past few decades", + "past few decades" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541486538, - 11606670739901094601, + 16381206569333693762, + 10666930667336151813, 18446744073709551615, 18446744073709551615, - 114, - 116, - 114, - 116, - 17, - 18, + 31, + 37, + 31, + 37, + 7, + 8, true, - "in", - "in" + "amount", + "amount" ], [ "conn", @@ -3633,82 +3725,82 @@ "TEXT", "#/texts/8", 1.0, - 12178341415895625940, - 12963192413398852201, + 15441160910541485670, + 11606670832821546960, 18446744073709551615, 18446744073709551615, - 159, - 162, - 159, - 162, - 27, - 28, + 38, + 40, + 38, + 40, + 8, + 9, true, - "for", - "for" + "of", + "of" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541487054, - 11606670851925858322, + 7863808487922385366, + 2936430672705644663, 18446744073709551615, 18446744073709551615, - 203, - 205, - 203, - 205, - 34, - 35, + 41, + 60, + 41, + 60, + 9, + 11, true, - "at", - "at" + "scientific articles", + "scientific articles" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 14814148868025447689, - 10464458716096298180, + 7143078508811650826, + 1305762834470469664, 18446744073709551615, 18446744073709551615, - 290, - 298, - 290, - 298, - 48, - 50, + 65, + 85, + 65, + 85, + 12, + 14, true, - "of these", - "of these" + "technical literature", + "technical literature" ], [ - "conn", - "single-conn", + "verb", + "compound-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 16381206564601699726, - 16611998392190665699, + 11956062550033090038, + 9437126490011979695, 18446744073709551615, 18446744073709551615, - 310, - 318, - 310, - 318, - 52, - 54, + 86, + 113, + 86, + 113, + 14, + 17, true, - "eg the", - "e.g. the" + "has increased exponentially", + "has increased exponentially" ], [ "conn", @@ -3717,103 +3809,103 @@ "TEXT", "#/texts/8", 1.0, - 16381206568455155979, - 10578923885508625435, + 15441160910541486538, + 11606670739901094601, 18446744073709551615, 18446744073709551615, - 356, - 362, - 356, - 362, - 62, - 64, + 114, + 116, + 114, + 116, + 17, + 18, true, - "as the", - "as the" + "in", + "in" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 16381206565712212855, - 18288882301375407275, + 389609625741058932, + 1609635956783744714, 18446744073709551615, 18446744073709551615, - 376, - 382, - 376, - 382, - 65, - 67, + 117, + 121, + 117, + 121, + 18, + 19, true, - "of the", - "of the" + "size", + "size" ], [ - "conn", - "single-conn", + "sentence", + "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541485670, - 11606670832821473010, + 8652887973149281574, + 1544181945594032747, 18446744073709551615, 18446744073709551615, - 430, - 432, - 430, - 432, - 76, - 77, + 123, + 258, + 123, + 258, + 20, + 43, true, - "of", - "of" + "Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable.", + "Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable." ], [ - "conn", - "single-conn", + "verb", + "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 8106396862006371970, - 13002336324491202712, + 15441160910541486535, + 11606670739881444005, 18446744073709551615, 18446744073709551615, - 488, - 495, - 488, - 495, - 84, - 86, + 143, + 145, + 143, + 145, + 23, + 24, true, - "In this", - "In this" + "is", + "is" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541487054, - 11606670851925882070, + 2831583870146744553, + 1311385802074388264, 18446744073709551615, 18446744073709551615, - 566, - 568, - 566, - 568, - 98, - 99, + 148, + 158, + 148, + 158, + 25, + 27, true, - "at", - "at" + "great need", + "great need" ], [ "conn", @@ -3822,82 +3914,82 @@ "TEXT", "#/texts/8", 1.0, - 15441160910541485670, - 11606670832821399597, + 12178341415895625940, + 12963192413398852201, 18446744073709551615, 18446744073709551615, - 818, - 820, - 818, - 820, - 141, - 142, + 159, + 162, + 159, + 162, + 27, + 28, true, - "of", - "of" + "for", + "for" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 3504047303033029818, - 12858913108667382047, + 8106478573663085763, + 2644249028750571186, 18446744073709551615, 18446744073709551615, - 905, - 914, - 905, - 914, - 155, - 157, + 163, + 170, + 163, + 170, + 28, + 29, true, - "that each", - "that each" + "systems", + "systems" ], [ - "conn", - "single-conn", + "verb", + "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 16381206565712212855, - 18288882301375701872, + 2873440693780286732, + 16242747501520400497, 18446744073709551615, 18446744073709551615, - 915, - 921, - 915, - 921, - 157, - 159, + 176, + 186, + 176, + 186, + 30, + 32, true, - "of the", - "of the" + "can ingest", + "can ingest" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541485670, - 11606670832821377067, + 6167933651658664291, + 11942237281037682166, 18446744073709551615, 18446744073709551615, - 1032, - 1034, - 1032, - 1034, - 174, - 175, + 193, + 202, + 193, + 202, + 33, + 34, true, - "of", - "of" + "documents", + "documents" ], [ "conn", @@ -3906,145 +3998,145 @@ "TEXT", "#/texts/8", 1.0, - 389609625631229034, - 1612226062922593249, + 15441160910541487054, + 11606670851925858322, 18446744073709551615, 18446744073709551615, - 1072, - 1076, - 1072, - 1076, - 182, - 183, + 203, + 205, + 203, + 205, + 34, + 35, true, - "that", - "that" + "at", + "at" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541486989, - 11606670853486674912, + 329104161785194305, + 772802872201272523, 18446744073709551615, 18446744073709551615, - 1130, - 1132, - 1130, - 1132, - 190, - 191, + 206, + 211, + 206, + 211, + 35, + 36, true, - "by", - "by" + "scale", + "scale" ], [ - "conn", - "single-conn", + "verb", + "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541486989, - 11606670853486803944, + 389609625618412480, + 1610868918855298631, 18446744073709551615, 18446744073709551615, - 1161, - 1163, - 1161, - 1163, - 193, - 194, + 216, + 220, + 216, + 220, + 37, + 38, true, - "by", - "by" + "make", + "make" ], [ - "conn", - "single-conn", + "verb", + "single-verb", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541487054, - 11606670851925780672, + 5947879769709188533, + 15628690943209790850, 18446744073709551615, 18446744073709551615, - 1164, - 1166, - 1164, - 1166, - 194, - 195, + 225, + 234, + 225, + 234, + 39, + 40, true, - "at", - "at" + "contained", + "contained" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541485670, - 11606670832821349359, + 6184122545182835014, + 10915241214874887145, 18446744073709551615, 18446744073709551615, - 1183, - 1185, - 1183, - 1185, - 198, - 199, + 235, + 244, + 235, + 244, + 40, + 41, true, - "of", - "of" + "knowledge", + "knowledge" ], [ - "conn", - "single-conn", + "sentence", + "", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541485670, - 11606670832821388621, + 5682935857557389413, + 3518340224243798686, 18446744073709551615, 18446744073709551615, - 1241, - 1243, - 1241, - 1243, - 209, - 210, + 259, + 487, + 259, + 487, + 43, + 84, true, - "of", - "of" + "Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging.", + "Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging." ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541486538, - 11606670739900210613, + 16381206548538896813, + 17191059727726770924, 18446744073709551615, 18446744073709551615, - 1257, - 1259, - 1257, - 1259, - 211, - 212, + 283, + 289, + 283, + 289, + 47, + 48, true, - "in", - "in" + "format", + "format" ], [ "conn", @@ -4053,61 +4145,61 @@ "TEXT", "#/texts/8", 1.0, - 16381206560518651853, - 18414993880775571288, + 14814148868025447689, + 10464458716096298180, 18446744073709551615, 18446744073709551615, - 1323, - 1329, - 1323, - 1329, - 221, - 223, + 290, + 298, + 290, + 298, + 48, + 50, true, - "in the", - "in the" + "of these", + "of these" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541485670, - 11606670832821292551, + 6167933651658664291, + 11942237281037615868, 18446744073709551615, 18446744073709551615, - 1336, - 1338, - 1336, - 1338, - 224, - 225, + 299, + 308, + 299, + 308, + 50, + 51, true, - "of", - "of" + "documents", + "documents" ], [ - "conn", - "single-conn", + "parenthesis", + "round brackets", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 389609625618037948, - 1610651885976451134, + 8624098978506921550, + 8067551676911300261, 18446744073709551615, 18446744073709551615, - 1343, - 1347, - 1343, - 1347, - 226, - 227, + 309, + 347, + 309, + 347, + 51, + 60, true, - "with", - "with" + "(e.g. the PDF format or bitmap images)", + "(e.g. the PDF format or bitmap images)" ], [ "conn", @@ -4116,103 +4208,103 @@ "TEXT", "#/texts/8", 1.0, - 15441160910541485678, - 11606670855875426468, + 16381206564601699726, + 16611998392190665699, 18446744073709551615, 18446744073709551615, - 1438, - 1440, - 1438, - 1440, - 241, - 242, + 310, + 318, + 310, + 318, + 52, + 54, true, - "on", - "on" + "eg the", + "e.g. the" ], [ - "conn", - "single-conn", + "expression", + "common", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 389609625631229040, - 1612226037052379844, + 15441160910541487324, + 11606670863251774055, 18446744073709551615, 18446744073709551615, - 1486, - 1490, - 1486, - 1490, - 248, - 249, + 310, + 314, + 310, + 314, + 52, + 53, true, - "than", - "than" + "eg", + "e.g." ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 12178341415895625940, - 12963192413398671002, + 1602384110795404989, + 1921537330407092158, 18446744073709551615, 18446744073709551615, - 1508, - 1511, - 1508, - 1511, - 252, - 253, + 319, + 329, + 319, + 329, + 54, + 56, true, - "for", - "for" + "PDF format", + "PDF format" ], [ - "conn", - "single-conn", + "term", + "enum-term-mark-2", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541485865, - 11606670830397324540, + 14506873166110432521, + 11857803489572599054, 18446744073709551615, 18446744073709551615, - 546, - 548, - 546, - 548, - 95, - 96, + 323, + 339, + 323, + 339, + 55, + 58, true, - "to", - "to" + "format or bitmap", + "format or bitmap" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541485865, - 11606670830397301532, + 7850715239909526655, + 8028877058422980465, 18446744073709551615, 18446744073709551615, - 676, - 678, - 676, - 678, - 119, - 120, + 333, + 346, + 333, + 346, + 57, + 59, true, - "to", - "to" + "bitmap images", + "bitmap images" ], [ "conn", @@ -4221,40 +4313,40 @@ "TEXT", "#/texts/8", 1.0, - 389609625631408052, - 1612210503630929212, + 16381206568455155979, + 10578923885508625435, 18446744073709551615, 18446744073709551615, - 845, - 849, - 845, - 849, - 145, - 147, + 356, + 362, + 356, + 362, + 62, + 64, true, - "to a", - "to a" + "as the", + "as the" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 329104159243175056, - 993032465640498236, + 15493249494625550468, + 17136530455551824273, 18446744073709551615, 18446744073709551615, - 946, - 951, - 946, - 951, - 163, - 165, + 363, + 375, + 363, + 375, + 64, + 65, true, - "to an", - "to an" + "presentation", + "presentation" ], [ "conn", @@ -4263,586 +4355,607 @@ "TEXT", "#/texts/8", 1.0, - 15441160910541485865, - 11606670830397529924, + 16381206565712212855, + 18288882301375407275, 18446744073709551615, 18446744073709551615, - 1092, - 1094, - 1092, - 1094, - 185, - 186, + 376, + 382, + 376, + 382, + 65, + 67, true, - "to", - "to" + "of the", + "of the" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 8106351192274276906, - 17899388016831785682, + 389609625696431489, + 1272382058296184235, 18446744073709551615, 18446744073709551615, - 1212, - 1219, - 1212, - 1219, - 204, - 206, + 383, + 387, + 383, + 387, + 67, + 68, true, - "to both", - "to both" + "data", + "data" ], [ - "conn", - "single-conn", + "parenthesis", + "round brackets", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541485865, - 11606670830397545800, + 4552190965366435023, + 5994729969442454976, 18446744073709551615, 18446744073709551615, - 1355, - 1357, - 1355, - 1357, - 228, - 229, + 388, + 409, + 388, + 409, + 68, + 73, true, - "to", - "to" + "(e.g. complex tables)", + "(e.g. complex tables)" ], [ - "conn", - "single-conn", + "term", + "single-term", 17999848460847860039, "TEXT", "#/texts/8", 1.0, - 15441160910541485865, - 11606670830397544434, + 1806804053579249155, + 8335167387144157878, 18446744073709551615, 18446744073709551615, - 1377, - 1379, - 1377, - 1379, - 231, - 232, + 389, + 408, + 389, + 408, + 69, + 72, true, - "to", - "to" + "eg complex tables", + "e.g. complex tables" ], [ - "numval", - "year", - 11222145795862225841, + "expression", + "common", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 389609625548777054, - 918164764798402581, + 15441160910541487324, + 11606670863251791461, 18446744073709551615, 18446744073709551615, - 62, - 66, - 62, - 66, - 14, - 15, + 389, + 393, + 389, + 393, + 69, + 70, true, - "2018", - "2018" + "eg", + "e.g." ], [ - "numval", - "year", - 11222145795862225841, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 389609625548777054, - 918164764798382455, + 389609625618412480, + 1610868918855286250, 18446744073709551615, 18446744073709551615, - 263, - 267, - 263, - 267, - 52, - 53, + 410, + 414, + 410, + 414, + 73, + 74, true, - "2018", - "2018" + "make", + "make" ], [ - "numval", - "fval", - 11222145795862225841, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 11541938200508964503, - 6621613840590615166, + 5303544497514782120, + 263131364412872028, 18446744073709551615, 18446744073709551615, - 351, - 366, - 351, - 366, + 419, + 429, + 419, + 429, 75, 76, true, - "3219819.3219834", - "3219819.3219834" + "extraction", + "extraction" ], [ - "numval", - "irng", - 11222145795862225841, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 329104147759644091, - 11978218711906185056, + 15441160910541485670, + 11606670832821473010, 18446744073709551615, 18446744073709551615, - 256, - 261, - 256, - 261, - 50, - 51, + 430, + 432, + 430, + 432, + 76, + 77, true, - "19-23", - "19-23" + "of", + "of" ], [ - "numval", - "ival", - 11222145795862225841, + "term", + "enum-term-mark-1", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 15441160910541481862, - 12820302901235644324, + 9845754748010686003, + 13443808248487347009, 18446744073709551615, 18446744073709551615, - 162, - 164, - 162, - 164, - 34, - 35, + 433, + 464, + 433, + 464, + 77, + 81, true, - "18", - "18" + "qualitative and quantitive data", + "qualitative and quantitive data" ], [ - "numval", - "ival", - 11222145795862225841, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 17767354399704235153, - 5919416028440889582, + 13450540556572295481, + 4139295332657747437, 18446744073709551615, 18446744073709551615, - 317, - 318, - 317, - 318, - 68, - 69, + 449, + 464, + 449, + 464, + 79, + 81, true, - "9", - "9" + "quantitive data", + "quantitive data" ], [ - "numval", - "ival", - 11222145795862225841, + "sentence", + "", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 389609625536247226, - 914428205219130181, + 18403546089192870947, + 3375274648488008071, 18446744073709551615, 18446744073709551615, - 346, - 350, - 346, - 350, - 73, - 74, + 488, + 575, + 488, + 575, + 84, + 101, true, - "1145", - "1145" + "In this paper, we present a modular, cloud-based platform to ingest documents at scale.", + "In this paper, we present a modular, cloud-based platform to ingest documents at scale." ], [ - "link", - "url", - 11222145795862225841, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 3534146179424153776, - 16664784081959773586, + 8106396862006371970, + 13002336324491202712, 18446744073709551615, 18446744073709551615, - 326, - 344, - 326, - 344, - 71, - 72, + 488, + 495, + 488, + 495, + 84, + 86, true, - "https://doi.org/10", - "https://doi.org/10" + "In this", + "In this" ], [ - "expression", - "wtoken-concatenation", - 11222145795862225841, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 389609625548781308, - 918163733627828877, + 329104161668023890, + 773695676617294129, 18446744073709551615, 18446744073709551615, - 170, - 174, - 170, - 174, - 37, - 38, + 496, + 501, + 496, + 501, + 86, + 87, true, - "24th", - "24th" + "paper", + "paper" ], [ - "expression", - "wtoken-concatenation", - 11222145795862225841, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 3534146179424153776, - 16664784081959773586, + 8106476016677076976, + 2082360003734177772, 18446744073709551615, 18446744073709551615, - 326, - 344, - 326, - 344, - 71, - 72, + 506, + 513, + 506, + 513, + 89, + 90, true, - "https://doi.org/10", - "https://doi.org/10" + "present", + "present" ], [ - "sentence", - "", - 11222145795862225841, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 6693772321601013182, - 4815660570213750530, + 12206009578906402256, + 12092500979427102718, 18446744073709551615, 18446744073709551615, - 0, - 61, - 0, - 61, - 0, - 14, + 525, + 545, + 525, + 545, + 93, + 95, true, - "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas.", - "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas." + "cloud-based platform", + "cloud-based platform" ], [ - "sentence", - "", - 11222145795862225841, + "expression", + "word-concatenation", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 11303007895399162817, - 1663341249273745902, + 15169931585135175826, + 17270979630715224833, 18446744073709551615, 18446744073709551615, - 68, - 152, - 68, - 152, - 16, - 30, + 525, + 536, + 525, + 536, + 93, + 94, true, - "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", - "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale." + "cloud-based", + "cloud-based" ], [ - "sentence", - "", - 11222145795862225841, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 7000258542330205625, - 12590258289379456668, + 15441160910541485865, + 11606670830397324540, 18446744073709551615, 18446744073709551615, - 154, - 292, - 154, - 292, - 31, - 59, + 546, + 548, + 546, + 548, + 95, + 96, true, - "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom.", - "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom." + "to", + "to" ], [ - "sentence", - "", - 11222145795862225841, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 17980062243523090453, - 3043178868879598133, + 16381206560503286032, + 18414709282119286416, 18446744073709551615, 18446744073709551615, - 293, - 325, - 293, - 325, - 59, - 71, + 549, + 555, + 549, + 555, + 96, + 97, true, - "ACM, New York, NY, USA, 9 pages.", - "ACM, New York, NY, USA, 9 pages." + "ingest", + "ingest" ], [ - "sentence", - "", - 11222145795862225841, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 3268516227836428987, - 8296109392654892130, + 6167933651658664291, + 11942237281037632251, 18446744073709551615, 18446744073709551615, - 326, - 345, - 326, - 345, - 71, - 73, + 556, + 565, + 556, + 565, + 97, + 98, true, - "https://doi.org/10.", - "https://doi.org/10." + "documents", + "documents" ], [ - "term", - "enum-term-mark-4", - 11222145795862225841, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 795735363451947563, - 16676628183188309306, + 15441160910541487054, + 11606670851925882070, 18446744073709551615, 18446744073709551615, - 214, - 247, - 214, - 247, - 43, - 48, + 566, + 568, + 566, + 568, + 98, + 99, true, - "Knowledge Discovery & Data Mining", - "Knowledge Discovery & Data Mining" + "at", + "at" ], [ "term", "single-term", - 11222145795862225841, + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 4686361850733567621, - 14659076240775980364, + 329104161785194305, + 772802872201252868, 18446744073709551615, 18446744073709551615, - 0, - 15, - 0, - 15, - 0, - 4, + 569, + 574, + 569, + 574, + 99, + 100, true, - "Peter W J Staar", - "Peter W J Staar" + "scale", + "scale" ], [ - "term", - "single-term", - 11222145795862225841, + "sentence", + "", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 1571808557594152175, - 2521268111811279239, + 15870780009666831983, + 2120332988466055117, 18446744073709551615, 18446744073709551615, - 17, - 30, - 17, - 30, - 5, - 7, + 576, + 891, + 576, + 891, + 101, + 152, true, - "Michele Dolfi", - "Michele Dolfi" + "This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format.", + "This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format." ], [ "term", "single-term", - 11222145795862225841, + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 9737597816447750448, - 18360796446007226291, + 14814125365076808131, + 9647025272576644413, 18446744073709551615, 18446744073709551615, - 32, - 46, - 32, - 46, - 8, - 10, + 581, + 589, + 581, + 589, + 102, + 103, true, - "Christoph Auer", - "Christoph Auer" + "platform", + "platform" ], [ - "term", - "single-term", - 11222145795862225841, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 10999349626623612055, - 7141385911209629847, + 16381206563350835754, + 16668546032725707234, 18446744073709551615, 18446744073709551615, - 48, - 60, - 48, - 60, - 11, - 13, + 591, + 597, + 591, + 597, + 104, + 105, true, - "Costas Bekas", - "Costas Bekas" + "called", + "called" ], [ "term", "single-term", - 11222145795862225841, + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, 12638008641667971393, - 8431069953230460203, + 6722150771778728224, 18446744073709551615, 18446744073709551615, - 68, - 93, - 68, - 93, - 16, - 19, + 602, + 627, + 602, + 627, + 106, + 109, true, "Corpus Conversion Service", "Corpus Conversion Service" ], [ - "term", - "single-term", - 11222145795862225841, + "parenthesis", + "round brackets", + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 3953336115302703444, - 2106931920663782483, + 329104053210116957, + 3393895258272698836, 18446744073709551615, 18446744073709551615, - 97, - 122, - 97, - 122, - 21, - 24, + 628, + 633, + 628, + 633, + 109, + 112, true, - "Machine Learning Platform", - "Machine Learning Platform" + "(CCS)", + "(CCS)" ], [ "term", "single-term", - 11222145795862225841, + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 2543543638813814383, - 10045085706299781635, + 12178341415896221596, + 12963251184768892790, 18446744073709551615, 18446744073709551615, - 126, - 142, - 126, - 142, - 25, - 27, + 629, + 632, + 629, + 632, + 110, + 111, true, - "Ingest Documents", - "Ingest Documents" + "CCS", + "CCS" + ], + [ + "verb", + "single-verb", + 17999848460847860039, + "TEXT", + "#/texts/8", + 1.0, + 5584174880054122043, + 1259340301497714443, + 18446744073709551615, + 18446744073709551615, + 635, + 645, + 635, + 645, + 113, + 114, + true, + "implements", + "implements" ], [ "term", "single-term", - 11222145795862225841, + 17999848460847860039, "TEXT", - "#/texts/10", + "#/texts/8", 1.0, - 3830746689439412878, - 10628297214120553798, + 14814125852840540191, + 2945478222614419396, 18446744073709551615, 18446744073709551615, - 170, - 210, - 170, - 210, - 37, - 42, + 648, + 656, + 648, + 656, + 115, + 116, true, - "24th ACM SIGKDD International Conference", - "24th ACM SIGKDD International Conference" + "pipeline", + "pipeline" ], [ "term", @@ -4866,361 +4979,361 @@ "Knowledge Discovery" ], [ - "term", - "single-term", - 11222145795862225841, + "sentence", + "", + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 9639847902089872401, - 15642530745605263941, + 13412490586202463721, + 17653988074073433733, 18446744073709551615, 18446744073709551615, - 236, - 247, - 236, - 247, - 46, - 48, + 0, + 95, + 0, + 95, + 0, + 17, true, - "Data Mining", - "Data Mining" + "Table 2: Performance results for the template specific model of the Physical Review B journals.", + "Table 2: Performance results for the template specific model of the Physical Review B journals." ], [ - "term", - "single-term", - 11222145795862225841, + "numval", + "ival", + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 17782056979161528852, - 4690987004959947827, + 17767354399704235162, + 15759397524433803932, 18446744073709551615, 18446744073709551615, - 277, - 291, - 277, - 291, - 56, - 58, + 6, + 7, + 6, + 7, + 1, + 2, true, - "United Kingdom", - "United Kingdom" + "2", + "2" ], [ "term", "single-term", - 11222145795862225841, + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 14650948201816210252, - 2694576768786644093, + 8087581502811400566, + 7573439973442034769, 18446744073709551615, 18446744073709551615, - 298, - 306, - 298, - 306, - 61, - 63, + 9, + 28, + 9, + 28, + 3, + 5, true, - "New York", - "New York" + "Performance results", + "Performance results" ], [ - "term", - "single-term", - 11222145795862225841, + "conn", + "single-conn", + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 329104162321612062, - 11274361467332635215, + 8106397727991264470, + 13939727220022896426, 18446744073709551615, 18446744073709551615, - 146, - 151, - 146, - 151, - 28, 29, + 36, + 29, + 36, + 5, + 7, true, - "Scale", - "Scale" + "for the", + "for the" ], [ "term", "single-term", - 11222145795862225841, + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 12178341415896253943, - 1738717073979978820, + 13356790934987174038, + 18420992769499992239, 18446744073709551615, 18446744073709551615, - 157, - 160, - 157, - 160, - 32, - 33, + 37, + 60, + 37, + 60, + 7, + 10, true, - "KDD", - "KDD" + "template specific model", + "template specific model" ], [ - "term", - "single-term", - 11222145795862225841, + "conn", + "single-conn", + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 16381206562442326159, - 10586055992353118926, + 16381206565712212855, + 15527423972997370423, 18446744073709551615, 18446744073709551615, - 249, - 255, - 249, - 255, - 49, - 50, + 61, + 67, + 61, + 67, + 10, + 12, true, - "August", - "August" + "of the", + "of the" ], [ "term", "single-term", - 11222145795862225841, + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 16381206531301571445, - 12510416255984707889, + 9872729223299515659, + 7908640068811257205, 18446744073709551615, 18446744073709551615, - 269, - 275, - 269, - 275, - 54, - 55, + 68, + 94, + 68, + 94, + 12, + 16, true, - "London", - "London" + "Physical Review B journals", + "Physical Review B journals" ], [ - "term", - "single-term", - 11222145795862225841, + "sentence", + "", + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 12178341415896228980, - 1738757751107532979, + 2713668199866952841, + 4447940936101437620, 18446744073709551615, 18446744073709551615, - 293, - 296, - 293, - 296, - 59, - 60, + 96, + 202, + 96, + 202, + 17, + 34, true, - "ACM", - "ACM" + "The confusion matrix highlights the huge imbalance between the number of text cells with different labels.", + "The confusion matrix highlights the huge imbalance between the number of text cells with different labels." ], [ "term", "single-term", - 11222145795862225841, + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 15441160910541487804, - 12820302595509217913, + 5497358094214601811, + 7433163521566214246, 18446744073709551615, 18446744073709551615, - 308, - 310, - 308, - 310, - 64, - 65, + 100, + 116, + 100, + 116, + 18, + 20, true, - "NY", - "NY" + "confusion matrix", + "confusion matrix" ], [ - "term", - "single-term", - 11222145795862225841, + "verb", + "single-verb", + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 12178341415895650394, - 1738736899274670576, + 15927123199600624159, + 11830974991863511971, 18446744073709551615, 18446744073709551615, - 312, - 315, - 312, - 315, - 66, - 67, + 117, + 127, + 117, + 127, + 20, + 21, true, - "USA", - "USA" + "highlights", + "highlights" ], [ "term", "single-term", - 11222145795862225841, + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 329104161667992688, - 12637076450269003134, + 1488936167715046380, + 16637143750883657942, 18446744073709551615, 18446744073709551615, - 319, - 324, - 319, - 324, - 69, - 70, + 132, + 146, + 132, + 146, + 22, + 24, true, - "pages", - "pages" + "huge imbalance", + "huge imbalance" ], [ "conn", "single-conn", - 11222145795862225841, + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 15441160910541487054, - 12820303060826396831, + 2011002864325523456, + 16665978214615422828, 18446744073709551615, 18446744073709551615, - 143, - 145, - 143, - 145, - 27, - 28, + 147, + 158, + 147, + 158, + 24, + 26, true, - "at", - "at" + "between the", + "between the" ], [ - "conn", - "single-conn", - 11222145795862225841, + "term", + "single-term", + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 15441160910541480354, - 12820298442232007515, + 16381206574973295053, + 15664074499384566316, 18446744073709551615, 18446744073709551615, - 154, - 156, - 154, - 156, - 31, - 32, + 159, + 165, + 159, + 165, + 26, + 27, true, - "In", - "In" + "number", + "number" ], [ "conn", "single-conn", - 11222145795862225841, + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 15441160910541485678, - 12820303021843804862, + 15441160910541485670, + 10632466984953712528, 18446744073709551615, 18446744073709551615, - 211, - 213, - 211, - 213, - 42, - 43, + 166, + 168, + 166, + 168, + 27, + 28, true, - "on", - "on" + "of", + "of" ], [ - "conn", - "single-conn", - 11222145795862225841, + "term", + "single-term", + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 15441160910541485865, - 12820302971854609335, + 5748925367544727060, + 15357132638157717228, 18446744073709551615, 18446744073709551615, - 123, - 125, - 123, - 125, - 24, - 25, + 169, + 179, + 169, + 179, + 28, + 30, true, - "to", - "to" + "text cells", + "text cells" ], [ - "geoloc", - "country", - 11222145795862225841, + "conn", + "single-conn", + 4577067829072175096, "TEXT", - "#/texts/10", + "#/texts/53", 1.0, - 17782056979161528852, - 4690987004959947827, + 389609625618037948, + 18050712937266565062, 18446744073709551615, 18446744073709551615, - 277, - 291, - 277, - 291, - 56, - 58, + 180, + 184, + 180, + 184, + 30, + 31, true, - "United Kingdom", - "United Kingdom" + "with", + "with" ], [ - "numval", - "fval", - 16923207262044929933, + "term", + "single-term", + 4577067829072175096, "TEXT", - "#/texts/11", + "#/texts/53", 1.0, - 12178341415896439107, - 14800962307501710678, + 220880076010336098, + 14991640362132342656, 18446744073709551615, 18446744073709551615, - 39, - 42, - 39, - 42, - 7, - 8, + 185, + 201, + 185, + 201, + 31, + 33, true, - "2.5", - "2.5" + "different labels", + "different labels" ], [ "parenthesis", @@ -5244,172 +5357,172 @@ "(e.g. find me a phase-diagram of material XYZ)" ], [ - "parenthesis", - "round brackets", - 16923207262044929933, + "sentence", + "", + 4577067829072175096, "TEXT", - "#/texts/11", + "#/texts/53", 1.0, - 4516846515356980393, - 4935623304895828855, + 12325075441819606052, + 4798224535047183092, 18446744073709551615, 18446744073709551615, - 1196, - 1246, - 1196, - 1246, - 199, - 210, + 203, + 310, + 203, + 310, + 34, + 53, true, - "(with the PDF format being the most prevalent one)", - "(with the PDF format being the most prevalent one)" + "The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", + "The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types." ], [ - "parenthesis", - "round brackets", - 16923207262044929933, + "term", + "single-term", + 4577067829072175096, "TEXT", - "#/texts/11", + "#/texts/53", 1.0, - 5879944210728656410, - 9673170177479615330, + 329104159157898666, + 7979932887321468479, 18446744073709551615, 18446744073709551615, - 1432, - 1473, - 1432, - 1473, - 246, - 257, + 207, + 212, + 207, + 212, + 35, + 36, true, - "(documents, images, authors, tables, etc)", - "(documents, images, authors, tables, etc)" + "usage", + "usage" ], [ - "expression", - "common", - 16923207262044929933, + "conn", + "single-conn", + 4577067829072175096, "TEXT", - "#/texts/11", + "#/texts/53", 1.0, - 15441160910541487324, - 9094674364011169527, + 15441160910541485670, + 10632466984953723750, 18446744073709551615, 18446744073709551615, - 1049, - 1053, - 1049, - 1053, - 171, - 172, + 213, + 215, + 213, + 215, + 36, + 37, true, - "eg", - "e.g." + "of", + "of" ], [ - "expression", - "word-concatenation", - 16923207262044929933, + "term", + "single-term", + 4577067829072175096, "TEXT", - "#/texts/11", + "#/texts/53", 1.0, - 12555128312158075374, - 3585475568588858575, + 4360412890788129778, + 6086964040649348468, 18446744073709551615, 18446744073709551615, - 1064, - 1077, - 1064, - 1077, - 175, - 176, + 216, + 232, + 216, + 232, + 37, + 39, true, - "phase-diagram", - "phase-diagram" + "ensemble machine", + "ensemble machine" ], [ - "expression", - "wtoken-concatenation", - 16923207262044929933, + "verb", + "single-verb", + 4577067829072175096, "TEXT", - "#/texts/11", + "#/texts/53", 1.0, - 9623123605532099037, - 12825981064550354106, + 14639581097006750428, + 17977442740486581742, 18446744073709551615, 18446744073709551615, - 79, - 96, - 79, - 96, - 13, - 14, + 233, + 241, + 233, + 241, + 39, + 40, true, - "circulation^{1}", - "circulation$^{1}$" + "learning", + "learning" ], [ - "expression", - "wtoken-concatenation", - 16923207262044929933, + "term", + "single-term", + 4577067829072175096, "TEXT", - "#/texts/11", + "#/texts/53", 1.0, - 9653568957037915764, - 1159839439008018639, + 8106464574531629743, + 13092511743146000891, 18446744073709551615, 18446744073709551615, - 863, - 882, - 863, - 882, - 138, - 139, + 242, + 249, + 242, + 249, + 40, + 41, true, - "exponentially^{2}", - "exponentially$^{2}$" + "methods", + "methods" ], [ - "sentence", - "", - 16923207262044929933, + "verb", + "compound-verb", + 4577067829072175096, "TEXT", - "#/texts/11", + "#/texts/53", 1.0, - 17192639608086865650, - 10639035648049775025, + 12736124800502880399, + 3048726189598552717, 18446744073709551615, 18446744073709551615, - 0, - 97, - 0, - 97, - 0, - 15, + 250, + 267, + 250, + 267, + 41, + 44, true, - "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$.", - "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$." + "allows to achieve", + "allows to achieve" ], [ - "sentence", - "", - 16923207262044929933, + "conn", + "single-conn", + 4577067829072175096, "TEXT", - "#/texts/11", + "#/texts/53", 1.0, - 9088786707146406857, - 17567093053494849836, + 15441160910541485865, + 10632466981388317765, 18446744073709551615, 18446744073709551615, - 98, - 252, - 98, - 252, - 15, - 41, + 257, + 259, + 257, + 259, + 42, + 43, true, - "These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery.", - "These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery." + "to", + "to" ], [ "sentence", @@ -5439,481 +5552,481 @@ "TEXT", "#/texts/11", 1.0, - 17647932338360720997, - 5716030233811874384, + 17192639608086865650, + 10639035648049775025, 18446744073709551615, 18446744073709551615, - 360, - 509, - 360, - 509, - 59, - 84, + 0, + 97, + 0, + 97, + 0, + 15, true, - "However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout.", - "However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout." + "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$.", + "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$." ], [ - "sentence", - "", + "verb", + "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15487015001052727581, - 14484812293778889252, + 17551793109234931072, + 9841315996119329650, 18446744073709551615, 18446744073709551615, - 510, - 722, - 510, - 722, - 84, - 115, + 3, + 15, + 3, + 15, + 1, + 3, true, - "Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery.", - "Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery." + "is estimated", + "is estimated" ], [ - "sentence", - "", + "conn", + "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 3574328216950930229, - 4905315167294659186, + 389609625631229034, + 542250596720887578, 18446744073709551615, 18446744073709551615, - 723, - 883, - 723, - 883, - 115, - 140, + 16, + 20, + 16, + 20, + 3, + 4, true, - "In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$.", - "In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$." + "that", + "that" ], [ - "sentence", - "", + "verb", + "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 8347632587306657460, - 16097912844310233617, + 696181546770410912, + 10657444457642612809, 18446744073709551615, 18446744073709551615, - 884, - 988, - 884, - 988, - 140, - 160, + 27, + 38, + 27, + 38, + 5, + 7, true, - "This poses a real problem, since more and more information published in the PDF documents is going dark.", - "This poses a real problem, since more and more information published in the PDF documents is going dark." + "are roughly", + "are roughly" ], [ - "sentence", - "", + "numval", + "fval", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 7315676043002615146, - 3020292113144700597, + 12178341415896439107, + 14800962307501710678, 18446744073709551615, 18446744073709551615, - 989, - 1133, - 989, - 1133, - 160, - 187, + 39, + 42, + 39, + 42, + 7, + 8, true, - "In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components.", - "In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components." + "2.5", + "2.5" ], [ - "sentence", - "", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 8292138896065382931, - 17716571591104291388, + 3693395590591757392, + 2559252195012720165, 18446744073709551615, 18446744073709551615, - 1134, - 1345, - 1134, - 1345, - 187, - 228, + 43, + 65, + 43, + 65, + 8, + 11, true, - "First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML.", - "First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML." + "trillion PDF documents", + "trillion PDF documents" ], [ - "sentence", - "", + "conn", + "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 18073096319598857596, - 14789900833203243228, + 15441160910541486538, + 9094674367373732264, 18446744073709551615, 18446744073709551615, - 1346, - 1532, - 1346, - 1532, - 228, - 267, + 76, + 78, + 76, + 78, + 12, + 13, true, - "Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", - "Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context." + "in", + "in" ], [ - "term", - "enum-term-mark-1", + "expression", + "wtoken-concatenation", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 12322374974058800893, - 6816531868111142674, + 9623123605532099037, + 12825981064550354106, 18446744073709551615, 18446744073709551615, - 280, - 329, - 280, - 329, - 47, - 52, + 79, + 96, + 79, + 96, + 13, + 14, true, - "valuable qualitative and quantitative information", - "valuable qualitative and quantitative information" + "circulation^{1}", + "circulation$^{1}$" ], [ - "term", - "enum-term-mark-4", - 16923207262044929933, + "sentence", + "", + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 11674491770136657522, - 11680961660123138230, + 6693772321601013182, + 4815660570213750530, 18446744073709551615, 18446744073709551615, - 1333, - 1344, - 1333, - 1344, - 224, - 227, + 0, + 61, + 0, + 61, + 0, + 14, true, - "JSON or XML", - "JSON or XML" + "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas.", + "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas." ], [ "term", "single-term", - 16923207262044929933, + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 3693395590591757392, - 2559252195012720165, + 4686361850733567621, + 14659076240775980364, 18446744073709551615, 18446744073709551615, - 43, - 65, - 43, - 65, - 8, - 11, + 0, + 15, + 0, + 15, + 0, + 4, true, - "trillion PDF documents", - "trillion PDF documents" + "Peter W J Staar", + "Peter W J Staar" ], [ "term", "single-term", - 16923207262044929933, + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 8414271082704541626, - 9829432072489958078, + 1571808557594152175, + 2521268111811279239, 18446744073709551615, 18446744073709551615, - 149, - 163, - 149, - 163, - 23, - 25, + 17, + 30, + 17, + 30, + 5, + 7, true, - "annual reports", - "annual reports" + "Michele Dolfi", + "Michele Dolfi" ], [ "term", "single-term", - 16923207262044929933, + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 3282133738476528713, - 6601164231648618886, + 9737597816447750448, + 18360796446007226291, 18446744073709551615, 18446744073709551615, - 193, - 208, - 193, - 208, 32, - 34, + 46, + 32, + 46, + 8, + 10, true, - "research papers", - "research papers" + "Christoph Auer", + "Christoph Auer" ], [ "term", "single-term", - 16923207262044929933, + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 1102904554370006265, - 13125714652652128474, + 10999349626623612055, + 7141385911209629847, 18446744073709551615, 18446744073709551615, - 222, - 251, - 222, - 251, - 37, - 40, + 48, + 60, + 48, + 60, + 11, + 13, true, - "specific scientific discovery", - "specific scientific discovery" + "Costas Bekas", + "Costas Bekas" ], [ - "term", - "single-term", - 16923207262044929933, + "numval", + "year", + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 7668210657519556598, - 8800539397108400539, + 389609625548777054, + 918164764798402581, 18446744073709551615, 18446744073709551615, - 305, - 329, - 305, - 329, - 50, - 52, + 62, + 66, + 62, + 66, + 14, + 15, true, - "quantitative information", - "quantitative information" + "2018", + "2018" ], [ - "term", - "single-term", - 16923207262044929933, + "sentence", + "", + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 13935212089545515210, - 4563100627799985741, + 11303007895399162817, + 1663341249273745902, 18446744073709551615, 18446744073709551615, - 431, - 452, - 431, - 452, - 73, - 75, + 68, + 152, + 68, + 152, + 16, + 30, true, - "printing instructions", - "printing instructions" + "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", + "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale." ], [ "term", "single-term", - 16923207262044929933, + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 16582444977748815769, - 16919788927196448661, + 12638008641667971393, + 8431069953230460203, 18446744073709551615, 18446744073709551615, - 486, - 508, - 486, - 508, - 80, - 83, + 68, + 93, + 68, + 93, + 16, + 19, true, - "pleasing visual layout", - "pleasing visual layout" + "Corpus Conversion Service", + "Corpus Conversion Service" ], [ "term", "single-term", - 16923207262044929933, + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 4929058514881842733, - 10224787839479118537, + 3953336115302703444, + 2106931920663782483, 18446744073709551615, 18446744073709551615, - 519, - 538, - 519, - 538, - 86, - 88, + 97, + 122, + 97, + 122, + 21, + 24, true, - "data representation", - "data representation" + "Machine Learning Platform", + "Machine Learning Platform" ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 14929125759175486455, - 13997854025989108072, + 15441160910541485865, + 12820302971854609335, 18446744073709551615, 18446744073709551615, - 547, - 567, - 547, - 567, - 90, - 92, + 123, + 125, + 123, + 125, + 24, + 25, true, - "enormous variability", - "enormous variability" + "to", + "to" ], [ "term", "single-term", - 16923207262044929933, + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 5746783959074166208, - 15517192707477599154, + 2543543638813814383, + 10045085706299781635, 18446744073709551615, 18446744073709551615, - 635, - 649, - 635, - 649, - 102, - 104, + 126, + 142, + 126, + 142, + 25, + 27, true, - "access content", - "access content" + "Ingest Documents", + "Ingest Documents" ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 2730405582718102128, - 15726970596030809890, + 15441160910541487054, + 12820303060826396831, 18446744073709551615, 18446744073709551615, - 702, - 721, - 702, - 721, - 112, - 114, + 143, + 145, + 143, + 145, + 27, + 28, true, - "knowledge discovery", - "knowledge discovery" + "at", + "at" ], [ "term", "single-term", - 16923207262044929933, + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 16813764953769919795, - 4260210876529689133, + 329104162321612062, + 11274361467332635215, 18446744073709551615, 18446744073709551615, - 742, - 764, - 742, - 764, - 119, - 122, + 146, + 151, + 146, + 151, + 28, + 29, true, - "sheer current quantity", - "sheer current quantity" + "Scale", + "Scale" ], [ - "term", - "single-term", - 16923207262044929933, + "sentence", + "", + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 16688986026552560644, - 17177901629424753408, + 7000258542330205625, + 12590258289379456668, 18446744073709551615, 18446744073709551615, - 783, - 798, - 783, - 798, - 126, - 128, + 154, + 292, + 154, + 292, + 31, + 59, true, - "submission rate", - "submission rate" + "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom.", + "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom." ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 11222145795862225841, "TEXT", - "#/texts/11", + "#/texts/10", 1.0, - 12621877848489179259, - 15237617635766653290, + 15441160910541480354, + 12820298442232007515, 18446744073709551615, 18446744073709551615, - 829, - 846, - 829, - 846, - 133, - 135, + 154, + 156, + 154, + 156, + 31, + 32, true, - "scientific domain", - "scientific domain" + "In", + "In" ], [ "term", @@ -5937,1033 +6050,1033 @@ "real problem" ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 12653831733608918357, - 6140974263001666382, + 14638857868319795209, + 11777518988570895518, 18446744073709551615, 18446744073709551615, - 960, - 973, - 960, - 973, - 154, - 156, + 0, + 8, + 0, + 8, + 0, + 2, true, - "PDF documents", - "PDF documents" + "with the", + "with the" ], [ - "term", - "single-term", - 16923207262044929933, + "verb", + "single-verb", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 10167329824705672383, - 12701577379507576649, + 6184954633443293966, + 15964917443528191555, 18446744073709551615, 18446744073709551615, - 1081, - 1093, - 1081, - 1093, - 177, - 179, + 9, + 18, + 9, + 18, + 2, + 3, true, - "material XYZ", - "material XYZ" + "predicted", + "predicted" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 1602384110795404989, - 18168403198260411892, + 7379815267840909102, + 11228728212639806867, 18446744073709551615, 18446744073709551615, - 1206, - 1216, - 1206, - 1216, - 202, - 204, + 19, + 31, + 19, + 31, + 3, + 5, true, - "PDF format", - "PDF format" + "bounding box", + "bounding box" ], [ - "term", - "single-term", - 16923207262044929933, + "sentence", + "", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 12595883072252114156, - 7039273758002805758, + 784428348664963687, + 2735229758044296436, 18446744073709551615, 18446744073709551615, - 1232, - 1245, - 1232, - 1245, - 207, - 209, + 33, + 133, + 33, + 133, + 6, + 20, true, - "prevalent one", - "prevalent one" + "The corresponding recall and precision are then computed for this dual-class classification problem.", + "The corresponding recall and precision are then computed for this dual-class classification problem." ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 4066887494406769292, - 14849727204374093143, + 7737036869804521677, + 431221867393766623, 18446744073709551615, 18446744073709551615, - 1278, - 1299, - 1278, - 1299, - 215, - 218, + 37, + 57, + 37, + 57, + 7, + 9, true, - "structured data files", - "structured data files" + "corresponding recall", + "corresponding recall" ], [ "term", - "single-term", - 16923207262044929933, - "TEXT", - "#/texts/11", - 1.0, - 14630472899120924944, - 15550065915551638064, - 18446744073709551615, - 18446744073709551615, - 1307, - 1324, - 1307, - 1324, - 220, - 222, - true, - "structured format", - "structured format" - ], - [ - "term", - "single-term", - 16923207262044929933, + "enum-term-mark-2", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 13018076357583391135, - 18265178771346204830, + 11037453576911667853, + 12443097430245333421, 18446744073709551615, 18446744073709551615, - 1365, - 1377, - 1365, - 1377, - 233, - 235, + 51, + 71, + 51, + 71, + 8, + 11, true, - "query engine", - "query engine" + "recall and precision", + "recall and precision" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 11805624357079379862, - 2927818536118337064, + 6184954595655792282, + 18387321712019319773, 18446744073709551615, 18446744073709551615, - 1406, - 1419, - 1406, - 1419, - 242, - 244, + 62, + 71, + 62, + 71, + 10, + 11, true, - "large variety", - "large variety" + "precision", + "precision" ], [ - "term", - "single-term", - 16923207262044929933, + "verb", + "compound-verb", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 9623123605532099037, - 12825981064550354106, + 11891944663675020942, + 13358251629780069780, 18446744073709551615, 18446744073709551615, - 79, - 96, - 79, - 96, - 13, + 72, + 89, + 72, + 89, + 11, 14, true, - "circulation^{1}", - "circulation$^{1}$" + "are then computed", + "are then computed" ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 6167933651658664291, - 3744443950143797819, + 14637917333165224513, + 10908983268505451281, 18446744073709551615, 18446744073709551615, - 104, - 113, - 104, - 113, + 90, + 98, + 90, + 98, + 14, 16, - 17, true, - "documents", - "documents" + "for this", + "for this" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 8106464587474035829, - 6502274748172348363, + 11075783049363921732, + 14381818982688268241, 18446744073709551615, 18446744073709551615, - 125, + 99, 132, - 125, + 99, 132, + 16, 19, - 20, true, - "manuals", - "manuals" + "dual-class classification problem", + "dual-class classification problem" ], [ - "term", - "single-term", - 16923207262044929933, + "expression", + "word-concatenation", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 15361659830789508523, - 8413399544610388116, + 2772095701715059387, + 18429532044600751065, 18446744073709551615, 18446744073709551615, - 137, - 147, - 137, - 147, - 21, - 22, + 99, + 109, + 99, + 109, + 16, + 17, true, - "appliances", - "appliances" + "dual-class", + "dual-class" ], [ - "term", - "single-term", - 16923207262044929933, + "sentence", + "", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 5947879506556567994, - 16771512443857485166, + 3927917834152176938, + 12569591881522562313, 18446744073709551615, 18446744073709551615, - 167, - 176, - 167, - 176, - 26, - 27, + 134, + 273, + 134, + 273, + 20, + 46, true, - "companies", - "companies" + "In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence.", + "In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence." ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 12178341415895525628, - 14794601526936094944, + 15441160910541480354, + 10477275240531848205, 18446744073709551615, 18446744073709551615, - 186, - 189, - 186, - 189, - 30, - 31, + 134, + 136, + 134, + 136, + 20, + 21, true, - "way", - "way" + "In", + "In" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 8106398484416916345, - 17530806449434366453, + 329104161571401725, + 15575423851065642052, 18446744073709551615, 18446744073709551615, - 369, - 376, - 369, - 376, - 61, - 62, + 137, + 142, + 137, + 142, + 21, + 22, true, - "content", - "content" + "order", + "order" ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 12178341415896289890, - 14799990756781414830, + 15441160910541485865, + 10477275215095288698, 18446744073709551615, 18446744073709551615, - 388, - 391, - 388, - 391, - 64, - 65, + 143, + 145, + 143, + 145, + 22, + 23, true, - "PDF", - "PDF" + "to", + "to" ], [ - "term", - "single-term", - 16923207262044929933, + "verb", + "single-verb", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 16381206597113188775, - 13905938768963750102, + 15441160910541486853, + 10477289391110259759, 18446744073709551615, 18446744073709551615, - 402, - 408, - 402, - 408, - 68, - 69, + 146, + 148, + 146, + 148, + 23, + 24, true, - "nature", - "nature" + "do", + "do" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 8106478700233620678, - 8336023496233777462, + 8581372359543855162, + 10333944193716453687, 18446744073709551615, 18446744073709551615, - 420, - 427, - 420, - 427, - 71, - 72, + 151, + 166, + 151, + 166, + 25, + 27, true, - "streams", - "streams" + "fair comparison", + "fair comparison" ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 8106342461491420046, - 4172004388378103877, + 16381206565712212855, + 1966173897978141572, 18446744073709551615, 18446744073709551615, - 571, - 578, - 571, - 578, - 93, - 94, + 167, + 173, + 167, + 173, + 27, + 29, true, - "layouts", - "layouts" + "of the", + "of the" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 6167933651658664291, - 3744443950143438881, + 14814151113413570861, + 12729204908894192489, 18446744073709551615, 18446744073709551615, - 592, - 601, - 592, - 601, - 96, - 97, + 178, + 186, + 178, + 186, + 30, + 31, true, - "documents", - "documents" + "networks", + "networks" ], [ - "term", - "single-term", - 16923207262044929933, + "verb", + "single-verb", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 11387678566946341343, - 4163904415113468966, + 14814150880980441564, + 5851167619774412175, 18446744073709551615, 18446744073709551615, - 674, - 688, - 674, - 688, - 109, - 110, + 191, + 199, + 191, + 199, + 33, + 34, true, - "representation", - "representation" + "optimise", + "optimise" ], [ "term", - "single-term", - 16923207262044929933, + "enum-term-mark-2", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 14650447861280948245, - 18066875144210692331, + 767578358531619449, + 1472685584560725507, 18446744073709551615, 18446744073709551615, - 726, - 734, - 726, - 734, - 116, - 117, + 204, + 224, + 204, + 224, + 35, + 38, true, - "addition", - "addition" + "precision and recall", + "precision and recall" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 6167933651658664291, - 3744443950142901645, + 6184954595655792282, + 18387321712019245881, 18446744073709551615, 18446744073709551615, - 768, - 777, - 768, - 777, - 123, - 124, + 204, + 213, + 204, + 213, + 35, + 36, true, - "documents", - "documents" + "precision", + "precision" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 6167933651658664291, - 3744443950142902450, + 16904814960714419182, + 7305130667909903014, 18446744073709551615, 18446744073709551615, - 812, - 821, - 812, - 821, - 130, - 131, + 218, + 232, + 218, + 232, + 37, + 39, true, - "documents", - "documents" + "recall metrics", + "recall metrics" ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 9653568957037915764, - 1159839439008018639, + 389609625618037948, + 1242787593333487218, 18446744073709551615, 18446744073709551615, - 863, - 882, - 863, - 882, - 138, - 139, + 233, + 237, + 233, + 237, + 39, + 40, true, - "exponentially^{2}", - "exponentially$^{2}$" + "with", + "with" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 14388065630035882329, - 2686196032102535307, + 16381206521526353544, + 16408450721845756506, 18446744073709551615, 18446744073709551615, - 931, - 942, - 931, - 942, - 150, - 151, + 238, + 244, + 238, + 244, + 40, + 41, true, - "information", - "information" + "regard", + "regard" ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 329104161571401725, - 8768421271667196313, + 16381206519425733256, + 370344314517327407, 18446744073709551615, 18446744073709551615, - 992, - 997, - 992, - 997, - 161, - 162, + 245, + 251, + 245, + 251, + 41, + 43, true, - "order", - "order" + "to the", + "to the" ], [ - "term", - "single-term", - 16923207262044929933, + "verb", + "single-verb", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 8106398484416916345, - 17530806449433194901, + 6184954633443293966, + 15964917443528178420, 18446744073709551615, 18446744073709551615, - 1010, - 1017, - 1010, - 1017, - 165, - 166, + 252, + 261, + 252, + 261, + 43, + 44, true, - "content", - "content" + "predicted", + "predicted" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 6167933651658664291, - 3744443950142885131, + 2702871111219879214, + 2512541272008941381, 18446744073709551615, 18446744073709551615, - 1027, - 1036, - 1027, - 1036, - 168, - 169, + 262, + 272, + 262, + 272, + 44, + 45, true, - "documents", - "documents" + "confidence", + "confidence" ], [ - "term", - "single-term", - 16923207262044929933, + "sentence", + "", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 15441160910541487324, - 9094674364011169527, + 3956872905292683881, + 2752157999599851583, 18446744073709551615, 18446744073709551615, - 1049, - 1053, - 1049, - 1053, - 171, - 172, + 274, + 445, + 274, + 445, + 46, + 80, true, - "eg", - "e.g." + "For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0.", + "For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0." ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 12555128312158075374, - 3585475568588858575, + 12178341415896108722, + 156309885604541418, 18446744073709551615, 18446744073709551615, - 1064, - 1077, - 1064, - 1077, - 175, - 176, + 274, + 277, + 274, + 277, + 46, + 47, true, - "phase-diagram", - "phase-diagram" + "For", + "For" ], [ - "term", - "single-term", - 16923207262044929933, + "expression", + "wtoken-concatenation", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 2703018952916355661, - 10279229622173728080, + 16381206533950151485, + 198566132787583629, 18446744073709551615, 18446744073709551615, - 1122, - 1132, - 1122, - 1132, - 185, - 186, + 278, + 284, + 278, + 284, + 47, + 48, true, - "components", - "components" + "YOLOv2", + "YOLOv2" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 6167933651658664291, - 3744443950142942051, + 16381206533950151485, + 198566132787583629, 18446744073709551615, 18446744073709551615, - 1160, - 1169, - 1160, - 1169, - 193, - 194, + 278, + 284, + 278, + 284, + 47, + 48, true, - "documents", - "documents" + "YOLOv2", + "YOLOv2" ], [ - "term", - "single-term", - 16923207262044929933, + "verb", + "single-verb", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 8106478777441543540, - 773597955729195721, + 8106342033696543838, + 18232753974273180210, 18446744073709551615, 18446744073709551615, - 1177, - 1184, - 1177, - 1184, - 196, - 197, + 288, + 295, + 288, + 295, + 49, + 50, true, - "variety", - "variety" + "observe", + "observe" ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 8106397728035763965, - 11508792142722132367, + 14634130761162415388, + 14288776936577427060, 18446744073709551615, 18446744073709551615, - 1188, - 1195, - 1188, - 1195, - 198, - 199, + 296, + 304, + 296, + 304, + 50, + 52, true, - "formats", - "formats" + "that the", + "that the" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 6167933651658664291, - 3744443950142865755, + 16381206521531485437, + 16408606466535231414, 18446744073709551615, 18446744073709551615, - 1265, - 1274, - 1265, - 1274, - 213, - 214, + 305, + 311, + 305, + 311, + 52, + 53, true, - "documents", - "documents" + "recall", + "recall" ], [ - "term", - "single-term", - 16923207262044929933, + "verb", + "compound-verb", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 389609625541450799, - 476546803986815687, + 6183880245133195430, + 11375315636474919011, 18446744073709551615, 18446744073709551615, - 1333, - 1337, - 1333, - 1337, - 224, - 225, + 312, + 321, + 312, + 321, + 53, + 55, true, - "JSON", - "JSON" + "goes down", + "goes down" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 12178341415895541463, - 14794406103722084656, + 6184954595655792282, + 18387321712019270016, 18446744073709551615, 18446744073709551615, - 1341, - 1344, - 1341, - 1344, - 226, - 227, + 330, + 339, + 330, + 339, + 57, + 58, true, - "XML", - "XML" + "precision", + "precision" ], [ - "term", - "single-term", - 16923207262044929933, + "verb", + "single-verb", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 14652282388618227426, - 14047491818249905874, + 389609625699055541, + 1239396878369861980, 18446744073709551615, 18446744073709551615, - 1423, - 1431, - 1423, - 1431, - 245, - 246, + 340, + 344, + 340, + 344, + 58, + 59, true, - "concepts", - "concepts" + "goes", + "goes" ], [ - "term", - "single-term", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 6167933651658664291, - 3744443950142859841, + 16381206568455155979, + 1869095877123778211, 18446744073709551615, 18446744073709551615, - 1433, - 1442, - 1433, - 1442, - 247, - 248, + 348, + 354, + 348, + 354, + 60, + 62, true, - "documents", - "documents" + "as the", + "as the" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 16381206560620045048, - 15910167584621803731, + 2702871111219879214, + 2512541272008894019, 18446744073709551615, 18446744073709551615, - 1444, - 1450, - 1444, - 1450, - 249, - 250, + 355, + 365, + 355, + 365, + 62, + 63, true, - "images", - "images" + "confidence", + "confidence" ], [ - "term", - "single-term", - 16923207262044929933, + "verb", + "compound-verb", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 8106397759446161562, - 17038239979594063466, + 2694830089385977061, + 235012322887490211, 18446744073709551615, 18446744073709551615, - 1452, - 1459, - 1452, - 1459, - 251, - 252, + 366, + 378, + 366, + 378, + 63, + 65, true, - "authors", - "authors" + "is increased", + "is increased" ], [ - "term", - "single-term", - 16923207262044929933, + "verb", + "single-verb", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 16381206513098478539, - 8569522873910347573, + 6168826060228989821, + 9992741985777267919, 18446744073709551615, 18446744073709551615, - 1461, - 1467, - 1461, - 1467, - 253, - 254, + 380, + 389, + 380, + 389, + 66, + 67, true, - "tables", - "tables" + "obtaining", + "obtaining" ], [ "term", "single-term", - 16923207262044929933, + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 6167933651658664291, - 3744443950142863448, + 5859613489047657680, + 4575208165015881094, 18446744073709551615, 18446744073709551615, - 1495, - 1504, - 1495, - 1504, - 260, - 261, + 392, + 408, + 392, + 408, + 68, + 71, true, - "documents", - "documents" + "maximum F1 score", + "maximum F1 score" ], [ - "term", - "single-term", - 16923207262044929933, + "expression", + "wtoken-concatenation", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 8106398484416909789, - 17530798545720977035, + 15441160910541480158, + 10477275210029982213, 18446744073709551615, 18446744073709551615, - 1524, - 1531, - 1524, - 1531, - 265, - 266, + 400, + 402, + 400, + 402, + 69, + 70, true, - "context", - "context" + "F1", + "F1" ], [ - "verb", - "compound-verb", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 17551793109234931072, - 9841315996119329650, + 15441160910541485670, + 10477275256518274646, 18446744073709551615, 18446744073709551615, - 3, - 15, - 3, - 15, - 1, - 3, + 409, + 411, + 409, + 411, + 71, + 72, true, - "is estimated", - "is estimated" + "of", + "of" ], [ - "verb", - "compound-verb", - 16923207262044929933, + "expression", + "wtoken-concatenation", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 696181546770410912, - 10657444457642612809, + 329104147618556708, + 15461264859114081015, 18446744073709551615, 18446744073709551615, - 27, - 38, - 27, - 38, - 5, - 7, + 412, + 417, + 412, + 417, + 72, + 73, true, - "are roughly", - "are roughly" + "98.7%", + "98.7%" ], [ - "verb", - "compound-verb", - 16923207262044929933, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 17466643417440400812, - 16357177041782037840, + 389609625700792947, + 1238530397841875604, 18446744073709551615, 18446744073709551615, - 330, - 342, - 330, - 342, - 52, - 54, + 418, + 422, + 418, + 422, + 73, + 75, true, - "is contained", - "is contained" + "at a", + "at a" ], [ - "verb", - "compound-verb", - 16923207262044929933, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/11", + "#/texts/54", 1.0, - 15984679469930005672, - 16512266362137627548, + 4874473477449861741, + 7312361899298084317, 18446744073709551615, 18446744073709551615, - 409, - 419, - 409, - 419, - 69, - 71, + 423, + 439, + 423, + 439, + 75, + 77, true, - "reduced to", - "reduced to" + "confidence level", + "confidence level" + ], + [ + "conn", + "single-conn", + 2569392033451362672, + "TEXT", + "#/texts/54", + 1.0, + 15441160910541485670, + 10477275256518295884, + 18446744073709551615, + 18446744073709551615, + 440, + 442, + 440, + 442, + 77, + 78, + true, + "of", + "of" ], [ "verb", @@ -6987,109 +7100,67 @@ "purposed to faithfully present" ], [ - "verb", - "compound-verb", - 16923207262044929933, - "TEXT", - "#/texts/11", - 1.0, - 14133501046094794901, - 4250240326135716646, - 18446744073709551615, - 18446744073709551615, - 620, - 634, - 620, - 634, - 100, - 102, - true, - "challenging to", - "challenging to" - ], - [ - "verb", - "compound-verb", - 16923207262044929933, - "TEXT", - "#/texts/11", - 1.0, - 18329554120394908623, - 17010976290898309846, - 18446744073709551615, - 18446744073709551615, - 847, - 862, - 847, - 862, - 135, - 138, - true, - "is also growing", - "is also growing" - ], - [ - "verb", - "compound-verb", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 14637952034068646347, - 9688733531448391553, + 9623123605532099037, + 12825981064550354106, 18446744073709551615, 18446744073709551615, - 974, - 982, - 974, - 982, - 156, - 158, + 79, + 96, + 79, + 96, + 13, + 14, true, - "is going", - "is going" + "circulation^{1}", + "circulation$^{1}$" ], [ - "verb", - "compound-verb", + "sentence", + "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 14364253828417975278, - 8778810672464165894, + 9088786707146406857, + 17567093053494849836, 18446744073709551615, 18446744073709551615, - 1100, - 1117, - 1100, - 1117, - 182, - 184, + 98, + 252, + 98, + 252, + 15, + 41, true, - "needs essentially", - "needs essentially" + "These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery.", + "These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery." ], [ - "verb", - "compound-verb", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 16971139354256206394, - 8359549146932405741, + 6167933651658664291, + 3744443950143797819, 18446744073709551615, 18446744073709551615, - 1145, - 1159, - 1145, - 1159, - 190, - 193, + 104, + 113, + 104, + 113, + 16, + 17, true, - "need to ingest", - "need to ingest" + "documents", + "documents" ], [ "verb", @@ -7113,214 +7184,214 @@ "range" ], [ - "verb", - "single-verb", + "conn", + "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 6180169263126451304, - 4214562769527423312, + 389609625697843734, + 497670111222755023, 18446744073709551615, 18446744073709551615, - 210, - 219, - 210, - 219, - 35, - 36, + 120, + 124, + 120, + 124, + 18, + 19, true, - "detailing", - "detailing" + "from", + "from" ], [ - "verb", - "single-verb", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541486535, - 9094674367324716363, + 8106464587474035829, + 6502274748172348363, 18446744073709551615, 18446744073709551615, - 256, - 258, - 256, - 258, - 42, - 43, + 125, + 132, + 125, + 132, + 19, + 20, true, - "is", - "is" + "manuals", + "manuals" ], [ - "verb", - "single-verb", + "conn", + "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 12178341415895645562, - 14799989741446549720, + 12178341415895625940, + 14799992967704466108, 18446744073709551615, 18446744073709551615, - 271, - 274, - 271, - 274, - 45, - 46, + 133, + 136, + 133, + 136, + 20, + 21, true, - "say", - "say" + "for", + "for" ], [ - "verb", - "single-verb", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 8106397531449655911, - 14632270885483087688, + 15361659830789508523, + 8413399544610388116, 18446744073709551615, 18446744073709551615, - 377, - 384, - 377, - 384, - 62, - 63, + 137, + 147, + 137, + 147, + 21, + 22, true, - "encoded", - "encoded" + "appliances", + "appliances" ], [ - "verb", - "single-verb", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541486535, - 9094674367324724925, + 8414271082704541626, + 9829432072489958078, 18446744073709551615, 18446744073709551615, - 392, - 394, - 392, - 394, - 65, - 66, + 149, + 163, + 149, + 163, + 23, + 25, true, - "is", - "is" + "annual reports", + "annual reports" ], [ - "verb", - "single-verb", + "conn", + "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 389609625618412480, - 541954499163841946, + 15441160910541485670, + 9094674364219234676, 18446744073709551615, 18446744073709551615, - 602, - 606, - 602, - 606, - 97, - 98, + 164, + 166, + 164, + 166, + 25, + 26, true, - "make", - "make" + "of", + "of" ], [ - "verb", - "single-verb", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 3503810711254267897, - 327944184510617093, + 5947879506556567994, + 16771512443857485166, 18446744073709551615, 18446744073709551615, - 654, - 663, - 654, - 663, - 105, - 106, + 167, + 176, + 167, + 176, + 26, + 27, true, - "transform", - "transform" + "companies", + "companies" ], [ - "verb", - "single-verb", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 8106397529675133622, - 9265134128656073394, + 12178341415895525628, + 14794601526936094944, 18446744073709551615, 18446744073709551615, - 694, - 701, - 694, - 701, - 111, - 112, + 186, + 189, + 186, + 189, + 30, + 31, true, - "enables", - "enables" + "way", + "way" ], [ - "verb", - "single-verb", + "conn", + "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 6185033796712833759, - 8158902570488040634, + 15441160910541485865, + 9094674369429163415, 18446744073709551615, 18446744073709551615, - 802, - 811, - 802, - 811, - 129, - 130, + 190, + 192, + 190, + 192, + 31, + 32, true, - "published", - "published" + "to", + "to" ], [ - "verb", - "single-verb", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 329104161594697075, - 8726414758277463017, + 3282133738476528713, + 6601164231648618886, 18446744073709551615, 18446744073709551615, - 889, - 894, - 889, - 894, - 141, - 142, + 193, + 208, + 193, + 208, + 32, + 34, true, - "poses", - "poses" + "research papers", + "research papers" ], [ "verb", @@ -7329,40 +7400,40 @@ "TEXT", "#/texts/11", 1.0, - 6185033796712833759, - 8158902570488066017, + 6180169263126451304, + 4214562769527423312, 18446744073709551615, 18446744073709551615, - 943, - 952, - 943, - 952, - 151, - 152, + 210, + 219, + 210, + 219, + 35, + 36, true, - "published", - "published" + "detailing", + "detailing" ], [ - "verb", - "single-verb", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 389609625618412480, - 541954499163850098, + 1102904554370006265, + 13125714652652128474, 18446744073709551615, 18446744073709551615, - 1001, - 1005, - 1001, - 1005, - 163, - 164, + 222, + 251, + 222, + 251, + 37, + 40, true, - "make", - "make" + "specific scientific discovery", + "specific scientific discovery" ], [ "verb", @@ -7371,40 +7442,40 @@ "TEXT", "#/texts/11", 1.0, - 389609625697824147, - 497671517323247955, + 15441160910541486535, + 9094674367324716363, 18446744073709551615, 18446744073709551615, - 1054, - 1058, - 1054, - 1058, - 172, - 173, + 256, + 258, + 256, + 258, + 42, + 43, true, - "find", - "find" + "is", + "is" ], [ - "verb", - "single-verb", + "conn", + "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 329104159301007417, - 8863033603552468338, + 15441160910541485865, + 9094674369429174755, 18446744073709551615, 18446744073709551615, - 1217, - 1222, - 1217, - 1222, - 204, - 205, + 268, + 270, + 268, + 270, + 44, + 45, true, - "being", - "being" + "to", + "to" ], [ "verb", @@ -7413,124 +7484,124 @@ "TEXT", "#/texts/11", 1.0, - 8106398484416229602, - 17530813820733868718, + 12178341415895645562, + 14799989741446549720, 18446744073709551615, 18446744073709551615, - 1251, - 1258, - 1251, - 1258, - 211, - 212, + 271, + 274, + 271, + 274, + 45, + 46, true, - "convert", - "convert" + "say", + "say" ], [ - "verb", - "single-verb", + "conn", + "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 389609625621532398, - 554816074249930520, + 389609625631229034, + 542250596720775319, 18446744073709551615, 18446744073709551615, - 1358, - 1362, - 1358, - 1362, - 231, - 232, + 275, + 279, + 275, + 279, + 46, + 47, true, - "need", - "need" + "that", + "that" ], [ - "verb", - "single-verb", + "term", + "enum-term-mark-1", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541486535, - 9094674367323996407, + 12322374974058800893, + 6816531868111142674, 18446744073709551615, 18446744073709551615, - 1383, - 1385, - 1383, - 1385, - 236, - 237, + 280, + 329, + 280, + 329, + 47, + 52, true, - "is", - "is" + "valuable qualitative and quantitative information", + "valuable qualitative and quantitative information" ], [ - "verb", - "single-verb", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 389609625696287852, - 497722139527509467, + 7668210657519556598, + 8800539397108400539, 18446744073709551615, 18446744073709551615, - 1394, - 1398, - 1394, - 1398, - 239, - 240, + 305, + 329, + 305, + 329, + 50, + 52, true, - "deal", - "deal" + "quantitative information", + "quantitative information" ], [ "verb", - "single-verb", + "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 6168374324562720592, - 185665609222125727, + 17466643417440400812, + 16357177041782037840, 18446744073709551615, 18446744073709551615, - 1474, - 1483, - 1474, - 1483, - 257, - 258, + 330, + 342, + 330, + 342, + 52, + 54, true, - "extracted", - "extracted" + "is contained", + "is contained" ], [ - "verb", - "single-verb", + "conn", + "single-conn", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 12178341415895640485, - 14799993819747716499, + 15441160910541486538, + 9094674367373513839, 18446744073709551615, 18446744073709551615, - 1509, - 1512, - 1509, - 1512, - 262, - 263, + 343, + 345, + 343, + 345, + 54, + 55, true, - "put", - "put" + "in", + "in" ], [ "conn", @@ -7554,46 +7625,67 @@ "many of" ], [ - "conn", - "single-conn", + "sentence", + "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 8106478685702231057, - 1428751967817183488, + 17647932338360720997, + 5716030233811874384, 18446744073709551615, 18446744073709551615, - 1325, - 1332, - 1325, - 1332, - 222, - 224, + 360, + 509, + 360, + 509, + 59, + 84, true, - "such as", - "such as" + "However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout.", + "However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout." ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 389609625631229034, - 542250596720887578, + 8106398484416916345, + 17530806449434366453, 18446744073709551615, 18446744073709551615, - 16, - 20, - 16, - 20, - 3, - 4, + 369, + 376, + 369, + 376, + 61, + 62, true, - "that", - "that" + "content", + "content" + ], + [ + "verb", + "single-verb", + 16923207262044929933, + "TEXT", + "#/texts/11", + 1.0, + 8106397531449655911, + 14632270885483087688, + 18446744073709551615, + 18446744073709551615, + 377, + 384, + 377, + 384, + 62, + 63, + true, + "encoded", + "encoded" ], [ "conn", @@ -7603,60 +7695,60 @@ "#/texts/11", 1.0, 15441160910541486538, - 9094674367373732264, + 9094674367373523345, 18446744073709551615, 18446744073709551615, - 76, - 78, - 76, - 78, - 12, - 13, + 385, + 387, + 385, + 387, + 63, + 64, true, "in", "in" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 389609625697843734, - 497670111222755023, + 12178341415896289890, + 14799990756781414830, 18446744073709551615, 18446744073709551615, - 120, - 124, - 120, - 124, - 18, - 19, + 388, + 391, + 388, + 391, + 64, + 65, true, - "from", - "from" + "PDF", + "PDF" ], [ - "conn", - "single-conn", + "verb", + "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 12178341415895625940, - 14799992967704466108, + 15441160910541486535, + 9094674367324724925, 18446744073709551615, 18446744073709551615, - 133, - 136, - 133, - 136, - 20, - 21, + 392, + 394, + 392, + 394, + 65, + 66, true, - "for", - "for" + "is", + "is" ], [ "conn", @@ -7665,61 +7757,61 @@ "TEXT", "#/texts/11", 1.0, - 15441160910541485670, - 9094674364219234676, + 15441160910541486989, + 9094674356673776478, 18446744073709551615, 18446744073709551615, - 164, - 166, - 164, - 166, - 25, - 26, + 395, + 397, + 395, + 397, + 66, + 67, true, - "of", - "of" + "by", + "by" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 389609625631229034, - 542250596720775319, + 16381206597113188775, + 13905938768963750102, 18446744073709551615, 18446744073709551615, - 275, - 279, - 275, - 279, - 46, - 47, + 402, + 408, + 402, + 408, + 68, + 69, true, - "that", - "that" + "nature", + "nature" ], [ - "conn", - "single-conn", + "verb", + "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541486538, - 9094674367373513839, + 15984679469930005672, + 16512266362137627548, 18446744073709551615, 18446744073709551615, - 343, - 345, - 343, - 345, - 54, - 55, + 409, + 419, + 409, + 419, + 69, + 71, true, - "in", - "in" + "reduced to", + "reduced to" ], [ "conn", @@ -7728,40 +7820,40 @@ "TEXT", "#/texts/11", 1.0, - 15441160910541486538, - 9094674367373523345, + 15441160910541485865, + 9094674369429173582, 18446744073709551615, 18446744073709551615, - 385, - 387, - 385, - 387, - 63, - 64, + 417, + 419, + 417, + 419, + 70, + 71, true, - "in", - "in" + "to", + "to" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541486989, - 9094674356673776478, + 8106478700233620678, + 8336023496233777462, 18446744073709551615, 18446744073709551615, - 395, - 397, - 395, - 397, - 66, - 67, + 420, + 427, + 420, + 427, + 71, + 72, true, - "by", - "by" + "streams", + "streams" ], [ "conn", @@ -7785,25 +7877,25 @@ "of" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541485670, - 9094674364219227273, + 13935212089545515210, + 4563100627799985741, 18446744073709551615, 18446744073709551615, - 568, - 570, - 568, - 570, - 92, - 93, + 431, + 452, + 431, + 452, + 73, + 75, true, - "of", - "of" + "printing instructions", + "printing instructions" ], [ "conn", @@ -7812,103 +7904,103 @@ "TEXT", "#/texts/11", 1.0, - 14154242830791309661, - 1004085954587590076, + 15441160910541485865, + 9094674369429146067, 18446744073709551615, 18446744073709551615, - 579, - 591, - 579, - 591, - 94, - 96, + 462, + 464, + 462, + 464, + 76, + 77, true, - "across these", - "across these" + "to", + "to" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 16381206560517276114, - 15945165859804744982, + 16582444977748815769, + 16919788927196448661, 18446744073709551615, 18446744073709551615, - 667, - 673, - 667, - 673, - 107, - 109, + 486, + 508, + 486, + 508, + 80, + 83, true, - "into a", - "into a" + "pleasing visual layout", + "pleasing visual layout" ], [ - "conn", - "single-conn", + "sentence", + "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541480354, - 9094674546964354786, + 15487015001052727581, + 14484812293778889252, 18446744073709551615, 18446744073709551615, - 723, - 725, - 723, - 725, + 510, + 722, + 510, + 722, + 84, 115, - 116, true, - "In", - "In" + "Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery.", + "Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery." ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541485670, - 9094674364219198352, + 4929058514881842733, + 10224787839479118537, 18446744073709551615, 18446744073709551615, - 765, - 767, - 765, - 767, - 122, - 123, + 519, + 538, + 519, + 538, + 86, + 88, true, - "of", - "of" + "data representation", + "data representation" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541485670, - 9094674364219210972, + 14929125759175486455, + 13997854025989108072, 18446744073709551615, 18446744073709551615, - 799, - 801, - 799, - 801, - 128, - 129, + 547, + 567, + 547, + 567, + 90, + 92, true, - "of", - "of" + "enormous variability", + "enormous variability" ], [ "conn", @@ -7917,40 +8009,40 @@ "TEXT", "#/texts/11", 1.0, - 16381206560518651853, - 15945529371230903899, + 15441160910541485670, + 9094674364219227273, 18446744073709551615, 18446744073709551615, - 822, - 828, - 822, - 828, - 131, - 133, + 568, + 570, + 568, + 570, + 92, + 93, true, - "in the", - "in the" + "of", + "of" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 329104161786618045, - 8725299555592485331, + 8106342461491420046, + 4172004388378103877, 18446744073709551615, 18446744073709551615, - 911, - 916, - 911, - 916, - 146, - 147, + 571, + 578, + 571, + 578, + 93, + 94, true, - "since", - "since" + "layouts", + "layouts" ], [ "conn", @@ -7959,82 +8051,82 @@ "TEXT", "#/texts/11", 1.0, - 16381206560518651853, - 15945529371230859398, + 14154242830791309661, + 1004085954587590076, 18446744073709551615, 18446744073709551615, - 953, - 959, - 953, - 959, - 152, - 154, + 579, + 591, + 579, + 591, + 94, + 96, true, - "in the", - "in the" + "across these", + "across these" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541480354, - 9094674546964371620, + 6167933651658664291, + 3744443950143438881, 18446744073709551615, 18446744073709551615, - 989, - 991, - 989, - 991, - 160, - 161, + 592, + 601, + 592, + 601, + 96, + 97, true, - "In", - "In" + "documents", + "documents" ], [ - "conn", - "single-conn", + "verb", + "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 14814148868025447689, - 3694567760357366516, + 389609625618412480, + 541954499163841946, 18446744073709551615, 18446744073709551615, - 1018, - 1026, - 1018, - 1026, - 166, - 168, + 602, + 606, + 602, + 606, + 97, + 98, true, - "of these", - "of these" + "make", + "make" ], [ - "conn", - "single-conn", + "verb", + "compound-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541485670, - 9094674364219324564, + 14133501046094794901, + 4250240326135716646, 18446744073709551615, 18446744073709551615, - 1078, - 1080, - 1078, - 1080, - 176, - 177, + 620, + 634, + 620, + 634, + 100, + 102, true, - "of", - "of" + "challenging to", + "challenging to" ], [ "conn", @@ -8043,61 +8135,61 @@ "TEXT", "#/texts/11", 1.0, - 16381206549292198744, - 15968280101146838290, + 15441160910541485865, + 9094674369429194340, 18446744073709551615, 18446744073709551615, - 1170, - 1176, - 1170, - 1176, - 194, - 196, + 632, + 634, + 632, + 634, + 101, + 102, true, - "from a", - "from a" + "to", + "to" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541485670, - 9094674364219318707, + 5746783959074166208, + 15517192707477599154, 18446744073709551615, 18446744073709551615, - 1185, - 1187, - 1185, - 1187, - 197, - 198, + 635, + 649, + 635, + 649, + 102, + 104, true, - "of", - "of" + "access content", + "access content" ], [ - "conn", - "single-conn", + "verb", + "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 14638857868319795209, - 3807143954092066612, + 3503810711254267897, + 327944184510617093, 18446744073709551615, 18446744073709551615, - 1197, - 1205, - 1197, - 1205, - 200, - 202, + 654, + 663, + 654, + 663, + 105, + 106, true, - "with the", - "with the" + "transform", + "transform" ], [ "conn", @@ -8106,103 +8198,103 @@ "TEXT", "#/texts/11", 1.0, - 16381206557726458966, - 16025464328456092215, + 16381206560517276114, + 15945165859804744982, 18446744073709551615, 18446744073709551615, - 1300, - 1306, - 1300, - 1306, - 218, - 220, + 667, + 673, + 667, + 673, + 107, + 109, true, - "with a", - "with a" + "into a", + "into a" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 16381206557726458966, - 16025464328456099242, + 11387678566946341343, + 4163904415113468966, 18446744073709551615, 18446744073709551615, - 1399, - 1405, - 1399, - 1405, - 240, - 242, + 674, + 688, + 674, + 688, + 109, + 110, true, - "with a", - "with a" + "representation", + "representation" ], [ - "conn", - "single-conn", + "verb", + "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541485670, - 9094674364219303614, + 8106397529675133622, + 9265134128656073394, 18446744073709551615, 18446744073709551615, - 1420, - 1422, - 1420, - 1422, - 244, - 245, + 694, + 701, + 694, + 701, + 111, + 112, true, - "of", - "of" + "enables", + "enables" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 16057368201763467386, - 216739275376297295, + 2730405582718102128, + 15726970596030809890, 18446744073709551615, 18446744073709551615, - 1484, - 1494, - 1484, - 1494, - 258, - 260, + 702, + 721, + 702, + 721, + 112, + 114, true, - "from these", - "from these" + "knowledge discovery", + "knowledge discovery" ], [ - "conn", - "single-conn", + "sentence", + "", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 5748787292106066554, - 4405126515520980867, + 3574328216950930229, + 4905315167294659186, 18446744073709551615, 18446744073709551615, - 1513, - 1523, - 1513, - 1523, - 263, - 265, + 723, + 883, + 723, + 883, + 115, + 140, true, - "these into", - "these into" + "In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$.", + "In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$." ], [ "conn", @@ -8211,40 +8303,40 @@ "TEXT", "#/texts/11", 1.0, - 15441160910541485865, - 9094674369429163415, + 15441160910541480354, + 9094674546964354786, 18446744073709551615, 18446744073709551615, - 190, - 192, - 190, - 192, - 31, - 32, + 723, + 725, + 723, + 725, + 115, + 116, true, - "to", - "to" + "In", + "In" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541485865, - 9094674369429174755, + 14650447861280948245, + 18066875144210692331, 18446744073709551615, 18446744073709551615, - 268, - 270, - 268, - 270, - 44, - 45, + 726, + 734, + 726, + 734, + 116, + 117, true, - "to", - "to" + "addition", + "addition" ], [ "conn", @@ -8253,40 +8345,40 @@ "TEXT", "#/texts/11", 1.0, - 15441160910541485865, - 9094674369429173582, + 16381206519425733256, + 5984372374891954420, 18446744073709551615, 18446744073709551615, - 417, - 419, - 417, - 419, - 70, - 71, + 735, + 741, + 735, + 741, + 117, + 119, true, - "to", - "to" + "to the", + "to the" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541485865, - 9094674369429146067, + 16813764953769919795, + 4260210876529689133, 18446744073709551615, 18446744073709551615, - 462, - 464, - 462, - 464, - 76, - 77, + 742, + 764, + 742, + 764, + 119, + 122, true, - "to", - "to" + "sheer current quantity", + "sheer current quantity" ], [ "conn", @@ -8295,61 +8387,61 @@ "TEXT", "#/texts/11", 1.0, - 15441160910541485865, - 9094674369429194340, + 15441160910541485670, + 9094674364219198352, 18446744073709551615, 18446744073709551615, - 632, - 634, - 632, - 634, - 101, - 102, + 765, + 767, + 765, + 767, + 122, + 123, true, - "to", - "to" + "of", + "of" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 16381206519425733256, - 5984372374891954420, + 6167933651658664291, + 3744443950142901645, 18446744073709551615, 18446744073709551615, - 735, - 741, - 735, - 741, - 117, - 119, + 768, + 777, + 768, + 777, + 123, + 124, true, - "to the", - "to the" + "documents", + "documents" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541485865, - 9094674369429185213, + 16688986026552560644, + 17177901629424753408, 18446744073709551615, 18446744073709551615, - 998, - 1000, - 998, - 1000, - 162, - 163, + 783, + 798, + 783, + 798, + 126, + 128, true, - "to", - "to" + "submission rate", + "submission rate" ], [ "conn", @@ -8358,775 +8450,796 @@ "TEXT", "#/texts/11", 1.0, - 15441160910541485865, - 9094674369429226599, + 15441160910541485670, + 9094674364219210972, 18446744073709551615, 18446744073709551615, - 1150, - 1152, - 1150, - 1152, - 191, - 192, + 799, + 801, + 799, + 801, + 128, + 129, true, - "to", - "to" + "of", + "of" ], [ - "conn", - "single-conn", + "verb", + "single-verb", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541485865, - 9094674369429235584, + 6185033796712833759, + 8158902570488040634, 18446744073709551615, 18446744073709551615, - 1275, - 1277, - 1275, - 1277, - 214, - 215, + 802, + 811, + 802, + 811, + 129, + 130, true, - "to", - "to" + "published", + "published" ], [ - "conn", - "single-conn", + "term", + "single-term", 16923207262044929933, "TEXT", "#/texts/11", 1.0, - 15441160910541485865, - 9094674369429209693, + 6167933651658664291, + 3744443950142902450, 18446744073709551615, 18446744073709551615, - 1391, - 1393, - 1391, - 1393, - 238, - 239, + 812, + 821, + 812, + 821, + 130, + 131, true, - "to", - "to" + "documents", + "documents" ], [ - "parenthesis", - "round brackets", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 329104053210116957, - 4933919093561563747, + 16381206560518651853, + 15945529371230903899, 18446744073709551615, 18446744073709551615, - 295, - 300, - 295, - 300, - 52, - 55, + 822, + 828, + 822, + 828, + 131, + 133, true, - "(CCS)", - "(CCS)" + "in the", + "in the" ], [ - "sentence", - "", - 3749305213430885773, + "term", + "single-term", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 5306542014856411002, - 14493189109864111156, + 12621877848489179259, + 15237617635766653290, 18446744073709551615, 18446744073709551615, - 0, - 132, - 0, - 132, - 0, - 24, + 829, + 846, + 829, + 846, + 133, + 135, true, - "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files.", - "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files." + "scientific domain", + "scientific domain" ], [ - "sentence", - "", - 3749305213430885773, + "verb", + "compound-verb", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 2369217517028793827, - 11890147189063173430, + 18329554120394908623, + 17010976290898309846, 18446744073709551615, 18446744073709551615, - 133, - 246, - 133, - 246, - 24, - 45, + 847, + 862, + 847, + 862, + 135, + 138, true, - "The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms.", - "The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms." + "is also growing", + "is also growing" ], [ - "sentence", - "", - 3749305213430885773, + "expression", + "wtoken-concatenation", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 12064124790943537514, - 9632597734224986436, + 9653568957037915764, + 1159839439008018639, 18446744073709551615, 18446744073709551615, - 247, - 375, - 247, - 375, - 45, - 69, + 863, + 882, + 863, + 882, + 138, + 139, true, - "This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components.", - "This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components." + "exponentially^{2}", + "exponentially$^{2}$" ], [ - "sentence", - "", - 3749305213430885773, + "term", + "single-term", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 1805453063572196406, - 15284543814810892665, + 9653568957037915764, + 1159839439008018639, 18446744073709551615, 18446744073709551615, - 376, - 440, - 376, - 440, - 69, - 82, + 863, + 882, + 863, + 882, + 138, + 139, true, - "Each of these microservices can be consumed by its own REST API.", - "Each of these microservices can be consumed by its own REST API." + "exponentially^{2}", + "exponentially$^{2}$" ], [ "sentence", "", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 210366145485171616, - 10779316463372138244, + 8347632587306657460, + 16097912844310233617, 18446744073709551615, 18446744073709551615, - 441, - 606, - 441, - 606, - 82, - 109, + 884, + 988, + 884, + 988, + 140, + 160, true, - "This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform.", - "This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform." + "This poses a real problem, since more and more information published in the PDF documents is going dark.", + "This poses a real problem, since more and more information published in the PDF documents is going dark." ], [ - "sentence", - "", - 3749305213430885773, + "verb", + "single-verb", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 13863701154380798624, - 5607686807400793153, + 329104161594697075, + 8726414758277463017, 18446744073709551615, 18446744073709551615, - 607, - 891, - 607, - 891, - 109, - 153, + 889, + 894, + 889, + 894, + 141, + 142, true, - "In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", - "In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures." + "poses", + "poses" ], [ - "term", - "single-term", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 3741141293805179509, - 9675794815446093236, + 329104161786618045, + 8725299555592485331, 18446744073709551615, 18446744073709551615, - 40, - 55, - 40, - 55, - 9, - 11, + 911, + 916, + 911, + 916, + 146, + 147, true, - "first component", - "first component" + "since", + "since" ], [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 4066887494406769292, - 15944572553884562120, + 14388065630035882329, + 2686196032102535307, 18446744073709551615, 18446744073709551615, - 110, - 131, - 110, - 131, - 20, - 23, + 931, + 942, + 931, + 942, + 150, + 151, true, - "structured data files", - "structured data files" + "information", + "information" ], [ - "term", - "single-term", - 3749305213430885773, + "verb", + "single-verb", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 15684933964106580812, - 12993940953903139083, + 6185033796712833759, + 8158902570488066017, 18446744073709551615, 18446744073709551615, - 208, - 225, - 208, - 225, - 40, - 42, + 943, + 952, + 943, + 952, + 151, + 152, true, - "trainable machine", - "trainable machine" + "published", + "published" ], [ - "term", - "single-term", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 12638008641667971393, - 14590037144173376663, + 16381206560518651853, + 15945529371230859398, 18446744073709551615, 18446744073709551615, - 269, - 294, - 269, - 294, - 49, - 52, + 953, + 959, + 953, + 959, + 152, + 154, true, - "Corpus Conversion Service", - "Corpus Conversion Service" + "in the", + "in the" ], [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 3812062755894317903, - 5752895239615977865, + 12653831733608918357, + 6140974263001666382, 18446744073709551615, 18446744073709551615, - 359, - 374, - 359, - 374, - 66, - 68, + 960, + 973, + 960, + 973, + 154, + 156, true, - "main components", - "main components" + "PDF documents", + "PDF documents" ], [ - "term", - "single-term", - 3749305213430885773, + "verb", + "compound-verb", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 7904009099850099728, - 6069321302342300412, + 14637952034068646347, + 9688733531448391553, 18446744073709551615, 18446744073709551615, - 427, - 439, - 427, - 439, - 78, - 81, + 974, + 982, + 974, + 982, + 156, + 158, true, - "own REST API", - "own REST API" + "is going", + "is going" ], [ - "term", - "single-term", - 3749305213430885773, + "sentence", + "", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 14315066823203278267, - 5715163301899035549, + 7315676043002615146, + 3020292113144700597, 18446744073709551615, 18446744073709551615, - 483, - 500, - 483, - 500, - 90, - 92, + 989, + 1133, + 989, + 1133, + 160, + 187, true, - "complex pipelines", - "complex pipelines" + "In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components.", + "In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components." ], [ - "term", - "single-term", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 7501920923775581134, - 5285457240038734782, + 15441160910541480354, + 9094674546964371620, 18446744073709551615, 18446744073709551615, - 567, - 584, - 567, - 584, - 103, - 105, + 989, + 991, + 989, + 991, + 160, + 161, true, - "new microservices", - "new microservices" + "In", + "In" ], [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 9920918086675479799, - 11129371561875838665, + 329104161571401725, + 8768421271667196313, 18446744073709551615, 18446744073709551615, - 689, - 725, - 689, - 725, - 122, - 125, + 992, + 997, + 992, + 997, + 161, + 162, true, - "asynchronous communication protocols", - "asynchronous communication protocols" + "order", + "order" ], [ - "term", - "single-term", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 17000938524684089439, - 12283057491291530260, + 15441160910541485865, + 9094674369429185213, 18446744073709551615, 18446744073709551615, - 742, - 755, - 742, - 755, - 129, - 131, + 998, + 1000, + 998, + 1000, + 162, + 163, true, - "many benefits", - "many benefits" + "to", + "to" ], [ - "term", - "single-term", - 3749305213430885773, + "verb", + "single-verb", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 4253886245479866309, - 90465651070093109, + 389609625618412480, + 541954499163850098, 18446744073709551615, 18446744073709551615, - 773, - 799, - 773, - 799, - 136, - 139, + 1001, + 1005, + 1001, + 1005, + 163, + 164, true, - "proper resource management", - "proper resource management" + "make", + "make" ], [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 17671651082391847352, - 4285231550406356710, + 8106398484416916345, + 17530806449433194901, 18446744073709551615, 18446744073709551615, - 812, - 831, - 812, - 831, - 141, - 143, + 1010, + 1017, + 1010, + 1017, + 165, + 166, true, - "strong dependencies", - "strong dependencies" + "content", + "content" ], [ - "term", - "single-term", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 239702429653970881, - 11301722290661797635, + 14814148868025447689, + 3694567760357366516, 18446744073709551615, 18446744073709551615, - 870, - 890, - 870, - 890, - 149, - 152, + 1018, + 1026, + 1018, + 1026, + 166, + 168, true, - "single task failures", - "single task failures" + "of these", + "of these" ], [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 329104161668023890, - 13427899720650205831, + 6167933651658664291, + 3744443950142885131, 18446744073709551615, 18446744073709551615, - 8, - 13, - 8, - 13, - 2, - 3, + 1027, + 1036, + 1027, + 1036, + 168, + 169, true, - "paper", - "paper" + "documents", + "documents" ], [ - "term", - "single-term", - 3749305213430885773, + "expression", + "common", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 6182654480499682241, - 9496359210917921791, + 15441160910541487324, + 9094674364011169527, 18446744073709551615, 18446744073709551615, - 61, - 70, - 61, - 70, - 13, - 14, + 1049, + 1053, + 1049, + 1053, + 171, + 172, true, - "ingestion", - "ingestion" + "eg", + "e.g." ], [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 6167933651658664291, - 16598695715373476800, + 15441160910541487324, + 9094674364011169527, 18446744073709551615, 18446744073709551615, - 74, - 83, - 74, - 83, - 15, - 16, + 1049, + 1053, + 1049, + 1053, + 171, + 172, true, - "documents", - "documents" + "eg", + "e.g." ], [ - "term", - "single-term", - 3749305213430885773, + "verb", + "single-verb", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 2703018679320364082, - 14545924726949564279, + 389609625697824147, + 497671517323247955, 18446744073709551615, 18446744073709551615, - 94, - 104, - 94, - 104, - 18, - 19, + 1054, + 1058, + 1054, + 1058, + 172, + 173, true, - "conversion", - "conversion" + "find", + "find" ], [ - "term", - "single-term", - 3749305213430885773, + "expression", + "word-concatenation", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 14635106751859230946, - 3899039667786064358, + 12555128312158075374, + 3585475568588858575, 18446744073709551615, 18446744073709551615, - 137, - 145, - 137, - 145, - 25, - 26, + 1064, + 1077, + 1064, + 1077, + 175, + 176, true, - "solution", - "solution" + "phase-diagram", + "phase-diagram" ], [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 14814125365076808131, - 9349977279496653565, + 12555128312158075374, + 3585475568588858575, 18446744073709551615, 18446744073709551615, + 1064, + 1077, + 1064, + 1077, + 175, 176, - 184, - 176, - 184, - 33, - 34, true, - "platform", - "platform" + "phase-diagram", + "phase-diagram" ], [ - "term", - "single-term", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 389609625695918821, - 3664761358525422290, + 15441160910541485670, + 9094674364219324564, 18446744073709551615, 18446744073709551615, - 199, - 203, - 199, - 203, - 38, - 39, + 1078, + 1080, + 1078, + 1080, + 176, + 177, true, - "core", - "core" + "of", + "of" ], [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 15359670209433732834, - 1709633722429132795, + 10167329824705672383, + 12701577379507576649, 18446744073709551615, 18446744073709551615, - 235, - 245, - 235, - 245, - 43, - 44, + 1081, + 1093, + 1081, + 1093, + 177, + 179, true, - "algorithms", - "algorithms" + "material XYZ", + "material XYZ" ], [ - "term", - "single-term", - 3749305213430885773, + "verb", + "compound-verb", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 14814125365076808131, - 9349977279496707806, + 14364253828417975278, + 8778810672464165894, 18446744073709551615, 18446744073709551615, - 252, - 260, - 252, - 260, - 46, - 47, + 1100, + 1117, + 1100, + 1117, + 182, + 184, true, - "platform", - "platform" + "needs essentially", + "needs essentially" ], [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 12178341415896221596, - 5842744026410738636, + 2703018952916355661, + 10279229622173728080, 18446744073709551615, 18446744073709551615, - 296, - 299, - 296, - 299, - 53, - 54, + 1122, + 1132, + 1122, + 1132, + 185, + 186, true, - "CCS", - "CCS" + "components", + "components" ], [ - "term", - "single-term", - 3749305213430885773, + "sentence", + "", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 12178341415895638602, - 5842694294408079134, + 8292138896065382931, + 17716571591104291388, 18446744073709551615, 18446744073709551615, - 320, - 323, - 320, - 323, - 60, - 61, + 1134, + 1345, + 1134, + 1345, + 187, + 228, true, - "set", - "set" + "First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML.", + "First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML." ], [ - "term", - "single-term", - 3749305213430885773, + "verb", + "compound-verb", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 990358581043194791, - 393905999985964694, + 16971139354256206394, + 8359549146932405741, 18446744073709551615, 18446744073709551615, - 327, - 340, - 327, - 340, - 62, - 63, + 1145, + 1159, + 1145, + 1159, + 190, + 193, true, - "microservices", - "microservices" + "need to ingest", + "need to ingest" ], [ - "term", - "single-term", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 990358581043194791, - 393905999985936006, + 15441160910541485865, + 9094674369429226599, 18446744073709551615, 18446744073709551615, - 390, - 403, - 390, - 403, - 72, - 73, + 1150, + 1152, + 1150, + 1152, + 191, + 192, true, - "microservices", - "microservices" + "to", + "to" ], [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 14650448032998792781, - 15963759494992376767, + 6167933651658664291, + 3744443950142942051, 18446744073709551615, 18446744073709551615, - 446, - 454, - 446, - 454, - 83, - 84, + 1160, + 1169, + 1160, + 1169, + 193, + 194, true, - "approach", - "approach" + "documents", + "documents" + ], + [ + "conn", + "single-conn", + 16923207262044929933, + "TEXT", + "#/texts/11", + 1.0, + 16381206549292198744, + 15968280101146838290, + 18446744073709551615, + 18446744073709551615, + 1170, + 1176, + 1170, + 1176, + 194, + 196, + true, + "from a", + "from a" ], [ "term", @@ -9152,275 +9265,275 @@ [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 14814125365076808131, - 9349977279496610029, + 8106478777441543540, + 773597955729195721, 18446744073709551615, 18446744073709551615, - 597, - 605, - 597, - 605, - 107, - 108, + 1177, + 1184, + 1177, + 1184, + 196, + 197, true, - "platform", - "platform" + "variety", + "variety" ], [ - "term", - "single-term", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 329104161571401725, - 13426123714444340915, + 15441160910541485670, + 9094674364219318707, 18446744073709551615, 18446744073709551615, - 610, - 615, - 610, - 615, - 110, - 111, + 1185, + 1187, + 1185, + 1187, + 197, + 198, true, - "order", - "order" + "of", + "of" ], [ "term", "single-term", - 3749305213430885773, + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 14814125365076808131, - 9349977279496698149, + 8106397728035763965, + 11508792142722132367, 18446744073709551615, 18446744073709551615, - 629, - 637, - 629, - 637, - 114, - 115, + 1188, + 1195, + 1188, + 1195, + 198, + 199, true, - "platform", - "platform" + "formats", + "formats" ], [ - "term", - "single-term", - 3749305213430885773, + "parenthesis", + "round brackets", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 990358581043194791, - 393905999985528944, + 4516846515356980393, + 4935623304895828855, 18446744073709551615, 18446744073709551615, - 652, - 665, - 652, - 665, - 118, - 119, + 1196, + 1246, + 1196, + 1246, + 199, + 210, true, - "microservices", - "microservices" + "(with the PDF format being the most prevalent one)", + "(with the PDF format being the most prevalent one)" ], [ - "term", - "single-term", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 14814125365076808131, - 9349977279496695017, + 14638857868319795209, + 3807143954092066612, 18446744073709551615, 18446744073709551615, - 846, - 854, - 846, - 854, - 146, - 147, + 1197, + 1205, + 1197, + 1205, + 200, + 202, true, - "platform", - "platform" + "with the", + "with the" ], [ - "verb", - "compound-verb", - 3749305213430885773, + "term", + "single-term", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 8568388710680918302, - 1832540720065690143, + 1602384110795404989, + 18168403198260411892, 18446744073709551615, 18446744073709551615, - 18, - 32, - 18, - 32, - 5, - 7, + 1206, + 1216, + 1206, + 1216, + 202, + 204, true, - "focus entirely", - "focus entirely" + "PDF format", + "PDF format" ], [ "verb", - "compound-verb", - 3749305213430885773, + "single-verb", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 5237537207757377628, - 6864205941272212007, + 329104159301007417, + 8863033603552468338, 18446744073709551615, 18446744073709551615, - 149, - 167, - 149, - 167, - 27, - 30, + 1217, + 1222, + 1217, + 1222, + 204, + 205, true, - "propose is thought", - "propose is thought" + "being", + "being" ], [ - "verb", - "compound-verb", - 3749305213430885773, + "term", + "single-term", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 15903921305565697154, - 7448795222128154927, + 12595883072252114156, + 7039273758002805758, 18446744073709551615, 18446744073709551615, - 404, - 419, - 404, - 419, - 73, - 76, + 1232, + 1245, + 1232, + 1245, + 207, + 209, true, - "can be consumed", - "can be consumed" + "prevalent one", + "prevalent one" ], [ "verb", - "compound-verb", - 3749305213430885773, + "single-verb", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 8944903948136983007, - 3100279804263702344, + 8106398484416229602, + 17530813820733868718, 18446744073709551615, 18446744073709551615, - 666, - 680, - 666, - 680, - 119, - 121, + 1251, + 1258, + 1251, + 1258, + 211, + 212, true, - "are integrated", - "are integrated" + "convert", + "convert" ], [ - "verb", - "compound-verb", - 3749305213430885773, + "term", + "single-term", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 7780068026497460305, - 562602692899396130, + 6167933651658664291, + 3744443950142865755, 18446744073709551615, 18446744073709551615, - 760, - 772, - 760, - 772, - 133, - 136, + 1265, + 1274, + 1265, + 1274, + 213, + 214, true, - "allows to do", - "allows to do" + "documents", + "documents" ], [ - "verb", - "single-verb", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 12178341415895601584, - 5841349058796574805, + 15441160910541485865, + 9094674369429235584, 18446744073709551615, 18446744073709551615, - 204, - 207, - 204, - 207, - 39, - 40, + 1275, + 1277, + 1275, + 1277, + 214, + 215, true, - "has", - "has" + "to", + "to" ], [ - "verb", - "single-verb", - 3749305213430885773, + "term", + "single-term", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 14639581097006750428, - 4101766079705362430, + 4066887494406769292, + 14849727204374093143, 18446744073709551615, 18446744073709551615, - 226, - 234, - 226, - 234, - 42, - 43, + 1278, + 1299, + 1278, + 1299, + 215, + 218, true, - "learning", - "learning" + "structured data files", + "structured data files" ], [ - "verb", - "single-verb", - 3749305213430885773, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/12", + "#/texts/11", 1.0, - 16381206563350835754, - 15338244529159273971, + 16381206557726458966, + 16025464328456092215, 18446744073709551615, 18446744073709551615, - 262, - 268, - 262, - 268, - 48, - 49, + 1300, + 1306, + 1300, + 1306, + 218, + 220, true, - "called", - "called" + "with a", + "with a" ], [ "verb", @@ -9444,676 +9557,676 @@ "consists" ], [ - "verb", - "single-verb", - 3749305213430885773, + "numval", + "ival", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 6167774653473311671, - 8932714637044289580, + 17767354399704235160, + 13994996428325642210, 18446744073709551615, 18446744073709551615, - 341, - 350, - 341, - 350, - 63, - 64, + 443, + 444, + 443, + 444, + 78, + 79, true, - "organized", - "organized" + "0", + "0" ], [ - "verb", - "single-verb", - 3749305213430885773, + "numval", + "ival", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 16381206569317834029, - 15127822949531520780, + 17767354399704235157, + 13994996428928834278, 18446744073709551615, 18446744073709551615, - 464, - 470, - 464, - 470, - 86, - 87, + 446, + 447, + 446, + 447, + 80, + 81, true, - "allows", - "allows" + "5", + "5" ], [ - "verb", - "single-verb", - 3749305213430885773, + "sentence", + "", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 329104159303279946, - 13502145352581782916, + 17055744903410885404, + 12761534484507818149, 18446744073709551615, 18446744073709551615, - 477, - 482, - 477, - 482, - 89, - 90, + 449, + 556, + 449, + 556, + 82, + 101, true, - "build", - "build" + "The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers.", + "The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers." ], [ - "verb", - "single-verb", - 3749305213430885773, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 8106476000254393164, - 1725287517912256023, + 6927970521128218953, + 6482828839300817669, 18446744073709551615, 18446744073709551615, - 504, - 511, - 504, - 511, - 93, - 94, + 453, + 472, + 453, + 472, + 83, + 86, true, - "process", - "process" + "Faster R-CNN method", + "Faster R-CNN method" ], [ - "verb", - "single-verb", - 3749305213430885773, + "expression", + "word-concatenation", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 16381206569317834029, - 15127822949531294179, + 329104162326555074, + 15570664097727008132, 18446744073709551615, 18446744073709551615, - 546, - 552, - 546, - 552, - 99, - 100, + 460, + 465, + 460, + 465, + 84, + 85, true, - "allows", - "allows" - ], + "R-CNN", + "R-CNN" + ], [ "verb", - "single-verb", - 3749305213430885773, + "compound-verb", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 8106396517344986388, - 5854485364096172979, + 7743689594175537908, + 4826765463732452457, 18446744073709551615, 18446744073709551615, - 559, - 566, - 559, - 566, - 102, - 103, + 473, + 502, + 473, + 502, + 86, + 91, true, - "develop", - "develop" + "is also performing quite well", + "is also performing quite well" ], [ "verb", - "single-verb", - 3749305213430885773, + "compound-verb", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 389609625618412480, - 3672855485569275414, + 14568989124066371477, + 1068965357575472568, 18446744073709551615, 18446744073709551615, - 619, - 623, - 619, - 623, - 112, - 113, + 508, + 520, + 508, + 520, + 93, + 95, true, - "make", - "make" + "has slightly", + "has slightly" ], [ - "verb", - "single-verb", - 3749305213430885773, + "term", + "enum-term-mark-2", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 329104159209890617, - 13606843864069204390, + 767578358531619449, + 1472685584560746355, 18446744073709551615, 18446744073709551615, - 733, - 738, - 733, - 738, - 127, - 128, + 527, + 547, + 527, + 547, + 96, + 99, true, - "gives", - "gives" + "precision and recall", + "precision and recall" ], [ - "verb", - "single-verb", - 3749305213430885773, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 5305301449677211216, - 8681985492456152514, + 6184954595655792282, + 18387321712019273929, 18446744073709551615, 18446744073709551615, - 801, - 811, - 801, - 811, - 140, - 141, + 527, + 536, + 527, + 536, + 96, + 97, true, - "eliminates", - "eliminates" + "precision", + "precision" ], [ - "verb", - "single-verb", - 3749305213430885773, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 329104161505838030, - 13472448784809337111, + 16904814894749305757, + 5737021334745277149, 18446744073709551615, 18446744073709551615, - 836, - 841, - 836, - 841, - 144, - 145, + 541, + 555, + 541, + 555, + 98, + 100, true, - "makes", - "makes" + "recall numbers", + "recall numbers" ], [ - "conn", - "single-conn", - 3749305213430885773, + "sentence", + "", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 1993790582685692910, - 3267300742396852093, + 14420414998277701657, + 3037581738866623003, 18446744073709551615, 18446744073709551615, - 855, - 869, - 855, - 869, - 147, - 149, + 557, + 667, + 557, + 667, + 101, + 119, true, - "robust against", - "robust against" + "We believe this originates from the selective search algorithm which is used to determine regions of interest.", + "We believe this originates from the selective search algorithm which is used to determine regions of interest." ], [ - "conn", - "single-conn", - 3749305213430885773, + "verb", + "single-verb", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 8106396862006371970, - 13009000795262405678, + 8106397860663428876, + 16964248131253901291, 18446744073709551615, 18446744073709551615, - 0, - 7, - 0, - 7, - 0, - 2, + 560, + 567, + 560, + 567, + 102, + 103, true, - "In this", - "In this" + "believe", + "believe" ], [ - "conn", - "single-conn", - 3749305213430885773, + "verb", + "single-verb", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 16381206566339127348, - 15334506191466791715, + 13983620007877845674, + 12955352785275452378, 18446744073709551615, 18446744073709551615, - 33, - 39, - 33, - 39, - 7, - 9, + 573, + 583, + 573, + 583, + 104, + 105, true, - "on the", - "on the" + "originates", + "originates" ], [ "conn", "single-conn", - 3749305213430885773, + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 15441160910541485670, - 4857876500911665887, + 14637917359887717745, + 1544745809668392834, 18446744073709551615, 18446744073709551615, - 71, - 73, - 71, - 73, - 14, - 15, + 584, + 592, + 584, + 592, + 105, + 107, true, - "of", - "of" + "from the", + "from the" ], [ - "conn", - "single-conn", - 3749305213430885773, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 389609625698622943, - 3653991554605439637, + 4349380732135272089, + 16458298459980248480, 18446744073709551615, 18446744073709551615, - 105, - 109, - 105, - 109, - 19, - 20, + 593, + 619, + 593, + 619, + 107, + 110, true, - "into", - "into" + "selective search algorithm", + "selective search algorithm" ], [ - "conn", - "single-conn", - 3749305213430885773, + "verb", + "compound-verb", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 15441160910541485670, - 4857876500911708855, + 16534452113033443144, + 7065494418204761025, 18446744073709551615, 18446744073709551615, - 168, - 170, - 168, - 170, - 30, - 31, + 626, + 646, + 626, + 646, + 111, + 115, true, - "of", - "of" + "is used to determine", + "is used to determine" ], [ "conn", "single-conn", - 3749305213430885773, + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 389609625700764258, - 3654402694655081504, + 15441160910541485865, + 10477275215095322459, 18446744073709551615, 18446744073709551615, - 171, - 175, - 171, - 175, - 31, - 33, + 634, + 636, + 634, + 636, + 113, + 114, true, - "as a", - "as a" + "to", + "to" ], [ - "conn", - "single-conn", - 3749305213430885773, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 14638855195670894879, - 12124056112419286236, + 8106478448964548679, + 12701825139671272799, 18446744073709551615, 18446744073709551615, - 186, - 194, - 186, - 194, - 35, - 37, + 647, + 654, + 647, + 654, + 115, + 116, true, - "which at", - "which at" + "regions", + "regions" ], [ "conn", "single-conn", - 3749305213430885773, + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 12178341415895623120, - 5842693827432037020, + 15441160910541485670, + 10477275256518301113, 18446744073709551615, 18446744073709551615, - 311, - 314, - 311, - 314, - 57, - 58, + 655, + 657, + 655, + 657, + 116, + 117, true, - "out", - "out" + "of", + "of" ], [ - "conn", - "single-conn", - 3749305213430885773, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 389609625620237736, - 3672771697496836670, + 14637953883246475850, + 7956817731702541219, 18446744073709551615, 18446744073709551615, - 315, - 319, - 315, - 319, - 58, - 60, + 658, + 666, + 658, + 666, + 117, + 118, true, - "of a", - "of a" + "interest", + "interest" ], [ - "conn", - "single-conn", - 3749305213430885773, + "sentence", + "", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 15441160910541485670, - 4857876500911649386, + 14678097696923692160, + 11491609575789433741, 18446744073709551615, 18446744073709551615, - 324, - 326, - 324, - 326, - 61, - 62, + 668, + 773, + 668, + 773, + 119, + 139, true, - "of", - "of" + "The images we feed it are not typical photographic images (made with a camera) but layout visualisations.", + "The images we feed it are not typical photographic images (made with a camera) but layout visualisations." ], [ - "conn", - "single-conn", - 3749305213430885773, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 15441160910541486538, - 4857876073127839401, + 16381206560620045048, + 3784940468244328560, 18446744073709551615, 18446744073709551615, - 351, - 353, - 351, - 353, - 64, - 65, + 672, + 678, + 672, + 678, + 120, + 121, true, - "in", - "in" + "images", + "images" ], [ - "conn", - "single-conn", - 3749305213430885773, + "verb", + "single-verb", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 13852121904094090198, - 14590995273314953312, + 389609625697838276, + 1239402610955961201, 18446744073709551615, 18446744073709551615, - 376, - 389, - 376, - 389, - 69, - 72, + 682, + 686, + 682, + 686, + 122, + 123, true, - "Each of these", - "Each of these" + "feed", + "feed" ], [ - "conn", - "single-conn", - 3749305213430885773, + "verb", + "compound-verb", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 15441160910541486989, - 4857876114906482442, + 8106397797831668975, + 18220343756781523026, 18446744073709551615, 18446744073709551615, - 420, - 422, - 420, - 422, - 76, - 77, - true, - "by", - "by" + 690, + 697, + 690, + 697, + 124, + 126, + true, + "are not", + "are not" ], [ - "conn", - "single-conn", - 3749305213430885773, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 752127337293867046, - 13713074507145666172, + 2351536754407393176, + 12969141846351017301, 18446744073709551615, 18446744073709551615, - 585, - 596, - 585, - 596, - 105, - 107, + 698, + 725, + 698, + 725, + 126, + 129, true, - "against the", - "against the" + "typical photographic images", + "typical photographic images" ], [ - "conn", - "single-conn", - 3749305213430885773, + "parenthesis", + "round brackets", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 15441160910541480354, - 4857876037199396344, + 5763721985249138201, + 11333613653010201493, 18446744073709551615, 18446744073709551615, - 607, - 609, - 607, - 609, - 109, - 110, + 726, + 746, + 726, + 746, + 129, + 135, true, - "In", - "In" + "(made with a camera)", + "(made with a camera)" ], [ - "conn", - "single-conn", - 3749305213430885773, + "verb", + "single-verb", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 8106478041484051995, - 2311188108209868134, + 389609625618411791, + 1242783662433971802, 18446744073709551615, 18446744073709551615, - 681, - 688, - 681, - 688, - 121, - 122, + 727, + 731, + 727, + 731, + 130, + 131, true, - "through", - "through" + "made", + "made" ], [ "conn", "single-conn", - 3749305213430885773, + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 15441160910541485865, - 4857876500092540787, + 16381206557726458966, + 3788832551851477825, 18446744073709551615, 18446744073709551615, - 474, - 476, - 474, - 476, - 88, - 89, + 732, + 738, + 732, + 738, + 131, + 133, true, - "to", - "to" + "with a", + "with a" ], [ - "conn", - "single-conn", - 3749305213430885773, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 15441160910541485865, - 4857876500092539243, + 16381206563351041630, + 1952046848832586628, 18446744073709551615, 18446744073709551615, - 501, - 503, - 501, - 503, - 92, - 93, + 739, + 745, + 739, + 745, + 133, + 134, true, - "to", - "to" + "camera", + "camera" ], [ - "conn", - "single-conn", - 3749305213430885773, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 15441160910541485865, - 4857876500092543847, + 18245848170103364623, + 3851473044777784430, 18446744073709551615, 18446744073709551615, - 556, - 558, - 556, - 558, - 101, - 102, + 751, + 772, + 751, + 772, + 136, + 138, true, - "to", - "to" + "layout visualisations", + "layout visualisations" ], [ - "conn", - "single-conn", - 3749305213430885773, + "sentence", + "", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 15441160910541485865, - 4857876500092547312, + 1336288703622935510, + 15435580690586079242, 18446744073709551615, 18446744073709551615, - 616, - 618, - 616, - 618, - 111, - 112, + 774, + 867, + 774, + 867, + 139, + 156, true, - "to", - "to" + "The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", + "The selective search algorithm in Faster R-CNN might not be optimal for such type of objects." ], [ - "conn", - "single-conn", - 3749305213430885773, + "term", + "single-term", + 2569392033451362672, "TEXT", - "#/texts/12", + "#/texts/54", 1.0, - 15441160910541485865, - 4857876500092426670, + 4349380732135272089, + 16458298459980260537, 18446744073709551615, 18446744073709551615, - 767, - 769, - 767, - 769, - 134, - 135, + 778, + 804, + 778, + 804, + 140, + 143, true, - "to", - "to" + "selective search algorithm", + "selective search algorithm" ], [ "numval", @@ -10137,592 +10250,634 @@ "2" ], [ - "expression", - "word-concatenation", - 3409470577915009676, + "sentence", + "", + 14539041145469267811, "TEXT", - "#/texts/13", + "#/texts/55", 1.0, - 5044385734724420019, - 14795950652192688492, + 7718133462399744108, + 17823198661305637266, 18446744073709551615, 18446744073709551615, - 175, - 191, - 175, - 191, - 34, - 35, + 0, + 31, + 0, + 31, + 0, + 5, true, - "state-of-the-art", - "state-of-the-art" + "3.4.3 Template specific Models.", + "3.4.3 Template specific Models." ], [ - "numval", - "ival", - 17187299362680072378, + "expression", + "wtoken-concatenation", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 17767354399704235163, - 1719697440342653142, + 329104147725158908, + 18028372742913290156, 18446744073709551615, 18446744073709551615, - 33, - 34, - 33, - 34, + 0, 5, - 6, + 0, + 5, + 0, + 1, true, - "3", - "3" + "3.4.3", + "3.4.3" ], [ - "numval", - "ival", - 17187299362680072378, + "term", + "single-term", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 17767354399704235156, - 1719697438695412307, + 11907907877741579530, + 940094317087021995, 18446744073709551615, 18446744073709551615, - 105, - 106, - 105, - 106, - 20, - 21, + 6, + 30, + 6, + 30, + 1, + 4, true, - "4", - "4" + "Template specific Models", + "Template specific Models" ], [ - "numval", - "ival", - 17187299362680072378, + "sentence", + "", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 17767354399704235157, - 1719697440128552642, + 10092485441396158590, + 1921679794908306598, 18446744073709551615, 18446744073709551615, - 301, - 302, - 301, - 302, - 58, - 59, + 32, + 159, + 32, + 159, + 5, + 27, true, - "5", - "5" + "The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template.", + "The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template." ], [ - "parenthesis", - "round brackets", - 17187299362680072378, + "term", + "single-term", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 12960504640524214008, - 7549890404163577655, + 389609625699055241, + 14883359024073212478, 18446744073709551615, 18446744073709551615, - 216, - 243, - 216, - 243, - 41, - 48, + 36, + 40, + 36, + 40, + 6, + 7, true, - "(both in users and content)", - "(both in users and content)" + "goal", + "goal" ], [ - "expression", - "wtoken-concatenation", - 17187299362680072378, + "conn", + "single-conn", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 329104161622136223, - 9304407318657891408, + 15441160910541485670, + 1662040798765251967, 18446744073709551615, 18446744073709551615, - 334, - 339, - 334, - 339, - 65, - 66, + 41, + 43, + 41, + 43, + 7, + 8, true, - "w.r.t", - "w.r.t" + "of", + "of" ], [ - "sentence", - "", - 17187299362680072378, + "term", + "single-term", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 13562905925698502846, - 15259172910127558115, + 3663813169945470735, + 17139564151051767194, 18446744073709551615, 18446744073709551615, - 22, - 93, - 22, - 93, - 3, - 18, + 44, + 68, + 44, + 68, + 8, + 11, true, - "In Section 3, we present the design of the platform and its components.", - "In Section 3, we present the design of the platform and its components." + "template specific models", + "template specific models" ], [ - "sentence", - "", - 17187299362680072378, + "verb", + "compound-verb", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 1592324435600311370, - 6679784309178480570, + 6623118764989562485, + 9686528964214635468, 18446744073709551615, 18446744073709551615, - 94, - 280, - 94, - 280, - 18, - 54, + 69, + 81, + 69, + 81, + 11, + 14, true, - "In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively.", - "In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively." + "is to obtain", + "is to obtain" ], [ - "sentence", - "", - 17187299362680072378, + "conn", + "single-conn", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 18017856606572388707, - 11119000415778134338, + 15441160910541485865, + 1662040545925493605, 18446744073709551615, 18446744073709551615, - 281, - 340, - 281, - 340, - 54, - 67, + 72, + 74, + 72, + 74, + 12, + 13, true, - "Finally, in Section 5, we discuss the open questions w.r.t.", - "Finally, in Section 5, we discuss the open questions w.r.t." + "to", + "to" ], [ "term", "single-term", - 17187299362680072378, + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 7362111305564357210, - 14399545547382599450, + 16960645913427248555, + 7662141651479474713, 18446744073709551615, 18446744073709551615, - 141, - 159, - 141, - 159, - 28, - 30, + 91, + 109, + 91, + 109, + 16, + 18, true, - "deployment methods", - "deployment methods" + "extraction quality", + "extraction quality" ], [ - "term", - "single-term", - 17187299362680072378, + "conn", + "single-conn", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 14592782398836220527, - 13536879568188234094, + 15441160910541486989, + 1662040951000079940, 18446744073709551615, 18446744073709551615, - 178, - 193, - 178, - 193, - 35, - 37, + 110, + 112, + 110, + 112, + 18, + 19, true, - "platform scales", - "platform scales" + "by", + "by" ], [ - "term", - "single-term", - 17187299362680072378, + "verb", + "single-verb", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 4421383392096991748, - 6284876151106966992, + 15180748647375949898, + 15041926949817059678, 18446744073709551615, 18446744073709551615, - 248, - 265, - 248, - 265, - 49, - 51, + 113, + 125, + 113, + 125, + 19, + 20, true, - "compute resources", - "compute resources" + "specializing", + "specializing" ], [ "term", "single-term", - 17187299362680072378, + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 8051609034415273401, - 10487247228020021805, + 329104161610777240, + 15370809836743986311, 18446744073709551615, 18446744073709551615, - 319, - 333, - 319, - 333, - 63, - 65, + 130, + 135, + 130, + 135, + 21, + 22, true, - "open questions", - "open questions" + "model", + "model" ], [ - "term", - "single-term", - 17187299362680072378, + "conn", + "single-conn", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 8106352240078799135, - 10120178145746215787, + 389609625618762887, + 14878547061345061059, 18446744073709551615, 18446744073709551615, - 25, - 32, - 25, - 32, - 4, - 5, + 136, + 140, + 136, + 140, + 22, + 24, true, - "Section", - "Section" + "on a", + "on a" ], [ "term", "single-term", - 17187299362680072378, + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 16381206568241679420, - 8738387660838128289, + 10137510760641589283, + 15174113578041628274, 18446744073709551615, 18446744073709551615, - 51, - 57, - 51, - 57, - 10, - 11, + 141, + 158, + 141, + 158, + 24, + 26, true, - "design", - "design" + "specific template", + "specific template" ], [ - "term", - "single-term", - 17187299362680072378, + "sentence", + "", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 14814125365076808131, - 2092259178040575550, + 15812734743858168044, + 5104988671183900609, 18446744073709551615, 18446744073709551615, - 65, - 73, - 65, - 73, - 13, - 14, + 160, + 272, + 160, + 272, + 27, + 47, true, - "platform", - "platform" + "This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance.", + "This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance." ], [ - "term", - "single-term", - 17187299362680072378, + "verb", + "single-verb", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 2703018952916355661, - 11574998382432588793, + 15441160910541486535, + 1662040640859036333, 18446744073709551615, 18446744073709551615, - 82, - 92, - 82, - 92, - 16, - 17, + 165, + 167, + 165, + 167, + 28, + 29, true, - "components", - "components" + "is", + "is" ], [ - "term", - "single-term", - 17187299362680072378, + "conn", + "single-conn", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 8106352240078799135, - 10120178145746219109, + 3701312585595488544, + 14499465500010376427, 18446744073709551615, 18446744073709551615, - 97, - 104, - 97, - 104, - 19, - 20, + 168, + 180, + 168, + 180, + 29, + 31, true, - "Section", - "Section" + "necessary in", + "necessary in" ], [ "term", "single-term", - 17187299362680072378, + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 11899564443746965611, - 8824822780498807299, + 7342862043108457350, + 10866470711373289678, 18446744073709551615, 18446744073709551615, - 123, - 135, - 123, - 135, - 25, - 26, + 181, + 202, + 181, + 202, + 31, + 34, true, - "architecture", - "architecture" + "many technical fields", + "many technical fields" ], [ "term", "single-term", - 17187299362680072378, + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 16381206521526353544, - 8079494218851857408, + 14650440612701450082, + 10632661340355574917, 18446744073709551615, 18446744073709551615, - 199, - 205, - 199, - 205, + 214, + 222, + 214, + 222, + 37, 38, - 39, true, - "regard", - "regard" + "accuracy", + "accuracy" ], [ - "term", - "single-term", - 17187299362680072378, + "conn", + "single-conn", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 16381206519640398140, - 414969871814550286, + 16381206565712212855, + 5026312373792128532, 18446744073709551615, 18446744073709551615, - 209, - 215, - 209, - 215, + 223, + 229, + 223, + 229, + 38, 40, - 41, true, - "volume", - "volume" + "of the", + "of the" ], [ - "term", - "single-term", - 17187299362680072378, + "verb", + "single-verb", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 329104159157820437, - 13127829657860064361, + 6168374324562720592, + 8408511475472730744, 18446744073709551615, 18446744073709551615, - 225, 230, - 225, + 239, 230, - 44, - 45, + 239, + 40, + 41, true, - "users", - "users" + "extracted", + "extracted" ], [ "term", "single-term", - 17187299362680072378, + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 8106398484416916345, - 11293518884131724477, + 389609625696431489, + 14876459829455684771, 18446744073709551615, 18446744073709551615, - 235, - 242, - 235, - 242, - 46, - 47, + 240, + 244, + 240, + 244, + 41, + 42, true, - "content", - "content" + "data", + "data" + ], + [ + "verb", + "single-verb", + 14539041145469267811, + "TEXT", + "#/texts/55", + 1.0, + 15441160910541486535, + 1662040640859038873, + 18446744073709551615, + 18446744073709551615, + 245, + 247, + 245, + 247, + 42, + 43, + true, + "is", + "is" + ], + [ + "conn", + "single-conn", + 14539041145469267811, + "TEXT", + "#/texts/55", + 1.0, + 15441160910541485670, + 1662040798765106998, + 18446744073709551615, + 18446744073709551615, + 248, + 250, + 248, + 250, + 43, + 44, + true, + "of", + "of" ], [ "term", "single-term", - 17187299362680072378, + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 8106352240078799135, - 10120178145746003140, + 3376407656379762908, + 17651500245932752692, 18446744073709551615, 18446744073709551615, - 293, - 300, - 293, - 300, - 57, - 58, + 251, + 271, + 251, + 271, + 44, + 46, true, - "Section", - "Section" + "paramount importance", + "paramount importance" ], [ - "verb", - "single-verb", - 17187299362680072378, + "sentence", + "", + 14539041145469267811, "TEXT", - "#/texts/14", + "#/texts/55", 1.0, - 8106476016677076976, - 9844196961628278464, + 551135567978634707, + 9805137836117614428, 18446744073709551615, 18446744073709551615, - 39, - 46, - 39, - 46, - 8, - 9, + 273, + 460, + 273, + 460, + 47, + 78, true, - "present", - "present" + "Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", + "Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality." ], [ - "verb", - "single-verb", + "term", + "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, - 8106397868479560363, - 8170627791942563001, + 1525875096007260836, + 12021364178741137402, 18446744073709551615, 18446744073709551615, - 111, - 118, - 111, - 118, - 23, - 24, + 381, + 392, + 381, + 392, + 74, + 75, true, - "discuss", - "discuss" + "development", + "development" ], [ - "verb", - "single-verb", + "term", + "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, - 8106397868479560363, - 8170627791941832362, + 2969135035769345619, + 4036418751139797908, 18446744073709551615, 18446744073709551615, - 307, - 314, - 307, - 314, - 61, - 62, + 0, + 20, + 0, + 20, + 0, + 2, true, - "discuss", - "discuss" + "processing solutions", + "processing solutions" ], [ - "verb", - "single-verb", + "sentence", + "", 17187299362680072378, "TEXT", "#/texts/14", 1.0, - 329104161622136223, - 9304407318657891408, + 13562905925698502846, + 15259172910127558115, 18446744073709551615, 18446744073709551615, - 334, - 339, - 334, - 339, - 65, - 66, + 22, + 93, + 22, + 93, + 3, + 18, true, - "w.r.t", - "w.r.t" + "In Section 3, we present the design of the platform and its components.", + "In Section 3, we present the design of the platform and its components." ], [ "conn", @@ -10746,88 +10901,88 @@ "In" ], [ - "conn", - "single-conn", + "term", + "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, - 16381206565712212855, - 8774362010989401403, + 8106352240078799135, + 10120178145746215787, 18446744073709551615, 18446744073709551615, - 58, - 64, - 58, - 64, - 11, - 13, + 25, + 32, + 25, + 32, + 4, + 5, true, - "of the", - "of the" + "Section", + "Section" ], [ - "conn", - "single-conn", + "numval", + "ival", 17187299362680072378, "TEXT", "#/texts/14", 1.0, - 15441160910541480354, - 13110915667349571689, + 17767354399704235163, + 1719697440342653142, 18446744073709551615, 18446744073709551615, - 94, - 96, - 94, - 96, - 18, - 19, + 33, + 34, + 33, + 34, + 5, + 6, true, - "In", - "In" + "3", + "3" ], [ - "conn", - "single-conn", + "verb", + "single-verb", 17187299362680072378, "TEXT", "#/texts/14", 1.0, - 389609625618037948, - 4823273682945992581, + 8106476016677076976, + 9844196961628278464, 18446744073709551615, 18446744073709551615, - 194, - 198, - 194, - 198, - 37, - 38, + 39, + 46, + 39, + 46, + 8, + 9, true, - "with", - "with" + "present", + "present" ], [ - "conn", - "single-conn", + "term", + "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, - 8106396909821462677, - 1088815676641410103, + 16381206568241679420, + 8738387660838128289, 18446744073709551615, 18446744073709551615, - 217, - 224, - 217, - 224, - 42, - 44, + 51, + 57, + 51, + 57, + 10, + 11, true, - "both in", - "both in" + "design", + "design" ], [ "conn", @@ -10836,323 +10991,428 @@ "TEXT", "#/texts/14", 1.0, - 15441160910541486538, - 13110916059597983243, + 16381206565712212855, + 8774362010989401403, 18446744073709551615, 18446744073709551615, - 290, - 292, - 290, - 292, - 56, - 57, + 58, + 64, + 58, + 64, + 11, + 13, true, - "in", - "in" + "of the", + "of the" ], [ - "conn", - "single-conn", + "term", + "single-term", 17187299362680072378, "TEXT", "#/texts/14", 1.0, - 15441160910541485865, - 13110915963300809577, + 14814125365076808131, + 2092259178040575550, 18446744073709551615, 18446744073709551615, - 206, - 208, - 206, - 208, - 39, - 40, + 65, + 73, + 65, + 73, + 13, + 14, true, - "to", - "to" + "platform", + "platform" ], [ - "numval", - "ival", - 697648145931166262, + "term", + "single-term", + 17187299362680072378, "TEXT", - "#/texts/15", + "#/texts/14", 1.0, - 17767354399704235162, - 7083995155582974975, + 2703018952916355661, + 11574998382432588793, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 82, + 92, + 82, + 92, + 16, + 17, true, - "2", - "2" + "components", + "components" ], [ - "numval", - "ival", - 7935233310532930917, + "sentence", + "", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 17767354399704235163, - 2552838057434759723, + 1592324435600311370, + 6679784309178480570, 18446744073709551615, 18446744073709551615, - 130, - 131, - 130, - 131, + 94, + 280, + 94, + 280, + 18, + 54, + true, + "In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively.", + "In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively." + ], + [ + "conn", + "single-conn", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 15441160910541480354, + 13110915667349571689, + 18446744073709551615, + 18446744073709551615, + 94, + 96, + 94, + 96, + 18, + 19, + true, + "In", + "In" + ], + [ + "term", + "single-term", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 8106352240078799135, + 10120178145746219109, + 18446744073709551615, + 18446744073709551615, + 97, + 104, + 97, + 104, + 19, 20, - 21, true, - "3", - "3" + "Section", + "Section" ], [ "numval", "ival", - 7935233310532930917, + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, 17767354399704235156, - 2552838057671732941, + 1719697438695412307, 18446744073709551615, 18446744073709551615, - 133, - 134, - 133, - 134, - 22, - 23, + 105, + 106, + 105, + 106, + 20, + 21, true, "4", "4" ], [ - "parenthesis", - "square brackets", - 7935233310532930917, + "verb", + "single-verb", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 16381206577288742091, - 6894361769431189204, + 8106397868479560363, + 8170627791942563001, 18446744073709551615, 18446744073709551615, - 129, - 135, - 129, - 135, - 19, + 111, + 118, + 111, + 118, + 23, 24, true, - "[3, 4]", - "[3, 4]" + "discuss", + "discuss" ], [ - "expression", - "common", - 7935233310532930917, + "term", + "single-term", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 15441160910541486545, - 16301782680726802891, + 11899564443746965611, + 8824822780498807299, 18446744073709551615, 18446744073709551615, - 558, - 562, - 558, - 562, - 101, - 102, + 123, + 135, + 123, + 135, + 25, + 26, true, - "ie", - "i.e." + "architecture", + "architecture" ], [ - "expression", - "word-concatenation", - 7935233310532930917, + "term", + "single-term", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 14650469740546809126, - 13134297167790756810, + 7362111305564357210, + 14399545547382599450, 18446744073709551615, 18446744073709551615, - 741, - 749, - 741, - 749, - 135, - 136, + 141, + 159, + 141, + 159, + 28, + 30, true, - "JSON/XML", - "JSON/XML" + "deployment methods", + "deployment methods" ], [ - "sentence", - "", - 7935233310532930917, + "term", + "single-term", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 7413762744011699502, - 5112650843480238838, + 14592782398836220527, + 13536879568188234094, 18446744073709551615, 18446744073709551615, - 0, - 136, - 0, - 136, - 0, - 25, + 178, + 193, + 178, + 193, + 35, + 37, true, - "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4].", - "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]." + "platform scales", + "platform scales" ], [ - "sentence", - "", - 7935233310532930917, + "conn", + "single-conn", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 5916877018351655351, - 15887401132590714495, + 389609625618037948, + 4823273682945992581, 18446744073709551615, 18446744073709551615, - 137, - 205, - 137, - 205, - 25, + 194, + 198, + 194, + 198, + 37, 38, true, - "Broadly speaking, there are two types of approaches to this problem.", - "Broadly speaking, there are two types of approaches to this problem." + "with", + "with" ], [ - "sentence", - "", - 7935233310532930917, + "term", + "single-term", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 6270962906961324285, - 7804853914853957296, + 16381206521526353544, + 8079494218851857408, 18446744073709551615, 18446744073709551615, - 206, - 359, - 206, - 359, + 199, + 205, + 199, + 205, 38, - 66, + 39, true, - "In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document.", - "In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document." + "regard", + "regard" ], [ - "sentence", - "", - 7935233310532930917, + "conn", + "single-conn", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 1323125914001755357, - 2708959011598697473, + 15441160910541485865, + 13110915963300809577, 18446744073709551615, 18446744073709551615, - 360, - 443, - 360, - 443, - 66, - 83, + 206, + 208, + 206, + 208, + 39, + 40, true, - "This can be done through a conversion from PDF towards HTML or MS Word for example.", - "This can be done through a conversion from PDF towards HTML or MS Word for example." + "to", + "to" ], [ - "sentence", - "", - 7935233310532930917, + "term", + "single-term", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 7959677268287021834, - 6471587455066159969, + 16381206519640398140, + 414969871814550286, 18446744073709551615, 18446744073709551615, - 444, - 711, - 444, - 711, - 83, - 128, + 209, + 215, + 209, + 215, + 40, + 41, true, - "The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format.", - "The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format." + "volume", + "volume" ], [ - "sentence", - "", - 7935233310532930917, + "parenthesis", + "round brackets", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 9906268904976001851, - 12420000417227776440, + 12960504640524214008, + 7549890404163577655, 18446744073709551615, 18446744073709551615, - 712, - 780, - 712, - 780, - 128, - 142, + 216, + 243, + 216, + 243, + 41, + 48, true, - "For example, this could be a JSON/XML file with a particular schema.", - "For example, this could be a JSON/XML file with a particular schema." + "(both in users and content)", + "(both in users and content)" ], [ - "sentence", - "", - 7935233310532930917, + "conn", + "single-conn", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 9067254065901696428, - 10388894100200420496, + 8106396909821462677, + 1088815676641410103, 18446744073709551615, 18446744073709551615, - 781, - 955, - 781, - 955, - 142, - 173, + 217, + 224, + 217, + 224, + 42, + 44, true, - "Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", - "Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution." + "both in", + "both in" ], [ "term", - "enum-term-mark-4", - 7935233310532930917, + "single-term", + 17187299362680072378, "TEXT", - "#/texts/16", + "#/texts/14", 1.0, - 2074372556278321470, - 3687797441781668801, + 329104159157820437, + 13127829657860064361, + 18446744073709551615, + 18446744073709551615, + 225, + 230, + 225, + 230, + 44, + 45, + true, + "users", + "users" + ], + [ + "term", + "single-term", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 8106398484416916345, + 11293518884131724477, + 18446744073709551615, + 18446744073709551615, + 235, + 242, + 235, + 242, + 46, + 47, + true, + "content", + "content" + ], + [ + "term", + "single-term", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 4421383392096991748, + 6284876151106966992, + 18446744073709551615, + 18446744073709551615, + 248, + 265, + 248, + 265, + 49, + 51, + true, + "compute resources", + "compute resources" + ], + [ + "term", + "enum-term-mark-4", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 2074372556278321470, + 3687797441781668801, 18446744073709551615, 18446744073709551615, 415, @@ -11168,233 +11428,233 @@ [ "term", "single-term", - 7935233310532930917, + 14539041145469267811, "TEXT", - "#/texts/16", + "#/texts/55", 1.0, - 12653831733608918357, - 1251885133784117773, + 879437392081459464, + 10698589901478685905, 18446744073709551615, 18446744073709551615, - 23, - 36, - 23, - 36, - 4, - 6, + 286, + 310, + 286, + 310, + 49, + 52, true, - "PDF documents", - "PDF documents" + "many technical documents", + "many technical documents" ], [ - "term", - "single-term", - 7935233310532930917, + "conn", + "single-conn", + 14539041145469267811, "TEXT", - "#/texts/16", + "#/texts/55", 1.0, - 1649772470814702484, - 1849781250727403708, + 389609625698530964, + 14883385687690770855, 18446744073709551615, 18446744073709551615, - 41, - 73, - 41, - 73, - 7, - 10, + 311, + 315, + 311, + 315, + 52, + 54, true, - "automatic content reconstruction", - "automatic content reconstruction" + "in a", + "in a" ], [ "term", "single-term", - 7935233310532930917, + 14539041145469267811, "TEXT", - "#/texts/16", + "#/texts/55", 1.0, - 4649638595618642234, - 17675128594551486840, + 15130402050161305835, + 1457144697725364176, 18446744073709551615, 18446744073709551615, - 86, - 105, - 86, - 105, - 13, - 15, + 316, + 330, + 316, + 330, + 54, + 56, true, - "outstanding problem", - "outstanding problem" + "specific field", + "specific field" ], [ - "term", - "single-term", - 7935233310532930917, + "verb", + "single-verb", + 14539041145469267811, "TEXT", - "#/texts/16", + "#/texts/55", 1.0, - 9088977435888678827, - 7025359603537163328, + 16381206574684919940, + 8690278604869594595, 18446744073709551615, 18446744073709551615, - 213, - 227, - 213, - 227, - 40, - 42, + 341, + 347, + 341, + 347, + 57, + 58, true, - "first approach", - "first approach" + "appear", + "appear" ], [ - "term", - "single-term", - 7935233310532930917, + "conn", + "single-conn", + 14539041145469267811, "TEXT", - "#/texts/16", + "#/texts/55", 1.0, - 5396697874491186037, - 9700463201577231321, + 389609625698530964, + 14883385687690756753, 18446744073709551615, 18446744073709551615, - 320, - 342, - 320, - 342, - 59, - 62, + 348, + 352, + 348, + 352, + 58, + 60, true, - "original visual layout", - "original visual layout" + "in a", + "in a" ], [ "term", "single-term", - 7935233310532930917, + 14539041145469267811, "TEXT", - "#/texts/16", + "#/texts/55", 1.0, - 8106471324341093100, - 10896171766474086033, + 5723400002059657755, + 8384905200420629131, 18446744073709551615, 18446744073709551615, - 423, - 430, - 423, - 430, - 78, - 80, + 353, + 369, + 353, + 369, + 60, + 62, true, - "MS Word", - "MS Word" + "certain template", + "certain template" ], [ - "term", - "single-term", - 7935233310532930917, + "verb", + "single-verb", + 14539041145469267811, "TEXT", - "#/texts/16", + "#/texts/55", 1.0, - 10632085908481842480, - 3848207310545898370, + 329104161505838030, + 15370325700124998836, 18446744073709551615, 18446744073709551615, - 448, - 472, - 448, - 472, - 84, - 87, + 383, + 388, + 383, + 388, + 65, + 66, true, - "second approach attempts", - "second approach attempts" + "makes", + "makes" ], [ "term", "single-term", - 7935233310532930917, + 14539041145469267811, "TEXT", - "#/texts/16", + "#/texts/55", 1.0, - 11738704476441755021, - 15052719376970997774, + 329104161787480235, + 15382185116652927163, 18446744073709551615, 18446744073709551615, - 670, - 687, - 670, - 687, - 121, - 123, + 389, + 394, + 389, + 394, + 66, + 67, true, - "original document", - "original document" + "sense", + "sense" ], [ - "term", - "single-term", - 7935233310532930917, + "conn", + "single-conn", + 14539041145469267811, "TEXT", - "#/texts/16", + "#/texts/55", 1.0, - 14630472899120924944, - 11642528133024722414, + 15441160910541485865, + 1662040545925472456, 18446744073709551615, 18446744073709551615, - 693, - 710, - 693, - 710, - 125, - 127, + 395, + 397, + 395, + 397, + 67, + 68, true, - "structured format", - "structured format" + "to", + "to" ], [ - "term", - "single-term", - 7935233310532930917, + "verb", + "single-verb", + 14539041145469267811, "TEXT", - "#/texts/16", + "#/texts/55", 1.0, - 673611805924135293, - 4470122145607424586, + 389609625631208371, + 14878114134196888026, 18446744073709551615, 18446744073709551615, - 741, - 754, - 741, - 754, - 135, - 137, + 398, + 402, + 398, + 402, + 68, + 69, true, - "JSON/XML file", - "JSON/XML file" + "take", + "take" ], [ "term", "single-term", - 7935233310532930917, + 14539041145469267811, "TEXT", - "#/texts/16", + "#/texts/55", 1.0, - 3982493928589580498, - 8690888332062541868, + 5946904284821171904, + 7436968498862967568, 18446744073709551615, 18446744073709551615, - 762, - 779, - 762, - 779, - 139, - 141, + 403, + 412, + 403, + 412, + 69, + 70, true, - "particular schema", - "particular schema" + "advantage", + "advantage" ], [ "term", @@ -11439,46 +11699,46 @@ "first step" ], [ - "term", - "single-term", - 7935233310532930917, + "parenthesis", + "round brackets", + 16175086861512378818, "TEXT", - "#/texts/16", + "#/figures/7/captions/0", 1.0, - 13157956405326233364, - 1973865905648942248, + 16921575733553257608, + 12281596461521835087, 18446744073709551615, 18446744073709551615, - 857, - 885, - 857, - 885, - 156, - 159, + 89, + 148, + 89, + 148, + 17, + 29, true, - "knowledge discovery platform", - "knowledge discovery platform" + "(each with four cores, running four local worker processes)", + "(each with four cores, running four local worker processes)" ], [ - "term", - "single-term", - 7935233310532930917, + "numval", + "ival", + 16175086861512378818, "TEXT", - "#/texts/16", + "#/figures/7/captions/0", 1.0, - 2940970869648856259, - 4641698687139622359, + 17767354399704235152, + 4062931237961089018, 18446744073709551615, 18446744073709551615, - 923, - 938, - 923, - 938, - 167, - 169, + 7, + 8, + 7, + 8, + 1, + 2, true, - "second approach", - "second approach" + "8", + "8" ], [ "term", @@ -11901,46 +12161,46 @@ "example" ], [ - "term", - "single-term", - 7935233310532930917, + "numval", + "ival", + 6667504298804810757, "TEXT", - "#/texts/16", + "#/figures/6/captions/0", 1.0, - 6167933651658664291, - 4335834381973654488, + 17767354399704235159, + 9715777791729971940, 18446744073709551615, 18446744073709551615, - 890, - 899, - 890, - 899, - 160, - 161, + 7, + 8, + 7, + 8, + 1, + 2, true, - "documents", - "documents" + "7", + "7" ], [ - "term", - "single-term", - 7935233310532930917, + "numval", + "ival", + 3206590615695639432, "TEXT", - "#/texts/16", + "#/figures/5/captions/0", 1.0, - 14635106751859230946, - 4735627980056120373, + 17767354399704235156, + 2534172456610010803, 18446744073709551615, 18446744073709551615, - 946, - 954, - 946, - 954, - 171, - 172, + 89, + 90, + 89, + 90, + 16, + 17, true, - "solution", - "solution" + "4", + "4" ], [ "verb", @@ -12069,25 +12329,25 @@ "is thought" ], [ - "verb", - "compound-verb", - 7935233310532930917, + "numval", + "ival", + 3206590615695639432, "TEXT", - "#/texts/16", + "#/figures/5/captions/0", 1.0, - 5518720680045185536, - 10887625114734223201, + 17767354399704235158, + 2534172456061785951, 18446744073709551615, 18446744073709551615, - 904, - 914, - 904, - 914, - 163, - 165, + 7, + 8, + 7, + 8, + 1, + 2, true, - "have opted", - "have opted" + "6", + "6" ], [ "verb", @@ -12699,88 +12959,88 @@ "as a" ], [ - "conn", - "single-conn", - 7935233310532930917, + "numval", + "ival", + 17801697261174341699, "TEXT", - "#/texts/16", + "#/figures/4/captions/0", 1.0, - 3512299892331381400, - 9603465650093366657, + 17767354399704235162, + 833845511957208288, 18446744073709551615, 18446744073709551615, - 847, - 856, - 847, - 856, - 154, - 156, + 220, + 221, + 220, + 221, + 45, + 46, true, - "towards a", - "towards a" + "2", + "2" ], [ - "conn", - "single-conn", - 7935233310532930917, + "numval", + "ival", + 17801697261174341699, "TEXT", - "#/texts/16", + "#/figures/4/captions/0", 1.0, - 12178341415895625940, - 5663899155610829905, + 17767354399704235157, + 833845513437915409, 18446744073709551615, 18446744073709551615, - 886, - 889, - 886, - 889, - 159, - 160, + 7, + 8, + 7, + 8, + 1, + 2, true, - "for", - "for" + "5", + "5" ], [ - "conn", - "single-conn", - 7935233310532930917, + "parenthesis", + "round brackets", + 7479698582664857938, "TEXT", - "#/texts/16", + "#/figures/3/captions/0", 1.0, - 8106397727991264470, - 13733498763290197426, + 5004423576043290598, + 6857855095860201593, 18446744073709551615, 18446744073709551615, - 915, - 922, - 915, - 922, - 165, - 167, + 236, + 276, + 236, + 276, + 44, + 53, true, - "for the", - "for the" + "(depicted by a vertical dashed red line)", + "(depicted by a vertical dashed red line)" ], [ - "conn", - "single-conn", - 7935233310532930917, + "parenthesis", + "round brackets", + 7479698582664857938, "TEXT", - "#/texts/16", + "#/figures/3/captions/0", 1.0, - 15441160910541486538, - 16301782677078975107, + 10200158520942631212, + 14814029506262268324, 18446744073709551615, 18446744073709551615, - 939, - 941, - 939, - 941, - 169, - 170, + 69, + 108, + 69, + 108, + 12, + 20, true, - "in", - "in" + "(Physical Review B and Elsevier papers)", + "(Physical Review B and Elsevier papers)" ], [ "conn", @@ -13160,6 +13420,48 @@ "proprietary solutions", "proprietary solutions" ], + [ + "term", + "single-term", + 2762070725424637531, + "TEXT", + "#/texts/17", + 1.0, + 10744767603644001295, + 6556612560521384278, + 18446744073709551615, + 18446744073709551615, + 270, + 291, + 270, + 291, + 46, + 48, + true, + "open-source solutions", + "open-source solutions" + ], + [ + "term", + "single-term", + 2762070725424637531, + "TEXT", + "#/texts/17", + 1.0, + 3984902188979412540, + 2758396569987389277, + 18446744073709551615, + 18446744073709551615, + 303, + 332, + 303, + 332, + 51, + 54, + true, + "proprietary solutions support", + "proprietary solutions support" + ], [ "term", "single-term", @@ -13286,6 +13588,27 @@ "DataCap^{7}", "DataCap$^{7}$" ], + [ + "term", + "single-term", + 2762070725424637531, + "TEXT", + "#/texts/17", + 1.0, + 14652282307509823780, + 1841895394656946508, + 18446744073709551615, + 18446744073709551615, + 254, + 262, + 254, + 262, + 43, + 44, + true, + "contrast", + "contrast" + ], [ "verb", "compound-verb", @@ -13433,6 +13756,48 @@ "of", "of" ], + [ + "conn", + "single-conn", + 2762070725424637531, + "TEXT", + "#/texts/17", + 1.0, + 15441160910541480354, + 9244457284230350460, + 18446744073709551615, + 18446744073709551615, + 251, + 253, + 251, + 253, + 42, + 43, + true, + "In", + "In" + ], + [ + "conn", + "single-conn", + 2762070725424637531, + "TEXT", + "#/texts/17", + 1.0, + 16381206519425733256, + 6316528514956069057, + 18446744073709551615, + 18446744073709551615, + 263, + 269, + 263, + 269, + 44, + 46, + true, + "to the", + "to the" + ], [ "parenthesis", "reference", @@ -13622,6 +13987,48 @@ "previous editions", "previous editions" ], + [ + "term", + "single-term", + 7536915191196259776, + "TEXT", + "#/texts/18", + 1.0, + 5303544497514782120, + 7345008805812505585, + 18446744073709551615, + 18446744073709551615, + 0, + 10, + 0, + 10, + 0, + 1, + true, + "extraction", + "extraction" + ], + [ + "term", + "single-term", + 7536915191196259776, + "TEXT", + "#/texts/18", + 1.0, + 6167933651658664291, + 12757348439776621126, + 18446744073709551615, + 18446744073709551615, + 24, + 33, + 24, + 33, + 3, + 4, + true, + "documents", + "documents" + ], [ "term", "single-term", @@ -13811,6 +14218,27 @@ "is actively addressed", "is actively addressed" ], + [ + "verb", + "single-verb", + 7536915191196259776, + "TEXT", + "#/texts/18", + 1.0, + 8106478648743879659, + 12679135976426961828, + 18446744073709551615, + 18446744073709551615, + 16, + 23, + 16, + 23, + 2, + 3, + true, + "scanned", + "scanned" + ], [ "verb", "single-verb", @@ -13895,6 +14323,27 @@ "posed", "posed" ], + [ + "conn", + "single-conn", + 7536915191196259776, + "TEXT", + "#/texts/18", + 1.0, + 389609625697843734, + 12402283298966649397, + 18446744073709551615, + 18446744073709551615, + 11, + 15, + 11, + 15, + 1, + 2, + true, + "from", + "from" + ], [ "conn", "single-conn", @@ -33194,6 +33643,69 @@ "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute.", "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute." ], + [ + "term", + "single-term", + 10982401368140758581, + "TEXT", + "#/texts/37", + 1.0, + 3197976581661651446, + 17591908013383021851, + 18446744073709551615, + 18446744073709551615, + 85, + 103, + 85, + 103, + 14, + 17, + true, + "vertical red lines", + "vertical red lines" + ], + [ + "term", + "single-term", + 10982401368140758581, + "TEXT", + "#/texts/37", + 1.0, + 4147505635383066832, + 7737381165149079765, + 18446744073709551615, + 18446744073709551615, + 150, + 165, + 150, + 165, + 25, + 27, + true, + "annotated pages", + "annotated pages" + ], + [ + "term", + "single-term", + 10982401368140758581, + "TEXT", + "#/texts/37", + 1.0, + 12895837410855552806, + 17306921502480655741, + 18446744073709551615, + 18446744073709551615, + 178, + 192, + 178, + 192, + 32, + 34, + true, + "improved model", + "improved model" + ], [ "term", "single-term", @@ -33278,6 +33790,48 @@ "minute", "minute" ], + [ + "term", + "single-term", + 10982401368140758581, + "TEXT", + "#/texts/37", + 1.0, + 14634153919632515335, + 1857646059609777193, + 18446744073709551615, + 18446744073709551615, + 120, + 128, + 120, + 128, + 20, + 21, + true, + "training", + "training" + ], + [ + "verb", + "compound-verb", + 10982401368140758581, + "TEXT", + "#/texts/37", + 1.0, + 13034167073558276041, + 14872878930615633276, + 18446744073709551615, + 18446744073709551615, + 129, + 142, + 129, + 142, + 21, + 23, + true, + "was performed", + "was performed" + ], [ "verb", "single-verb", @@ -33299,6 +33853,48 @@ "show", "show" ], + [ + "verb", + "single-verb", + 10982401368140758581, + "TEXT", + "#/texts/37", + 1.0, + 14637951605983202826, + 7813818474500339205, + 18446744073709551615, + 18446744073709551615, + 104, + 112, + 104, + 112, + 17, + 18, + true, + "indicate", + "indicate" + ], + [ + "verb", + "single-verb", + 10982401368140758581, + "TEXT", + "#/texts/37", + 1.0, + 15441160910541486535, + 2695916097507178008, + 18446744073709551615, + 18446744073709551615, + 193, + 195, + 193, + 195, + 34, + 35, + true, + "is", + "is" + ], [ "conn", "single-conn", @@ -33362,6 +33958,48 @@ "per", "per" ], + [ + "conn", + "single-conn", + 10982401368140758581, + "TEXT", + "#/texts/37", + 1.0, + 16381206519429333259, + 6325354017576163482, + 18446744073709551615, + 18446744073709551615, + 113, + 119, + 113, + 119, + 18, + 20, + true, + "that a", + "that a" + ], + [ + "conn", + "single-conn", + 10982401368140758581, + "TEXT", + "#/texts/37", + 1.0, + 16381206566339127348, + 1754758722220302331, + 18446744073709551615, + 18446744073709551615, + 143, + 149, + 143, + 149, + 23, + 25, + true, + "on the", + "on the" + ], [ "numval", "ival", @@ -33698,6 +34336,48 @@ "ground-truth collection", "ground-truth collection" ], + [ + "term", + "single-term", + 887751753527930563, + "TEXT", + "#/texts/38", + 1.0, + 329104161594416377, + 13413315670796926229, + 18446744073709551615, + 18446744073709551615, + 15, + 20, + 15, + 20, + 3, + 4, + true, + "point", + "point" + ], + [ + "term", + "single-term", + 887751753527930563, + "TEXT", + "#/texts/38", + 1.0, + 16381206590740615814, + 5715153085670543197, + 18446744073709551615, + 18446744073709551615, + 36, + 42, + 36, + 42, + 7, + 8, + true, + "labels", + "labels" + ], [ "term", "single-term", @@ -34013,6 +34693,48 @@ "leads to", "leads to" ], + [ + "verb", + "single-verb", + 887751753527930563, + "TEXT", + "#/texts/38", + 1.0, + 389609625632179144, + 7525453844240816178, + 18446744073709551615, + 18446744073709551615, + 0, + 4, + 0, + 4, + 0, + 1, + true, + "used", + "used" + ], + [ + "verb", + "single-verb", + 887751753527930563, + "TEXT", + "#/texts/38", + 1.0, + 8106476016678293182, + 14705001208406550988, + 18446744073709551615, + 18446744073709551615, + 24, + 31, + 24, + 31, + 5, + 6, + true, + "predict", + "predict" + ], [ "verb", "single-verb", @@ -34244,6 +34966,27 @@ "comes", "comes" ], + [ + "conn", + "single-conn", + 887751753527930563, + "TEXT", + "#/texts/38", + 1.0, + 6184771668677947934, + 317856897517446338, + 18446744073709551615, + 18446744073709551615, + 5, + 14, + 5, + 14, + 1, + 3, + true, + "from that", + "from that" + ], [ "conn", "single-conn", @@ -34475,6 +35218,27 @@ "for", "for" ], + [ + "conn", + "single-conn", + 887751753527930563, + "TEXT", + "#/texts/38", + 1.0, + 15441160910541485865, + 11951507086803887753, + 18446744073709551615, + 18446744073709551615, + 21, + 23, + 21, + 23, + 4, + 5, + true, + "to", + "to" + ], [ "conn", "single-conn", @@ -36155,6 +36919,27 @@ "precision metrics", "precision metrics" ], + [ + "term", + "single-term", + 15354930767839681193, + "TEXT", + "#/texts/41", + 1.0, + 10318747471677033167, + 11847785680345282579, + 18446744073709551615, + 18446744073709551615, + 425, + 443, + 425, + 443, + 80, + 82, + true, + "second observation", + "second observation" + ], [ "term", "single-term", @@ -36638,6 +37423,48 @@ "is", "is" ], + [ + "verb", + "single-verb", + 15354930767839681193, + "TEXT", + "#/texts/41", + 1.0, + 15441160910541486535, + 217027369425791545, + 18446744073709551615, + 18446744073709551615, + 444, + 446, + 444, + 446, + 82, + 83, + true, + "is", + "is" + ], + [ + "verb", + "single-verb", + 15354930767839681193, + "TEXT", + "#/texts/41", + 1.0, + 389609625696287852, + 94271090942472495, + 18446744073709551615, + 18446744073709551615, + 455, + 459, + 455, + 459, + 85, + 86, + true, + "deal", + "deal" + ], [ "conn", "single-conn", @@ -36785,6 +37612,48 @@ "with the", "with the" ], + [ + "conn", + "single-conn", + 15354930767839681193, + "TEXT", + "#/texts/41", + 1.0, + 389609625631229034, + 52497997003113109, + 18446744073709551615, + 18446744073709551615, + 447, + 451, + 447, + 451, + 83, + 84, + true, + "that", + "that" + ], + [ + "conn", + "single-conn", + 15354930767839681193, + "TEXT", + "#/texts/41", + 1.0, + 16381206557726458966, + 12614731555033482050, + 18446744073709551615, + 18446744073709551615, + 460, + 466, + 460, + 466, + 86, + 88, + true, + "with a", + "with a" + ], [ "conn", "single-conn", @@ -39725,6 +40594,27 @@ "arXiv data^{11}", "arXiv data$^{11}$" ], + [ + "term", + "single-term", + 4628466594790006384, + "TEXT", + "#/texts/48", + 1.0, + 2903324788977241891, + 494345879165015724, + 18446744073709551615, + 18446744073709551615, + 103, + 112, + 103, + 112, + 17, + 19, + true, + "PDF pages", + "PDF pages" + ], [ "term", "single-term", @@ -39788,6 +40678,48 @@ "have been trained", "have been trained" ], + [ + "verb", + "compound-verb", + 4628466594790006384, + "TEXT", + "#/texts/48", + 1.0, + 9561950597095011783, + 3531415358081330379, + 18446744073709551615, + 18446744073709551615, + 82, + 96, + 82, + 96, + 14, + 16, + true, + "have annotated", + "have annotated" + ], + [ + "verb", + "single-verb", + 4628466594790006384, + "TEXT", + "#/texts/48", + 1.0, + 389609625632539415, + 12849636877128501627, + 18446744073709551615, + 18446744073709551615, + 117, + 121, + 117, + 121, + 20, + 21, + true, + "know", + "know" + ], [ "conn", "single-conn", @@ -40103,6 +41035,69 @@ "image-classification algorithms", "image-classification algorithms" ], + [ + "term", + "single-term", + 9651706913678711778, + "TEXT", + "#/texts/49", + 1.0, + 14639575749168485524, + 17067935029815538881, + 18446744073709551615, + 18446744073709551615, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "location", + "location" + ], + [ + "term", + "single-term", + 9651706913678711778, + "TEXT", + "#/texts/49", + 1.0, + 329104159216638303, + 3735971156137506785, + 18446744073709551615, + 18446744073709551615, + 25, + 30, + 25, + 30, + 5, + 6, + true, + "table", + "table" + ], + [ + "term", + "single-term", + 9651706913678711778, + "TEXT", + "#/texts/49", + 1.0, + 389609625632301461, + 3852646011442706184, + 18446744073709551615, + 18446744073709551615, + 39, + 43, + 39, + 43, + 8, + 9, + true, + "page", + "page" + ], [ "term", "single-term", @@ -40334,6 +41329,69 @@ "necessary for", "necessary for" ], + [ + "conn", + "single-conn", + 9651706913678711778, + "TEXT", + "#/texts/49", + 1.0, + 15441160910541485670, + 288606476502796116, + 18446744073709551615, + 18446744073709551615, + 9, + 11, + 9, + 11, + 1, + 2, + true, + "of", + "of" + ], + [ + "conn", + "single-conn", + 9651706913678711778, + "TEXT", + "#/texts/49", + 1.0, + 15441160910541487054, + 288595831133479933, + 18446744073709551615, + 18446744073709551615, + 12, + 14, + 12, + 14, + 2, + 3, + true, + "at", + "at" + ], + [ + "conn", + "single-conn", + 9651706913678711778, + "TEXT", + "#/texts/49", + 1.0, + 8106342614185119603, + 5315721009696706391, + 18446744073709551615, + 18446744073709551615, + 31, + 38, + 31, + 38, + 6, + 8, + true, + "on each", + "on each" + ], [ "conn", "single-conn", @@ -42393,109 +43451,109 @@ "to" ], [ - "numval", - "ival", - 18259197018396996238, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 17767354399704235161, - 12733743888687180225, + 329104159241711235, + 991946153785165058, 18446744073709551615, 18446744073709551615, - 93, - 94, - 93, - 94, - 16, - 17, + 729, + 734, + 729, + 734, + 129, + 130, true, - "1", - "1" + "truth", + "truth" ], [ - "numval", - "ival", - 18259197018396996238, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 12178341415896426714, - 4652804192217870476, + 6167933651658664291, + 11942237281037582534, 18446744073709551615, 18446744073709551615, - 291, - 294, - 291, - 294, - 53, - 54, + 698, + 707, + 698, + 707, + 123, + 124, true, - "100", - "100" + "documents", + "documents" ], [ - "numval", - "ival", - 18259197018396996238, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 329104147765109382, - 8033726402022826926, + 329104159157820437, + 995383834556884589, 18446744073709551615, 18446744073709551615, - 312, - 317, - 312, - 317, - 58, - 59, + 670, + 675, + 670, + 675, + 118, + 119, true, - "25000", - "25000" + "users", + "users" ], [ - "numval", - "ival", - 18259197018396996238, + "term", + "single-term", + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 12178341415896426714, - 4652804192217923506, + 3830746689439412878, + 10628297214120553798, 18446744073709551615, 18446744073709551615, - 354, - 357, - 354, - 357, - 66, - 67, + 170, + 210, + 170, + 210, + 37, + 42, true, - "100", - "100" + "24th ACM SIGKDD International Conference", + "24th ACM SIGKDD International Conference" ], [ - "numval", - "ival", - 18259197018396996238, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/51", + "#/texts/12", 1.0, - 17767354399704235152, - 12733743887789901018, + 14650448032998792781, + 15963759494992376767, 18446744073709551615, 18446744073709551615, - 508, - 509, - 508, - 509, - 91, - 92, + 446, + 454, + 446, + 454, + 83, + 84, true, - "8", - "8" + "approach", + "approach" ], [ "numval", @@ -42540,1012 +43598,1012 @@ "(\u2248 10 pages/sec/node)" ], [ - "expression", - "common", - 18259197018396996238, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/51", + "#/texts/12", 1.0, - 15441160910541486545, - 2599358878961543341, + 990358581043194791, + 393905999985936006, 18446744073709551615, 18446744073709551615, - 303, - 307, - 303, - 307, - 56, - 57, + 390, + 403, + 390, + 403, + 72, + 73, true, - "ie", - "i.e." + "microservices", + "microservices" ], [ - "expression", - "word-concatenation", - 18259197018396996238, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/51", + "#/texts/12", 1.0, - 6285955549867796622, - 12901492066051428715, + 990358581043194791, + 393905999985964694, 18446744073709551615, 18446744073709551615, - 108, - 124, - 108, - 124, - 21, - 22, + 327, + 340, + 327, + 340, + 62, + 63, true, - "time-to-solution", - "time-to-solution" + "microservices", + "microservices" ], [ - "expression", - "word-concatenation", - 18259197018396996238, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/51", + "#/texts/12", 1.0, - 15656590191683919916, - 3502038016915722737, + 12178341415895638602, + 5842694294408079134, 18446744073709551615, 18446744073709551615, - 385, - 398, - 385, - 398, - 73, - 74, + 320, + 323, + 320, + 323, + 60, + 61, true, - "out-ofthe-box", - "out-ofthe-box" + "set", + "set" ], [ - "expression", - "word-concatenation", - 18259197018396996238, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/51", + "#/texts/12", 1.0, - 329104162326555074, - 12378649640990487310, + 12178341415896221596, + 5842744026410738636, 18446744073709551615, 18446744073709551615, - 406, - 411, - 406, - 411, - 75, - 76, + 296, + 299, + 296, + 299, + 53, + 54, true, - "R-CNN", - "R-CNN" + "CCS", + "CCS" ], [ - "expression", - "word-concatenation", - 18259197018396996238, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/51", + "#/texts/12", 1.0, - 6285955549867796622, - 12901492066051459793, + 14814125365076808131, + 9349977279496707806, 18446744073709551615, 18446744073709551615, - 651, - 667, - 651, - 667, - 119, - 120, + 252, + 260, + 252, + 260, + 46, + 47, true, - "time-to-solution", - "time-to-solution" + "platform", + "platform" ], [ - "expression", - "wtoken-concatenation", - 18259197018396996238, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/51", + "#/texts/12", 1.0, - 16381206533950151485, - 7463375822213972642, + 15359670209433732834, + 1709633722429132795, 18446744073709551615, 18446744073709551615, - 493, - 499, - 493, - 499, - 89, - 90, + 235, + 245, + 235, + 245, + 43, + 44, true, - "YOLOv2", - "YOLOv2" + "algorithms", + "algorithms" ], [ - "expression", - "wtoken-concatenation", - 18259197018396996238, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 16381206533950151485, - 7463375822214056128, + 6167933651658664291, + 11942237281037800116, 18446744073709551615, 18446744073709551615, - 787, - 793, - 787, - 793, - 145, - 146, + 1035, + 1044, + 1035, + 1044, + 175, + 176, true, - "YOLOv2", - "YOLOv2" + "documents", + "documents" ], [ - "sentence", - "", - 18259197018396996238, + "term", + "enum-term-mark-4", + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 11214795667451364706, - 15381220353542038442, + 795735363451947563, + 16676628183188309306, 18446744073709551615, 18446744073709551615, - 0, - 83, - 0, - 83, - 0, - 14, + 214, + 247, + 214, + 247, + 43, + 48, true, - "Let us now discuss both deep neural network training microservices on the platform.", - "Let us now discuss both deep neural network training microservices on the platform." + "Knowledge Discovery & Data Mining", + "Knowledge Discovery & Data Mining" ], [ "sentence", "", - 18259197018396996238, + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 17449560956934989976, - 12526021364899620960, + 3268516227836428987, + 8296109392654892130, 18446744073709551615, 18446744073709551615, - 84, - 227, - 84, - 227, - 14, - 41, + 326, + 345, + 326, + 345, + 71, + 73, true, - "In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision.", - "In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision." + "https://doi.org/10.", + "https://doi.org/10." ], [ "sentence", "", - 18259197018396996238, + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 13058222401901188325, - 14090621328054154871, + 17980062243523090453, + 3043178868879598133, 18446744073709551615, 18446744073709551615, - 228, - 364, - 228, - 364, - 41, - 69, + 293, + 325, + 293, + 325, + 59, + 71, true, - "In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times.", - "In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times." + "ACM, New York, NY, USA, 9 pages.", + "ACM, New York, NY, USA, 9 pages." ], [ - "sentence", - "", - 18259197018396996238, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/51", + "#/texts/12", 1.0, - 16675190523738339061, - 7202929718160933759, + 389609625695918821, + 3664761358525422290, 18446744073709551615, 18446744073709551615, - 365, - 587, - 365, - 587, - 69, - 107, + 199, + 203, + 199, + 203, + 38, + 39, true, - "We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied.", - "We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied." + "core", + "core" ], [ - "sentence", - "", - 18259197018396996238, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/51", + "#/texts/12", 1.0, - 10235041227958384786, - 9628423971346406996, + 14814125365076808131, + 9349977279496653565, 18446744073709551615, 18446744073709551615, - 588, - 691, - 588, - 691, - 107, - 125, + 176, + 184, + 176, + 184, + 33, + 34, true, - "We believe that this is the main origin for the discrepancy of time-to-solution for the training phase.", - "We believe that this is the main origin for the discrepancy of time-to-solution for the training phase." + "platform", + "platform" ], [ - "sentence", - "", - 18259197018396996238, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/51", + "#/texts/12", 1.0, - 11909429825414533491, - 7916582600131240808, + 14635106751859230946, + 3899039667786064358, 18446744073709551615, 18446744073709551615, - 692, - 731, - 692, - 731, - 125, - 133, + 137, + 145, + 137, + 145, + 25, + 26, true, - "The same holds true for the prediction.", - "The same holds true for the prediction." + "solution", + "solution" ], [ - "sentence", - "", - 18259197018396996238, + "expression", + "wtoken-concatenation", + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 7447987213947934224, - 363147361352019607, + 3534146179424153776, + 16664784081959773586, 18446744073709551615, 18446744073709551615, - 732, - 913, - 732, - 911, - 133, - 172, + 326, + 344, + 326, + 344, + 71, + 72, true, - "Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", - "Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node)." + "https://doi.org/10", + "https://doi.org/10" ], [ - "term", - "enum-term-mark-2", - 18259197018396996238, + "expression", + "wtoken-concatenation", + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 11037453576911667853, - 14703723871622436608, + 389609625548781308, + 918163733627828877, 18446744073709551615, 18446744073709551615, - 206, - 226, - 206, - 226, + 170, + 174, + 170, + 174, 37, - 40, + 38, true, - "recall and precision", - "recall and precision" + "24th", + "24th" ], [ - "term", - "single-term", - 18259197018396996238, + "link", + "url", + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 13848731310568719727, - 15095939915134652393, + 3534146179424153776, + 16664784081959773586, 18446744073709551615, 18446744073709551615, - 24, - 66, - 24, - 66, - 5, - 10, + 326, + 344, + 326, + 344, + 71, + 72, true, - "deep neural network training microservices", - "deep neural network training microservices" + "https://doi.org/10", + "https://doi.org/10" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 1353284443403550494, - 17158735888603064564, + 12360325703059227080, + 15341633962216548312, 18446744073709551615, 18446744073709551615, - 155, - 166, - 155, - 166, - 27, - 29, + 1512, + 1553, + 1512, + 1553, + 253, + 256, true, - "single page", - "single page" + "knowledge-engineering project engagements", + "knowledge-engineering project engagements" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 12141441254112579393, - 8271858979549873106, + 8462871836886525200, + 10493121872431814801, 18446744073709551615, 18446744073709551615, - 235, - 249, - 235, - 249, - 43, - 45, + 1495, + 1507, + 1495, + 1507, + 250, + 252, true, - "training phase", - "training phase" + "active users", + "active users" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 18169256434676190331, - 11634553033353850813, + 168078114375663109, + 12852846298920524296, 18446744073709551615, 18446744073709551615, - 318, - 329, - 318, - 329, - 59, - 61, + 1441, + 1468, + 1441, + 1468, + 242, + 245, true, - "page images", - "page images" + "IBM internal infrastructure", + "IBM internal infrastructure" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 1151653930094198889, - 6279210758650536115, + 10465443055056631368, + 58866334284871721, 18446744073709551615, 18446744073709551615, - 385, - 411, - 385, - 411, - 73, - 76, + 1403, + 1415, + 1403, + 1415, + 236, + 238, true, - "out-ofthe-box Faster R-CNN", - "out-ofthe-box Faster R-CNN" + "CCS platform", + "CCS platform" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 12141441254112579393, - 8271858979549955993, + 14630472445500347050, + 6260595242788033664, 18446744073709551615, 18446744073709551615, - 471, - 485, - 471, - 485, - 85, - 87, + 1380, + 1397, + 1380, + 1397, + 232, + 234, true, - "training phase", - "training phase" + "structured output", + "structured output" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 2503288761659507641, - 9743919505994936922, + 10100743957883477761, + 17954790962075745659, 18446744073709551615, 18446744073709551615, - 493, - 507, - 493, - 507, - 89, - 91, + 1293, + 1322, + 1293, + 1322, + 218, + 221, true, - "YOLOv2 batches", - "YOLOv2 batches" + "good precision/recall metrics", + "good precision/recall metrics" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 16269569307198368878, - 14888617347479270783, + 5928632445065269445, + 14217942914367810037, 18446744073709551615, 18446744073709551615, - 616, - 627, - 616, - 627, - 113, - 115, + 1265, + 1276, + 1265, + 1276, + 213, + 215, true, - "main origin", - "main origin" + "little time", + "little time" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 12141441254112579393, - 8271858979549787104, + 11805639520798919476, + 8476511316725219115, 18446744073709551615, 18446744073709551615, - 676, - 690, - 676, - 690, - 122, - 124, + 1227, + 1240, + 1227, + 1240, + 207, + 209, true, - "training phase", - "training phase" + "large amounts", + "large amounts" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 4237078182846444452, - 7428907322213125011, + 8106464525640940249, + 12084772193525026048, 18446744073709551615, 18446744073709551615, - 787, - 806, - 787, - 806, - 145, - 147, + 922, + 929, + 922, + 929, + 159, + 160, true, - "YOLOv2 architecture", - "YOLOv2 architecture" + "modules", + "modules" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 14814125365076808131, - 10453527503990612347, + 5415884051047601374, + 4355778428986290778, 18446744073709551615, 18446744073709551615, - 74, - 82, - 74, - 82, - 12, - 13, + 1133, + 1160, + 1133, + 1160, + 191, + 193, true, - "platform", - "platform" + "machine-learning algorithms", + "machine-learning algorithms" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 6285955549867796622, - 12901492066051428715, + 11942859038914222878, + 6623027391573465220, 18446744073709551615, 18446744073709551615, - 108, - 124, - 108, - 124, - 21, - 22, + 1016, + 1031, + 1016, + 1031, + 172, + 174, true, - "time-to-solution", - "time-to-solution" + "massive amounts", + "massive amounts" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 14634153919632515335, - 365322755488345032, + 7838671148811051201, + 3585713728473930092, 18446744073709551615, 18446744073709551615, - 129, - 137, - 129, - 137, - 23, - 24, + 952, + 990, + 952, + 990, + 165, + 168, true, - "training", - "training" + "asynchronous microservice architecture", + "asynchronous microservice architecture" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 5731695876385560379, - 1758035992340926235, + 2954625771153872709, + 4652514773317300232, 18446744073709551615, 18446744073709551615, - 182, - 193, - 182, - 193, - 33, - 34, + 850, + 890, + 850, + 890, + 147, + 151, true, - "performance", - "performance" + "structured content representation format", + "structured content representation format" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 329104159246284497, - 8646809584775625185, + 3416039644310333922, + 4934158934704280837, 18446744073709551615, 18446744073709551615, - 197, - 202, - 197, - 202, - 35, - 36, + 737, + 785, + 737, + 785, + 132, + 136, true, - "terms", - "terms" + "train machine-learning classification algorithms", + "train machine-learning classification algorithms" ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 16381206521531485437, - 11024740562177031234, + 3735444463619010795, + 10473776487201094119, 18446744073709551615, 18446744073709551615, - 206, - 212, - 206, - 212, - 37, - 38, + 709, + 728, + 709, + 728, + 125, + 128, true, - "recall", - "recall" + "ie collect ground", + "i.e. collect ground" ], [ - "term", - "single-term", - 18259197018396996238, + "numval", + "ival", + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 6184954595655792282, - 2740680839011190488, + 389609625536247226, + 914428205219130181, 18446744073709551615, 18446744073709551615, - 217, - 226, - 217, - 226, - 39, - 40, + 346, + 350, + 346, + 350, + 73, + 74, true, - "precision", - "precision" + "1145", + "1145" ], [ - "term", - "single-term", - 18259197018396996238, + "numval", + "ival", + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 15359670209433732834, - 11505488180295702106, + 17767354399704235153, + 5919416028440889582, 18446744073709551615, 18446744073709551615, - 271, - 281, - 271, - 281, - 50, - 51, + 317, + 318, + 317, + 318, + 68, + 69, true, - "algorithms", - "algorithms" + "9", + "9" ], [ - "term", - "single-term", - 18259197018396996238, + "numval", + "ival", + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 16381206565270919865, - 7578403846550666862, + 15441160910541481862, + 12820302901235644324, 18446744073709551615, 18446744073709551615, - 295, - 301, - 295, - 301, - 54, - 55, + 162, + 164, + 162, + 164, + 34, + 35, true, - "epochs", - "epochs" + "18", + "18" ], [ - "term", - "single-term", - 18259197018396996238, + "numval", + "irng", + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 8106342689863369930, - 11135817727321581998, + 329104147759644091, + 11978218711906185056, 18446744073709551615, 18446744073709551615, - 346, - 353, - 346, - 353, - 65, - 66, + 256, + 261, + 256, + 261, + 50, + 51, true, - "network", - "network" + "19-23", + "19-23" ], [ - "term", - "single-term", - 18259197018396996238, + "numval", + "fval", + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 329104159219994925, - 8640251348534211245, + 11541938200508964503, + 6621613840590615166, 18446744073709551615, 18446744073709551615, - 358, - 363, - 358, - 363, - 67, - 68, + 351, + 366, + 351, + 366, + 75, + 76, true, - "times", - "times" + "3219819.3219834", + "3219819.3219834" ], [ - "term", - "single-term", - 18259197018396996238, + "numval", + "year", + 11222145795862225841, "TEXT", - "#/texts/51", + "#/texts/10", 1.0, - 2455254482033220466, - 11766388440552122471, + 389609625548777054, + 918164764798382455, 18446744073709551615, 18446744073709551615, - 417, - 427, - 417, - 427, - 77, - 78, + 263, + 267, + 263, + 267, + 52, + 53, true, - "Tensorflow", - "Tensorflow" + "2018", + "2018" ], [ "term", "single-term", - 18259197018396996238, + 3749305213430885773, "TEXT", - "#/texts/51", + "#/texts/12", 1.0, - 14652257119591248677, - 16033503133782517052, + 2703018679320364082, + 14545924726949564279, 18446744073709551615, 18446744073709551615, - 451, - 459, - 451, - 459, - 82, - 83, + 94, + 104, + 94, + 104, + 18, + 19, true, - "batching", - "batching" + "conversion", + "conversion" ], [ - "term", - "single-term", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 16381206560620045048, - 7774432132927566429, + 15441160910541485865, + 11606670830397544434, 18446744073709551615, 18446744073709551615, - 510, - 516, - 510, - 516, - 92, - 93, + 1377, + 1379, + 1377, + 1379, + 231, + 232, true, - "images", - "images" + "to", + "to" ], [ - "term", - "single-term", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 389609625631241985, - 11701890325058806343, + 15441160910541485865, + 11606670830397545800, 18446744073709551615, 18446744073709551615, - 522, - 526, - 522, - 526, - 95, - 96, + 1355, + 1357, + 1355, + 1357, + 228, + 229, true, - "time", - "time" + "to", + "to" ], [ - "term", - "single-term", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 16381206519429140242, - 7379520217990130218, + 8106351192274276906, + 17899388016831785682, 18446744073709551615, 18446744073709551615, - 528, - 534, - 528, - 534, - 97, - 98, + 1212, + 1219, + 1212, + 1219, + 204, + 206, true, - "thanks", - "thanks" + "to both", + "to both" ], [ - "term", - "single-term", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 329104161828335551, - 12350292282878253456, + 15441160910541485865, + 11606670830397529924, 18446744073709551615, 18446744073709551615, - 541, - 546, - 541, - 546, - 100, - 101, + 1092, + 1094, + 1092, + 1094, + 185, + 186, true, - "image", - "image" + "to", + "to" ], [ - "term", - "single-term", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 1478855739373258073, - 16768663803468661998, + 329104159243175056, + 993032465640498236, 18446744073709551615, 18446744073709551615, - 636, - 647, - 636, - 647, - 117, - 118, + 946, + 951, + 946, + 951, + 163, + 165, true, - "discrepancy", - "discrepancy" + "to an", + "to an" ], [ - "term", - "single-term", - 18259197018396996238, + "sentence", + "", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 6285955549867796622, - 12901492066051459793, + 11602122462230219692, + 9062878903616548976, 18446744073709551615, 18446744073709551615, - 651, - 667, - 651, - 667, - 119, - 120, + 1399, + 1554, + 1399, + 1554, + 235, + 257, true, - "time-to-solution", - "time-to-solution" + "The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", + "The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements." ], [ - "term", - "single-term", - 18259197018396996238, + "sentence", + "", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 14103651237077221583, - 1262912962491166125, + 11949985654620491247, + 6433012828858116708, 18446744073709551615, 18446744073709551615, - 720, - 730, - 720, - 730, - 131, - 132, + 1197, + 1398, + 1197, + 1398, + 201, + 235, true, - "prediction", - "prediction" + "This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output.", + "This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output." ], [ - "term", - "single-term", - 18259197018396996238, + "sentence", + "", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 329104161594416377, - 12352174572142722555, + 696858082777940132, + 6587401266180559184, 18446744073709551615, 18446744073709551615, - 752, - 757, - 752, - 757, - 137, - 138, + 1046, + 1196, + 1046, + 1196, + 177, + 201, true, - "point", - "point" + "Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude.", + "Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude." ], [ - "term", - "single-term", - 18259197018396996238, + "sentence", + "", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 389609625619349298, - 11674445135708463101, + 10285604264132694933, + 1782145150804012891, 18446744073709551615, 18446744073709551615, - 761, - 765, - 761, - 765, - 139, - 140, + 892, + 1045, + 892, + 1045, + 152, + 177, true, - "view", - "view" + "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents.", + "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents." ], [ "term", "single-term", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 14814125365076808131, - 10453527503990666008, + 3932662928795581219, + 3325076288347729928, 18446744073709551615, 18446744073709551615, - 773, - 781, - 773, - 781, - 142, - 143, + 828, + 844, + 828, + 844, + 144, + 145, true, - "platform", - "platform" + "bitmap-documents", + "bitmap-documents" ], [ "term", @@ -43632,88 +44690,88 @@ "sec" ], [ - "verb", - "compound-verb", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 8526860058636487735, - 15955870111469140752, + 389609625631408052, + 1612210503630929212, 18446744073709551615, 18446744073709551615, - 330, - 341, - 330, - 341, - 61, - 64, + 845, + 849, + 845, + 849, + 145, + 147, true, - "were fed to", - "were fed to" + "to a", + "to a" ], [ - "verb", - "compound-verb", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 436128332273723128, - 12647681645588449593, + 15441160910541485865, + 11606670830397301532, 18446744073709551615, 18446744073709551615, - 428, - 446, - 428, - 446, - 78, - 81, + 676, + 678, + 676, + 678, + 119, + 120, true, - "does not implement", - "does not implement" + "to", + "to" ], [ - "verb", - "compound-verb", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 2778023241922598008, - 5238034027547162597, + 12178341415895625940, + 12963192413398671002, 18446744073709551615, 18446744073709551615, - 562, - 586, - 562, - 586, - 103, - 106, + 1508, + 1511, + 1508, + 1511, + 252, + 253, true, - "is automatically applied", - "is automatically applied" + "for", + "for" ], [ - "verb", - "compound-verb", - 18259197018396996238, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 18110906195041757747, - 18325478196446152715, + 12178341415896289890, + 12968333890042400352, 18446744073709551615, 18446744073709551615, - 807, - 826, - 807, - 826, - 147, - 150, + 821, + 824, + 821, + 824, + 142, + 143, true, - "seems better suited", - "seems better suited" + "PDF", + "PDF" ], [ "verb", @@ -43758,256 +44816,256 @@ "/node" ], [ - "verb", - "single-verb", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 12178341415896275389, - 4652821010771256286, + 389609625631229040, + 1612226037052379844, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 1486, + 1490, + 1486, + 1490, + 248, + 249, true, - "Let", - "Let" + "than", + "than" ], [ - "verb", - "single-verb", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 8106397868479560363, - 5980952610294528544, + 15441160910541485678, + 11606670855875426468, 18446744073709551615, 18446744073709551615, - 11, - 18, - 11, - 18, - 3, - 4, + 1438, + 1440, + 1438, + 1440, + 241, + 242, true, - "discuss", - "discuss" + "on", + "on" ], [ - "verb", - "single-verb", - 18259197018396996238, + "expression", + "wtoken-concatenation", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 389609625741152123, - 11698558665309690548, + 12178341415896195376, + 12963254028349616217, 18446744073709551615, 18446744073709551615, - 99, - 103, - 99, - 103, - 19, - 20, + 1339, + 1342, + 1339, + 1342, + 225, + 226, true, - "show", - "show" + "99%", + "99%" ], [ - "verb", - "single-verb", - 18259197018396996238, + "expression", + "word-concatenation", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 14103651237077222912, - 1262912573528208063, + 11355983594424639335, + 375612941360355674, 18446744073709551615, 18446744073709551615, - 142, - 152, - 142, - 152, - 25, - 26, + 1298, + 1314, + 1298, + 1314, + 219, + 220, true, - "predicting", - "predicting" + "precision/recall", + "precision/recall" ], [ - "verb", - "single-verb", - 18259197018396996238, + "expression", + "word-concatenation", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 16381206564578053366, - 7676681725158730412, + 10391722136816057200, + 4465071482523967093, 18446744073709551615, 18446744073709551615, + 1512, + 1533, + 1512, + 1533, + 253, 254, - 260, - 254, - 260, - 47, - 48, true, - "ensure", - "ensure" + "knowledge-engineering", + "knowledge-engineering" ], [ - "verb", - "single-verb", - 18259197018396996238, + "expression", + "word-concatenation", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 12178341415895649364, - 4652781883350111182, + 3753411203337468488, + 16756051673090420119, 18446744073709551615, 18446744073709551615, - 282, - 285, - 282, - 285, - 51, - 52, + 1244, + 1256, + 1244, + 1256, + 210, + 211, true, - "ran", - "ran" + "ground-truth", + "ground-truth" ], [ - "verb", - "single-verb", - 18259197018396996238, + "expression", + "word-concatenation", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 15441160910541486545, - 2599358878961543341, + 6307689511527468252, + 12199545311202523423, 18446744073709551615, 18446744073709551615, - 303, - 307, - 303, - 307, - 56, - 57, + 1133, + 1149, + 1133, + 1149, + 191, + 192, true, - "ie", - "i.e." + "machine-learning", + "machine-learning" ], [ - "verb", - "single-verb", - 18259197018396996238, + "expression", + "word-concatenation", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 8106342033696543838, - 10720166011679309151, + 3753411203337468488, + 16756051673090395246, 18446744073709551615, 18446744073709551615, - 368, - 375, - 368, - 375, - 70, - 71, + 1102, + 1114, + 1102, + 1114, + 187, + 188, true, - "observe", - "observe" + "ground-truth", + "ground-truth" ], [ - "verb", - "single-verb", - 18259197018396996238, + "expression", + "word-concatenation", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 14634109260174176887, - 3059970276159290973, + 3932662928795581219, + 3325076288347729928, 18446744073709551615, 18446744073709551615, - 547, - 555, - 547, - 555, - 101, - 102, + 828, + 844, + 828, + 844, + 144, + 145, true, - "resizing", - "resizing" + "bitmap-documents", + "bitmap-documents" ], [ - "verb", - "single-verb", - 18259197018396996238, + "expression", + "word-concatenation", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 8106397860663428876, - 2379893300042418437, + 6307689511527468252, + 12199545311202481186, 18446744073709551615, 18446744073709551615, - 591, - 598, - 591, - 598, - 108, - 109, + 743, + 759, + 743, + 759, + 133, + 134, true, - "believe", - "believe" + "machine-learning", + "machine-learning" ], [ - "verb", - "single-verb", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 15441160910541486535, - 2599358878751709903, + 389609625618037948, + 1610651885976451134, 18446744073709551615, 18446744073709551615, - 609, - 611, - 609, - 611, - 111, - 112, + 1343, + 1347, + 1343, + 1347, + 226, + 227, true, - "is", - "is" + "with", + "with" ], [ - "verb", - "single-verb", - 18259197018396996238, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 329104161533598953, - 11928511646589428500, + 15441160910541485670, + 11606670832821292551, 18446744073709551615, 18446744073709551615, - 701, - 706, - 701, - 706, - 127, - 128, + 1336, + 1338, + 1336, + 1338, + 224, + 225, true, - "holds", - "holds" + "of", + "of" ], [ "verb", @@ -44033,125 +45091,83 @@ [ "conn", "single-conn", - 18259197018396996238, - "TEXT", - "#/texts/51", - 1.0, - 14634153888224917429, - 9004783391296823986, - 18446744073709551615, - 18446744073709551615, - 707, - 715, - 707, - 715, - 128, - 130, - true, - "true for", - "true for" - ], - [ - "conn", - "single-conn", - 18259197018396996238, - "TEXT", - "#/texts/51", - 1.0, - 16381206566339127348, - 7523956295610612753, - 18446744073709551615, - 18446744073709551615, - 67, - 73, - 67, - 73, - 10, - 12, - true, - "on the", - "on the" - ], - [ - "conn", - "single-conn", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 15441160910541480354, - 2599356225275492892, + 16381206560518651853, + 18414993880775571288, 18446744073709551615, 18446744073709551615, - 84, - 86, - 84, - 86, - 14, - 15, + 1323, + 1329, + 1323, + 1329, + 221, + 223, true, - "In", - "In" + "in the", + "in the" ], [ - "conn", - "single-conn", - 18259197018396996238, + "expression", + "common", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 12178341415895625940, - 4653059449996398372, + 15441160910541486545, + 11606670743807693522, 18446744073709551615, 18446744073709551615, + 709, + 713, + 709, + 713, 125, - 128, - 125, - 128, - 22, - 23, + 126, true, - "for", - "for" + "ie", + "i.e." ], [ - "conn", - "single-conn", - 18259197018396996238, + "parenthesis", + "round brackets", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 16381206568455155979, - 8062169836442615762, + 8912272716224106832, + 12227152516026650269, 18446744073709551615, 18446744073709551615, - 175, - 181, - 175, - 181, - 31, - 33, + 708, + 735, + 708, + 735, + 124, + 131, true, - "as the", - "as the" + "(i.e. collect ground-truth)", + "(i.e. collect ground-truth)" ], [ "conn", "single-conn", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, 15441160910541486538, - 2599358879133688732, + 11606670739900210613, 18446744073709551615, 18446744073709551615, - 194, - 196, - 194, - 196, - 34, - 35, + 1257, + 1259, + 1257, + 1259, + 211, + 212, true, "in", "in" @@ -44159,20 +45175,20 @@ [ "conn", "single-conn", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, 15441160910541485670, - 2599358870315263905, + 11606670832821388621, 18446744073709551615, 18446744073709551615, - 203, - 205, - 203, - 205, - 36, - 37, + 1241, + 1243, + 1241, + 1243, + 209, + 210, true, "of", "of" @@ -44180,296 +45196,338 @@ [ "conn", "single-conn", - 18259197018396996238, + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 16380809977974811061, - 11732651135400697626, + 15441160910541485670, + 11606670832821349359, 18446744073709551615, 18446744073709551615, - 228, - 234, - 228, - 234, - 41, - 43, + 1183, + 1185, + 1183, + 1185, + 198, + 199, true, - "In the", - "In the" + "of", + "of" ], [ - "conn", - "single-conn", - 18259197018396996238, + "numval", + "ival", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 3504047303032829403, - 14383519537824238604, + 12178341415896436703, + 12968333296314215347, 18446744073709551615, 18446744073709551615, - 261, - 270, - 261, - 270, - 48, - 50, + 1491, + 1494, + 1491, + 1494, + 249, + 250, true, - "that both", - "that both" + "250", + "250" ], [ - "conn", - "single-conn", - 18259197018396996238, + "numval", + "ival", + 3624246356859711021, "TEXT", - "#/texts/51", + "#/texts/7", 1.0, - 14634130761162415388, - 10901511361886185107, + 17767354399704235161, + 12573472761345255474, 18446744073709551615, 18446744073709551615, - 376, - 384, - 376, - 384, - 71, - 73, + 0, + 1, + 0, + 1, + 0, + 1, true, - "that the", - "that the" + "1", + "1" ], [ - "conn", - "single-conn", - 18259197018396996238, + "geoloc", + "country", + 11056873211244709904, "TEXT", - "#/texts/51", + "#/texts/5", 1.0, - 389609625697843734, - 11702137981936100184, + 2664439525053388608, + 16906723856094244091, 18446744073709551615, 18446744073709551615, - 412, - 416, - 412, - 416, - 76, - 77, + 13, + 24, + 13, + 24, + 2, + 3, true, - "from", - "from" + "Switzerland", + "Switzerland" ], [ - "conn", - "single-conn", - 18259197018396996238, + "link", + "email", + 18258237174351515285, "TEXT", - "#/texts/51", + "#/texts/3", 1.0, - 2511937742856062086, - 2355253536228937084, + 7883794643982446593, + 9473083479424942219, 18446744073709551615, 18446744073709551615, - 460, - 470, - 460, - 470, - 83, - 85, + 0, + 30, + 0, + 30, + 0, + 11, true, - "during the", - "during the" + "taa,dol,cau,bek@zurich.ibm.com", + "taa,dol,cau,bek@zurich.ibm.com" ], [ "conn", "single-conn", - 18259197018396996238, + 10227328696767902037, "TEXT", - "#/texts/51", + "#/texts/1", 1.0, - 329104161580427521, - 12357508218241612915, + 15441160910541485865, + 1862717525379277583, 18446744073709551615, 18446744073709551615, - 487, - 492, - 487, - 492, - 88, - 89, + 55, + 57, + 55, + 57, + 8, + 9, true, - "while", - "while" + "to", + "to" ], [ "conn", "single-conn", - 18259197018396996238, + 10227328696767902037, "TEXT", - "#/texts/51", + "#/texts/1", 1.0, - 389609625700792947, - 11701923673037716898, + 15441160910541487054, + 1862666054904793840, 18446744073709551615, 18446744073709551615, - 517, - 521, - 517, - 521, - 93, - 95, + 75, + 77, + 75, + 77, + 11, + 12, true, - "at a", - "at a" + "at", + "at" ], [ - "conn", - "single-conn", - 18259197018396996238, + "term", + "single-term", + 10227328696767902037, "TEXT", - "#/texts/51", + "#/texts/1", 1.0, - 3504047303127782210, - 14386938221778026486, + 329104162321612062, + 9665794625919571011, 18446744073709551615, 18446744073709551615, - 599, - 608, - 599, - 608, - 109, - 111, + 78, + 83, + 78, + 83, + 12, + 13, true, - "that this", - "that this" + "Scale", + "Scale" ], [ - "conn", - "single-conn", - 18259197018396996238, + "term", + "single-term", + 10227328696767902037, "TEXT", - "#/texts/51", + "#/texts/1", 1.0, - 8106397727991264470, - 4625930078648415204, + 2543543638813814383, + 14974042820297549065, 18446744073709551615, 18446744073709551615, - 628, - 635, - 628, - 635, - 115, - 117, + 58, + 74, + 58, + 74, + 9, + 11, true, - "for the", - "for the" + "Ingest Documents", + "Ingest Documents" ], [ - "conn", - "single-conn", - 18259197018396996238, + "term", + "single-term", + 10227328696767902037, "TEXT", - "#/texts/51", + "#/texts/1", 1.0, - 15441160910541485670, - 2599358870315233503, + 3953336115302703444, + 3908089371773344302, 18446744073709551615, 18446744073709551615, - 648, - 650, - 648, - 650, - 118, - 119, + 29, + 54, + 29, + 54, + 5, + 8, true, - "of", - "of" + "Machine Learning Platform", + "Machine Learning Platform" ], [ - "conn", - "single-conn", - 18259197018396996238, + "term", + "single-term", + 10227328696767902037, "TEXT", - "#/texts/51", + "#/texts/1", 1.0, - 8106397727991264470, - 4625930078648412606, + 12638008641667971393, + 2808934749433980912, 18446744073709551615, 18446744073709551615, - 668, - 675, - 668, - 675, - 120, - 122, + 0, + 25, + 0, + 25, + 0, + 3, true, - "for the", - "for the" + "Corpus Conversion Service", + "Corpus Conversion Service" ], [ - "conn", - "single-conn", - 18259197018396996238, + "sentence", + "", + 10227328696767902037, "TEXT", - "#/texts/51", + "#/texts/1", 1.0, - 14637917359887717745, - 11341143089950838331, + 11303007895399162817, + 11350976242507888924, 18446744073709551615, 18446744073709551615, - 743, - 751, - 743, - 751, - 135, - 137, + 0, + 84, + 0, + 84, + 0, + 14, true, - "from the", - "from the" + "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", + "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale." ], [ - "conn", - "single-conn", - 18259197018396996238, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 15441160910541485670, - 2599358870315209500, + 329104161634702433, + 739201814026917115, 18446744073709551615, 18446744073709551615, - 758, - 760, - 758, - 760, - 138, - 139, + 1330, + 1335, + 1330, + 1335, + 223, + 224, true, - "of", - "of" + "range", + "range" ], [ - "conn", - "single-conn", - 18259197018396996238, + "term", + "single-term", + 17999848460847860039, "TEXT", - "#/texts/51", + "#/texts/8", 1.0, - 16381206565712212855, - 7825456364758516667, + 3753411203337468488, + 16756051673090420119, 18446744073709551615, 18446744073709551615, - 766, - 772, - 766, - 772, - 140, - 142, + 1244, + 1256, + 1244, + 1256, + 210, + 211, true, - "of the", - "of the" + "ground-truth", + "ground-truth" + ], + [ + "term", + "single-term", + 17999848460847860039, + "TEXT", + "#/texts/8", + 1.0, + 6179392101937111178, + 13132284913272968426, + 18446744073709551615, + 18446744073709551615, + 1186, + 1195, + 1186, + 1195, + 199, + 200, + true, + "magnitude", + "magnitude" + ], + [ + "term", + "single-term", + 17999848460847860039, + "TEXT", + "#/texts/8", + 1.0, + 329104161571401725, + 741255023938407211, + 18446744073709551615, + 18446744073709551615, + 1177, + 1182, + 1177, + 1182, + 197, + 198, + true, + "order", + "order" ], [ "conn", @@ -44514,46 +45572,46 @@ "as" ], [ - "conn", - "single-conn", - 18259197018396996238, + "numval", + "ival", + 7377574370756688828, "TEXT", - "#/texts/51", + "#/texts/0", 1.0, - 16381206519425733256, - 7379223398534589543, + 15441160910541481790, + 218889966910406464, 18446744073709551615, 18446744073709551615, - 339, - 345, - 339, - 345, - 63, - 65, + 27, + 29, + 27, + 29, + 2, + 3, true, - "to the", - "to the" + "24", + "24" ], [ - "conn", - "single-conn", - 18259197018396996238, + "numval", + "year", + 7377574370756688828, "TEXT", - "#/texts/51", + "#/texts/0", 1.0, - 329104159243175056, - 8638673086732548345, + 389609625548777054, + 1345153950666588077, 18446744073709551615, 18446744073709551615, - 535, - 540, - 535, - 540, - 98, - 100, + 34, + 38, + 34, + 38, + 4, + 5, true, - "to an", - "to an" + "2018", + "2018" ], [ "conn", @@ -44640,172 +45698,172 @@ "Not-Table" ], [ - "expression", - "word-concatenation", - 14663676516964431047, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 3002943871017471876, - 6314608314970297277, + 16381206562264646932, + 18168705856416964271, 18446744073709551615, 18446744073709551615, - 49, - 63, - 49, - 63, - 9, - 10, + 1095, + 1101, + 1095, + 1101, + 186, + 187, true, - "pre-processing", - "pre-processing" + "gather", + "gather" ], [ - "expression", - "word-concatenation", - 14663676516964431047, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 3458523808570659318, - 9975991896240937817, + 3534225588934870450, + 17328851096575956236, 18446744073709551615, 18446744073709551615, - 141, - 157, - 141, - 157, - 22, - 23, + 1062, + 1071, + 1062, + 1071, + 180, + 182, true, - "object-detection", - "object-detection" + "will show", + "will show" ], [ - "sentence", - "", - 14663676516964431047, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 7429795002768371766, - 12580216355924388710, + 16381206485955868973, + 16260582896355405879, 18446744073709551615, 18446744073709551615, - 0, - 136, - 0, - 136, - 0, - 21, + 1009, + 1015, + 1009, + 1015, + 171, + 172, true, - "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously.", - "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously." + "handle", + "handle" ], [ - "sentence", - "", - 14663676516964431047, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 16291040095568243120, - 1594236025068685140, + 15441160910541486535, + 11606670739883745478, 18446744073709551615, 18446744073709551615, - 137, - 239, - 137, - 239, - 21, - 39, + 930, + 932, + 930, + 932, + 160, + 161, true, - "The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1.", - "The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1." + "is", + "is" ], [ - "term", - "single-term", - 14663676516964431047, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 4471200074237295914, - 1456466697102274833, + 3534225588934870450, + 17328851096576172964, 18446744073709551615, 18446744073709551615, - 8, - 28, - 8, - 28, - 2, - 4, + 895, + 904, + 895, + 904, + 153, + 155, true, - "performance analysis", - "performance analysis" + "will show", + "will show" ], [ - "term", - "single-term", - 14663676516964431047, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 4048925549312111393, - 15542194947650577050, + 8106398484416229602, + 5707746526356454429, 18446744073709551615, 18446744073709551615, - 49, - 69, - 49, - 69, - 9, - 11, + 801, + 808, + 801, + 808, + 138, + 139, true, - "pre-processing stage", - "pre-processing stage" + "convert", + "convert" ], [ - "term", - "single-term", - 14663676516964431047, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 15479850329146856745, - 787461524154987429, + 14650452911780017077, + 11510513167121376409, 18446744073709551615, 18446744073709551615, - 141, - 166, - 141, - 166, - 22, - 24, + 689, + 697, + 689, + 697, + 122, + 123, true, - "object-detection networks", - "object-detection networks" + "annotate", + "annotate" ], [ - "term", - "single-term", - 14663676516964431047, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 4874473477449861741, - 3504061852580538950, + 329104161667983915, + 773700989878712775, 18446744073709551615, 18446744073709551615, - 206, - 222, - 206, - 222, - 32, - 34, + 679, + 684, + 679, + 684, + 120, + 121, true, - "confidence level", - "confidence level" + "parse", + "parse" ], [ "term", @@ -44814,1888 +45872,1888 @@ "TEXT", "#/texts/52", 1.0, - 8106464574171450434, - 15318495777273702751, + 5568743441709168075, + 13244961468904706800, 18446744073709551615, 18446744073709551615, - 107, - 114, - 107, - 114, - 17, - 18, + 322, + 337, + 322, + 337, + 56, + 58, true, - "metrics", - "metrics" + "particular case", + "particular case" ], [ - "term", - "single-term", - 14663676516964431047, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 12178341415895638602, - 6222934568051327791, + 16381206569317834029, + 10666754365487817153, 18446744073709551615, 18446744073709551615, - 177, - 180, - 177, - 180, - 26, - 27, + 663, + 669, + 663, + 669, + 117, + 118, true, - "set", - "set" + "allows", + "allows" ], [ - "term", - "single-term", - 14663676516964431047, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 329104159325617355, - 15838640579331060931, + 15441160910541487054, + 11606670851925780672, 18446744073709551615, 18446744073709551615, - 193, - 198, - 193, - 198, - 29, - 30, + 1164, + 1166, + 1164, + 1166, + 194, + 195, true, - "boxes", - "boxes" + "at", + "at" ], [ - "verb", - "compound-verb", - 14663676516964431047, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 6181919773618307675, - 13087072183397009947, + 15441160910541486989, + 11606670853486803944, 18446744073709551615, 18446744073709551615, - 76, - 85, - 76, - 85, - 12, - 14, + 1161, + 1163, + 1161, + 1163, + 193, + 194, true, - "is needed", - "is needed" + "by", + "by" ], [ - "verb", - "compound-verb", + "term", + "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, - 3312537848285575572, - 3682069485478563076, + 329104159325617355, + 15838640579331035020, 18446744073709551615, 18446744073709551615, - 115, - 135, - 115, - 135, - 18, - 20, + 262, + 267, + 262, + 267, + 43, + 44, true, - "described previously", - "described previously" + "boxes", + "boxes" ], [ - "verb", - "single-verb", + "term", + "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, - 12178341415895617983, - 6222927924466837926, + 389609625696024605, + 3998089761623990856, 18446744073709551615, 18446744073709551615, - 30, - 33, - 30, - 33, - 5, - 6, + 291, + 295, + 291, + 295, + 48, + 49, true, - "let", - "let" + "cell", + "cell" ], [ - "verb", - "single-verb", + "term", + "single-term", 14663676516964431047, "TEXT", "#/texts/52", 1.0, - 8106342536055423396, - 1623603363237275433, + 329104161624445793, + 1096780638347487949, 18446744073709551615, 18446744073709551615, - 37, - 44, - 37, - 44, - 7, - 8, + 298, + 303, + 298, + 303, + 50, + 51, true, - "outline", - "outline" + "label", + "label" ], [ - "verb", - "single-verb", - 14663676516964431047, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 5947879507992892292, - 3137884750946432419, + 15441160910541486989, + 11606670853486674912, 18446744073709551615, 18446744073709551615, - 93, - 102, - 93, - 102, - 15, - 16, + 1130, + 1132, + 1130, + 1132, + 190, + 191, true, - "computing", - "computing" + "by", + "by" ], [ - "verb", - "single-verb", - 14663676516964431047, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 8106476016678293182, - 8897474810961070939, + 389609625631229034, + 1612226062922593249, 18446744073709551615, 18446744073709551615, - 167, - 174, - 167, - 174, - 24, - 25, + 1072, + 1076, + 1072, + 1076, + 182, + 183, true, - "predict", - "predict" + "that", + "that" ], [ - "verb", - "single-verb", - 14663676516964431047, + "conn", + "single-conn", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 14652253380850532610, - 15688350870772298580, + 15441160910541485670, + 11606670832821377067, 18446744073709551615, 18446744073709551615, - 184, - 192, - 184, - 192, - 28, - 29, + 1032, + 1034, + 1032, + 1034, + 174, + 175, true, - "bounding", - "bounding" + "of", + "of" ], [ "conn", "single-conn", - 14663676516964431047, + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 8106351438779293396, - 7036921387199751321, + 16381206565712212855, + 18288882301375701872, 18446744073709551615, 18446744073709551615, - 0, - 7, - 0, - 7, - 0, - 2, + 915, + 921, + 915, + 921, + 157, + 159, true, - "For the", - "For the" + "of the", + "of the" ], [ "conn", "single-conn", - 14663676516964431047, + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 16381206569837301772, - 829894264837423586, + 3504047303033029818, + 12858913108667382047, 18446744073709551615, 18446744073709551615, - 86, - 92, - 86, - 92, - 14, - 15, + 905, + 914, + 905, + 914, + 155, + 157, true, - "before", - "before" + "that each", + "that each" ], [ "conn", "single-conn", - 14663676516964431047, + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, 15441160910541485670, - 15053982237527373603, + 11606670832821399597, 18446744073709551615, 18446744073709551615, - 181, - 183, - 181, - 183, - 27, - 28, + 818, + 820, + 818, + 820, + 141, + 142, true, "of", "of" ], [ - "conn", - "single-conn", - 14663676516964431047, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/52", + "#/texts/8", 1.0, - 16381206557726458966, - 4275353707798328089, + 8106478708506631920, + 17126853238947237410, 18446744073709551615, 18446744073709551615, - 199, - 205, - 199, - 205, - 30, - 32, + 1473, + 1480, + 1473, + 1480, + 246, + 247, true, - "with a", - "with a" + "serving", + "serving" ], [ - "conn", - "single-conn", + "verb", + "single-verb", 14663676516964431047, "TEXT", "#/texts/52", 1.0, - 8106397860038858133, - 2367955007216749470, + 12178341415895516060, + 6222929879151021867, 18446744073709551615, 18446744073709551615, - 223, - 230, - 223, - 230, - 34, - 35, - true, - "between", - "between" + 243, + 246, + 243, + 246, + 40, + 41, + true, + "use", + "use" ], [ - "numval", - "ival", - 4577067829072175096, + "verb", + "single-verb", + 14663676516964431047, "TEXT", - "#/texts/53", + "#/texts/52", 1.0, - 17767354399704235162, - 15759397524433803932, + 14652253380850532610, + 15688350870772294533, 18446744073709551615, 18446744073709551615, - 6, - 7, - 6, - 7, - 1, - 2, + 253, + 261, + 253, + 261, + 42, + 43, true, - "2", - "2" + "bounding", + "bounding" ], [ - "sentence", - "", - 4577067829072175096, + "verb", + "single-verb", + 14663676516964431047, "TEXT", - "#/texts/53", + "#/texts/52", 1.0, - 13412490586202463721, - 17653988074073433733, + 5950066721891255692, + 2770587476745436308, 18446744073709551615, 18446744073709551615, - 0, - 95, - 0, - 95, - 0, - 17, + 271, + 280, + 271, + 280, + 45, + 46, true, - "Table 2: Performance results for the template specific model of the Physical Review B journals.", - "Table 2: Performance results for the template specific model of the Physical Review B journals." + "associate", + "associate" ], [ - "sentence", - "", - 4577067829072175096, + "verb", + "single-verb", + 14663676516964431047, "TEXT", - "#/texts/53", + "#/texts/52", 1.0, - 2713668199866952841, - 4447940936101437620, + 15441160910541486535, + 15053982258803746941, 18446744073709551615, 18446744073709551615, - 96, - 202, - 96, - 202, - 17, - 34, + 311, + 313, + 311, + 313, + 53, + 54, true, - "The confusion matrix highlights the huge imbalance between the number of text cells with different labels.", - "The confusion matrix highlights the huge imbalance between the number of text cells with different labels." + "is", + "is" ], [ - "sentence", - "", - 4577067829072175096, + "verb", + "single-verb", + 14663676516964431047, "TEXT", - "#/texts/53", + "#/texts/52", 1.0, - 12325075441819606052, - 4798224535047183092, + 6180152660545840784, + 16460990176239850859, 18446744073709551615, 18446744073709551615, - 203, - 310, - 203, - 310, - 34, - 53, + 365, + 374, + 365, + 374, + 65, + 66, true, - "The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", - "The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types." + "depending", + "depending" ], [ - "term", - "single-term", - 4577067829072175096, + "verb", + "single-verb", + 14663676516964431047, "TEXT", - "#/texts/53", + "#/texts/52", 1.0, - 8087581502811400566, - 7573439973442034769, + 8106342531491540207, + 1091864981917538389, 18446744073709551615, 18446744073709551615, - 9, - 28, - 9, - 28, - 3, - 5, + 391, + 398, + 391, + 398, + 69, + 70, true, - "Performance results", - "Performance results" + "overlap", + "overlap" ], [ - "term", - "single-term", - 4577067829072175096, + "verb", + "compound-verb", + 17999848460847860039, "TEXT", - "#/texts/53", + "#/texts/8", 1.0, - 13356790934987174038, - 18420992769499992239, + 2604368229451749231, + 5954729608874990660, 18446744073709551615, 18446744073709551615, - 37, - 60, - 37, - 60, - 7, - 10, + 1416, + 1437, + 1416, + 1437, + 238, + 241, true, - "template specific model", - "template specific model" + "is currently deployed", + "is currently deployed" ], [ - "term", - "single-term", - 4577067829072175096, + "verb", + "compound-verb", + 17999848460847860039, "TEXT", - "#/texts/53", + "#/texts/8", 1.0, - 9872729223299515659, - 7908640068811257205, + 9791407429604398000, + 14740221032007164243, 18446744073709551615, 18446744073709551615, - 68, - 94, - 68, - 94, - 12, - 16, + 1281, + 1292, + 1281, + 1292, + 216, + 218, true, - "Physical Review B journals", - "Physical Review B journals" + "obtain very", + "obtain very" ], [ - "term", - "single-term", - 4577067829072175096, + "verb", + "compound-verb", + 17999848460847860039, "TEXT", - "#/texts/53", + "#/texts/8", 1.0, - 5497358094214601811, - 7433163521566214246, + 5690225847229166303, + 18320034715902341983, 18446744073709551615, 18446744073709551615, - 100, - 116, - 100, - 116, - 18, - 20, + 1115, + 1129, + 1115, + 1129, + 188, + 190, true, - "confusion matrix", - "confusion matrix" + "is accelerated", + "is accelerated" ], [ - "term", - "single-term", - 4577067829072175096, + "verb", + "single-verb", + 17999848460847860039, "TEXT", - "#/texts/53", + "#/texts/8", 1.0, - 1488936167715046380, - 16637143750883657942, + 8106398484416916345, + 5707744688882101082, 18446744073709551615, 18446744073709551615, - 132, - 146, - 132, - 146, - 22, - 24, + 1358, + 1365, + 1358, + 1365, + 229, + 230, true, - "huge imbalance", - "huge imbalance" + "content", + "content" ], [ "term", "single-term", - 4577067829072175096, + 17999848460847860039, "TEXT", - "#/texts/53", + "#/texts/8", 1.0, - 5748925367544727060, - 15357132638157717228, + 2703018679320364082, + 15916371892854536925, 18446744073709551615, 18446744073709551615, - 169, - 179, - 169, - 179, - 28, - 30, + 1366, + 1376, + 1366, + 1376, + 230, + 231, true, - "text cells", - "text cells" + "conversion", + "conversion" ], [ - "term", - "single-term", - 4577067829072175096, + "conn", + "single-conn", + 14663676516964431047, "TEXT", - "#/texts/53", + "#/texts/52", 1.0, - 220880076010336098, - 14991640362132342656, + 3534222425899983491, + 17228377976260951108, 18446744073709551615, 18446744073709551615, - 185, - 201, - 185, - 201, - 31, - 33, + 281, + 290, + 281, + 290, + 46, + 48, true, - "different labels", - "different labels" + "with each", + "with each" ], [ - "term", - "single-term", - 4577067829072175096, + "conn", + "single-conn", + 14663676516964431047, "TEXT", - "#/texts/53", + "#/texts/52", 1.0, - 4360412890788129778, - 6086964040649348468, + 8106398107541152403, + 9925559447860985794, 18446744073709551615, 18446744073709551615, - 216, - 232, - 216, - 232, - 37, - 39, + 314, + 321, + 314, + 321, + 54, + 56, true, - "ensemble machine", - "ensemble machine" + "in this", + "in this" ], [ - "term", - "single-term", - 4577067829072175096, + "conn", + "single-conn", + 14663676516964431047, "TEXT", - "#/texts/53", + "#/texts/52", 1.0, - 9628232334734286437, - 15559530413649010038, + 15441160910541485678, + 15053982241221563648, 18446744073709551615, 18446744073709551615, - 275, - 288, - 275, - 288, - 46, - 48, + 375, + 377, + 375, + 377, + 66, + 67, true, - "high accuracy", - "high accuracy" + "on", + "on" ], [ - "term", - "single-term", - 4577067829072175096, + "conn", + "single-conn", + 14663676516964431047, "TEXT", - "#/texts/53", + "#/texts/52", 1.0, - 5579859536360440221, - 12384760726355576022, + 8106477878453677833, + 420597325771448632, 18446744073709551615, 18446744073709551615, - 298, - 309, - 298, - 309, - 50, - 52, + 378, + 385, + 378, + 385, + 67, + 68, true, - "label types", - "label types" + "whether", + "whether" ], [ - "term", - "single-term", - 4577067829072175096, + "conn", + "single-conn", + 14663676516964431047, "TEXT", - "#/texts/53", + "#/texts/52", 1.0, - 16381206574973295053, - 15664074499384566316, + 15441160910541485865, + 15053982239329549650, 18446744073709551615, 18446744073709551615, - 159, - 165, - 159, - 165, - 26, - 27, + 268, + 270, + 268, + 270, + 44, + 45, true, - "number", - "number" + "to", + "to" ], [ "term", "single-term", - 4577067829072175096, + 3749305213430885773, "TEXT", - "#/texts/53", + "#/texts/12", 1.0, - 329104159157898666, - 7979932887321468479, + 6167933651658664291, + 16598695715373476800, 18446744073709551615, 18446744073709551615, - 207, - 212, - 207, - 212, - 35, - 36, + 74, + 83, + 74, + 83, + 15, + 16, true, - "usage", - "usage" + "documents", + "documents" ], [ - "term", - "single-term", - 4577067829072175096, + "geoloc", + "country", + 11222145795862225841, "TEXT", - "#/texts/53", + "#/texts/10", 1.0, - 8106464574531629743, - 13092511743146000891, + 17782056979161528852, + 4690987004959947827, 18446744073709551615, 18446744073709551615, - 242, - 249, - 242, - 249, - 40, - 41, + 277, + 291, + 277, + 291, + 56, + 58, true, - "methods", - "methods" + "United Kingdom", + "United Kingdom" ], [ - "verb", - "compound-verb", - 4577067829072175096, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/53", + "#/texts/12", 1.0, - 12736124800502880399, - 3048726189598552717, + 6182654480499682241, + 9496359210917921791, 18446744073709551615, 18446744073709551615, - 250, - 267, - 250, - 267, - 41, - 44, + 61, + 70, + 61, + 70, + 13, + 14, true, - "allows to achieve", - "allows to achieve" + "ingestion", + "ingestion" ], [ - "verb", - "single-verb", - 4577067829072175096, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/53", + "#/texts/12", 1.0, - 15927123199600624159, - 11830974991863511971, + 329104161668023890, + 13427899720650205831, 18446744073709551615, 18446744073709551615, - 117, - 127, - 117, - 127, - 20, - 21, + 8, + 13, + 8, + 13, + 2, + 3, true, - "highlights", - "highlights" + "paper", + "paper" ], [ - "verb", - "single-verb", - 4577067829072175096, + "conn", + "single-conn", + 11222145795862225841, "TEXT", - "#/texts/53", + "#/texts/10", 1.0, - 14639581097006750428, - 17977442740486581742, + 15441160910541485678, + 12820303021843804862, 18446744073709551615, 18446744073709551615, - 233, - 241, - 233, - 241, - 39, - 40, + 211, + 213, + 211, + 213, + 42, + 43, true, - "learning", - "learning" + "on", + "on" ], [ - "conn", - "single-conn", - 4577067829072175096, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/53", + "#/texts/12", 1.0, - 8106397727991264470, - 13939727220022896426, + 239702429653970881, + 11301722290661797635, 18446744073709551615, 18446744073709551615, - 29, - 36, - 29, - 36, - 5, - 7, + 870, + 890, + 870, + 890, + 149, + 152, true, - "for the", - "for the" + "single task failures", + "single task failures" ], [ - "conn", - "single-conn", - 4577067829072175096, + "sentence", + "", + 16923207262044929933, "TEXT", - "#/texts/53", + "#/texts/11", 1.0, - 16381206565712212855, - 15527423972997370423, + 18073096319598857596, + 14789900833203243228, 18446744073709551615, 18446744073709551615, - 61, - 67, - 61, - 67, - 10, - 12, + 1346, + 1532, + 1346, + 1532, + 228, + 267, true, - "of the", - "of the" + "Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", + "Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context." ], [ - "conn", - "single-conn", - 4577067829072175096, + "term", + "single-term", + 11222145795862225841, "TEXT", - "#/texts/53", + "#/texts/10", 1.0, - 2011002864325523456, - 16665978214615422828, + 329104161667992688, + 12637076450269003134, 18446744073709551615, 18446744073709551615, - 147, - 158, - 147, - 158, - 24, - 26, + 319, + 324, + 319, + 324, + 69, + 70, true, - "between the", - "between the" + "pages", + "pages" ], [ - "conn", - "single-conn", - 4577067829072175096, + "term", + "single-term", + 11222145795862225841, "TEXT", - "#/texts/53", + "#/texts/10", 1.0, - 15441160910541485670, - 10632466984953712528, + 12178341415895650394, + 1738736899274670576, 18446744073709551615, 18446744073709551615, - 166, - 168, - 166, - 168, - 27, - 28, + 312, + 315, + 312, + 315, + 66, + 67, true, - "of", - "of" + "USA", + "USA" ], [ - "conn", - "single-conn", - 4577067829072175096, + "term", + "single-term", + 11222145795862225841, "TEXT", - "#/texts/53", + "#/texts/10", 1.0, - 389609625618037948, - 18050712937266565062, + 15441160910541487804, + 12820302595509217913, 18446744073709551615, 18446744073709551615, - 180, - 184, - 180, - 184, - 30, - 31, + 308, + 310, + 308, + 310, + 64, + 65, true, - "with", - "with" + "NY", + "NY" ], [ - "conn", - "single-conn", - 4577067829072175096, + "term", + "single-term", + 11222145795862225841, "TEXT", - "#/texts/53", + "#/texts/10", 1.0, - 15441160910541485670, - 10632466984953723750, + 12178341415896228980, + 1738757751107532979, 18446744073709551615, 18446744073709551615, - 213, - 215, - 213, - 215, - 36, - 37, + 293, + 296, + 293, + 296, + 59, + 60, true, - "of", - "of" + "ACM", + "ACM" ], [ - "conn", - "single-conn", - 4577067829072175096, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/53", + "#/texts/12", 1.0, - 14814149446809805987, - 2376885852812773633, + 17671651082391847352, + 4285231550406356710, 18446744073709551615, 18446744073709551615, - 289, - 297, - 289, - 297, - 48, - 50, + 812, + 831, + 812, + 831, + 141, + 143, true, - "over all", - "over all" + "strong dependencies", + "strong dependencies" ], [ - "conn", - "single-conn", + "term", + "single-term", 4577067829072175096, "TEXT", "#/texts/53", 1.0, - 15441160910541485865, - 10632466981388317765, + 9628232334734286437, + 15559530413649010038, 18446744073709551615, 18446744073709551615, - 257, - 259, - 257, - 259, - 42, - 43, + 275, + 288, + 275, + 288, + 46, + 48, true, - "to", - "to" + "high accuracy", + "high accuracy" ], [ - "numval", - "ival", - 2569392033451362672, + "term", + "single-term", + 4577067829072175096, "TEXT", - "#/texts/54", + "#/texts/53", 1.0, - 17767354399704235160, - 13994996428325642210, + 5579859536360440221, + 12384760726355576022, 18446744073709551615, 18446744073709551615, - 443, - 444, - 443, - 444, - 78, - 79, + 298, + 309, + 298, + 309, + 50, + 52, true, - "0", - "0" + "label types", + "label types" ], [ - "numval", - "ival", - 2569392033451362672, + "term", + "single-term", + 11222145795862225841, "TEXT", - "#/texts/54", + "#/texts/10", 1.0, - 17767354399704235157, - 13994996428928834278, + 16381206531301571445, + 12510416255984707889, 18446744073709551615, 18446744073709551615, - 446, - 447, - 446, - 447, - 80, - 81, + 269, + 275, + 269, + 275, + 54, + 55, true, - "5", - "5" + "London", + "London" ], [ - "parenthesis", - "round brackets", - 2569392033451362672, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 5763721985249138201, - 11333613653010201493, + 4253886245479866309, + 90465651070093109, 18446744073709551615, 18446744073709551615, - 726, - 746, - 726, - 746, - 129, - 135, + 773, + 799, + 773, + 799, + 136, + 139, true, - "(made with a camera)", - "(made with a camera)" + "proper resource management", + "proper resource management" ], [ - "expression", - "word-concatenation", - 2569392033451362672, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 2772095701715059387, - 18429532044600751065, + 17000938524684089439, + 12283057491291530260, 18446744073709551615, 18446744073709551615, - 99, - 109, - 99, - 109, - 16, - 17, + 742, + 755, + 742, + 755, + 129, + 131, true, - "dual-class", - "dual-class" + "many benefits", + "many benefits" ], [ - "expression", - "word-concatenation", - 2569392033451362672, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 329104162326555074, - 15570664097727008132, + 9920918086675479799, + 11129371561875838665, 18446744073709551615, 18446744073709551615, - 460, - 465, - 460, - 465, - 84, - 85, + 689, + 725, + 689, + 725, + 122, + 125, true, - "R-CNN", - "R-CNN" + "asynchronous communication protocols", + "asynchronous communication protocols" ], [ - "expression", - "word-concatenation", - 2569392033451362672, + "term", + "single-term", + 11222145795862225841, "TEXT", - "#/texts/54", + "#/texts/10", 1.0, - 329104162326555074, - 15570664097727047898, + 16381206562442326159, + 10586055992353118926, 18446744073709551615, 18446744073709551615, - 815, - 820, - 815, - 820, - 145, - 146, + 249, + 255, + 249, + 255, + 49, + 50, true, - "R-CNN", - "R-CNN" + "August", + "August" ], [ - "expression", - "wtoken-concatenation", - 2569392033451362672, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 16381206533950151485, - 198566132787583629, + 7501920923775581134, + 5285457240038734782, 18446744073709551615, 18446744073709551615, - 278, - 284, - 278, - 284, - 47, - 48, + 567, + 584, + 567, + 584, + 103, + 105, true, - "YOLOv2", - "YOLOv2" + "new microservices", + "new microservices" ], [ - "expression", - "wtoken-concatenation", - 2569392033451362672, + "term", + "single-term", + 11222145795862225841, "TEXT", - "#/texts/54", + "#/texts/10", 1.0, - 15441160910541480158, - 10477275210029982213, + 12178341415896253943, + 1738717073979978820, 18446744073709551615, 18446744073709551615, - 400, - 402, - 400, - 402, - 69, - 70, + 157, + 160, + 157, + 160, + 32, + 33, true, - "F1", - "F1" + "KDD", + "KDD" ], [ - "expression", - "wtoken-concatenation", - 2569392033451362672, + "term", + "enum-term-mark-4", + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 329104147618556708, - 15461264859114081015, + 11674491770136657522, + 11680961660123138230, 18446744073709551615, 18446744073709551615, - 412, - 417, - 412, - 417, - 72, - 73, + 1333, + 1344, + 1333, + 1344, + 224, + 227, true, - "98.7%", - "98.7%" + "JSON or XML", + "JSON or XML" ], [ - "sentence", - "", - 2569392033451362672, + "term", + "single-term", + 11222145795862225841, "TEXT", - "#/texts/54", + "#/texts/10", 1.0, - 784428348664963687, - 2735229758044296436, + 14650948201816210252, + 2694576768786644093, 18446744073709551615, 18446744073709551615, - 33, - 133, - 33, - 133, - 6, - 20, + 298, + 306, + 298, + 306, + 61, + 63, true, - "The corresponding recall and precision are then computed for this dual-class classification problem.", - "The corresponding recall and precision are then computed for this dual-class classification problem." + "New York", + "New York" ], [ - "sentence", - "", - 2569392033451362672, + "term", + "single-term", + 11222145795862225841, "TEXT", - "#/texts/54", + "#/texts/10", 1.0, - 3927917834152176938, - 12569591881522562313, + 17782056979161528852, + 4690987004959947827, 18446744073709551615, 18446744073709551615, - 134, - 273, - 134, - 273, - 20, - 46, + 277, + 291, + 277, + 291, + 56, + 58, true, - "In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence.", - "In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence." + "United Kingdom", + "United Kingdom" ], [ - "sentence", - "", - 2569392033451362672, + "term", + "single-term", + 11222145795862225841, "TEXT", - "#/texts/54", + "#/texts/10", 1.0, - 3956872905292683881, - 2752157999599851583, + 9639847902089872401, + 15642530745605263941, 18446744073709551615, 18446744073709551615, - 274, - 445, - 274, - 445, + 236, + 247, + 236, + 247, 46, - 80, + 48, true, - "For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0.", - "For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0." + "Data Mining", + "Data Mining" ], [ - "sentence", - "", - 2569392033451362672, + "parenthesis", + "round brackets", + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 17055744903410885404, - 12761534484507818149, + 5879944210728656410, + 9673170177479615330, 18446744073709551615, 18446744073709551615, - 449, - 556, - 449, - 556, - 82, - 101, + 1432, + 1473, + 1432, + 1473, + 246, + 257, true, - "The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers.", - "The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers." + "(documents, images, authors, tables, etc)", + "(documents, images, authors, tables, etc)" ], [ - "sentence", - "", - 2569392033451362672, + "conn", + "single-conn", + 4577067829072175096, "TEXT", - "#/texts/54", + "#/texts/53", 1.0, - 14420414998277701657, - 3037581738866623003, + 14814149446809805987, + 2376885852812773633, 18446744073709551615, 18446744073709551615, - 557, - 667, - 557, - 667, - 101, - 119, + 289, + 297, + 289, + 297, + 48, + 50, true, - "We believe this originates from the selective search algorithm which is used to determine regions of interest.", - "We believe this originates from the selective search algorithm which is used to determine regions of interest." + "over all", + "over all" ], [ - "sentence", - "", - 2569392033451362672, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 14678097696923692160, - 11491609575789433741, + 16381206563350835754, + 15338244529159273971, 18446744073709551615, 18446744073709551615, - 668, - 773, - 668, - 773, - 119, - 139, + 262, + 268, + 262, + 268, + 48, + 49, true, - "The images we feed it are not typical photographic images (made with a camera) but layout visualisations.", - "The images we feed it are not typical photographic images (made with a camera) but layout visualisations." + "called", + "called" ], [ - "sentence", - "", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 1336288703622935510, - 15435580690586079242, + 15441160910541485865, + 4857876500092426670, 18446744073709551615, 18446744073709551615, - 774, - 867, - 774, - 867, - 139, - 156, + 767, + 769, + 767, + 769, + 134, + 135, true, - "The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", - "The selective search algorithm in Faster R-CNN might not be optimal for such type of objects." + "to", + "to" ], [ - "term", - "enum-term-mark-2", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 11037453576911667853, - 12443097430245333421, + 15441160910541485865, + 4857876500092547312, 18446744073709551615, 18446744073709551615, - 51, - 71, - 51, - 71, - 8, - 11, + 616, + 618, + 616, + 618, + 111, + 112, true, - "recall and precision", - "recall and precision" + "to", + "to" ], [ - "term", - "enum-term-mark-2", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 767578358531619449, - 1472685584560725507, + 15441160910541485865, + 4857876500092543847, 18446744073709551615, 18446744073709551615, - 204, - 224, - 204, - 224, - 35, - 38, + 556, + 558, + 556, + 558, + 101, + 102, true, - "precision and recall", - "precision and recall" + "to", + "to" ], [ "term", - "enum-term-mark-2", - 2569392033451362672, + "single-term", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 767578358531619449, - 1472685584560746355, + 14315066823203278267, + 5715163301899035549, 18446744073709551615, 18446744073709551615, - 527, - 547, - 527, - 547, - 96, - 99, + 483, + 500, + 483, + 500, + 90, + 92, true, - "precision and recall", - "precision and recall" + "complex pipelines", + "complex pipelines" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 7737036869804521677, - 431221867393766623, + 15441160910541485865, + 4857876500092539243, 18446744073709551615, 18446744073709551615, - 37, - 57, - 37, - 57, - 7, - 9, + 501, + 503, + 501, + 503, + 92, + 93, true, - "corresponding recall", - "corresponding recall" + "to", + "to" ], [ - "term", - "single-term", + "expression", + "word-concatenation", 2569392033451362672, "TEXT", "#/texts/54", 1.0, - 11075783049363921732, - 14381818982688268241, + 329104162326555074, + 15570664097727047898, 18446744073709551615, 18446744073709551615, - 99, - 132, - 99, - 132, - 16, - 19, + 815, + 820, + 815, + 820, + 145, + 146, true, - "dual-class classification problem", - "dual-class classification problem" + "R-CNN", + "R-CNN" ], [ "term", "single-term", - 2569392033451362672, + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 8581372359543855162, - 10333944193716453687, + 7904009099850099728, + 6069321302342300412, 18446744073709551615, 18446744073709551615, - 151, - 166, - 151, - 166, - 25, - 27, + 427, + 439, + 427, + 439, + 78, + 81, true, - "fair comparison", - "fair comparison" + "own REST API", + "own REST API" ], [ "term", "single-term", - 2569392033451362672, + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 16904814960714419182, - 7305130667909903014, + 3812062755894317903, + 5752895239615977865, 18446744073709551615, 18446744073709551615, - 218, - 232, - 218, - 232, - 37, - 39, + 359, + 374, + 359, + 374, + 66, + 68, true, - "recall metrics", - "recall metrics" + "main components", + "main components" ], [ "term", "single-term", - 2569392033451362672, + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 5859613489047657680, - 4575208165015881094, + 12638008641667971393, + 14590037144173376663, 18446744073709551615, 18446744073709551615, - 392, - 408, - 392, - 408, - 68, - 71, + 269, + 294, + 269, + 294, + 49, + 52, true, - "maximum F1 score", - "maximum F1 score" + "Corpus Conversion Service", + "Corpus Conversion Service" ], [ "term", "single-term", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 4874473477449861741, - 7312361899298084317, + 8106398484416909789, + 17530798545720977035, 18446744073709551615, 18446744073709551615, - 423, - 439, - 423, - 439, - 75, - 77, + 1524, + 1531, + 1524, + 1531, + 265, + 266, true, - "confidence level", - "confidence level" + "context", + "context" ], [ "term", "single-term", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 6927970521128218953, - 6482828839300817669, + 6167933651658664291, + 3744443950142863448, 18446744073709551615, 18446744073709551615, - 453, - 472, - 453, - 472, - 83, - 86, + 1495, + 1504, + 1495, + 1504, + 260, + 261, true, - "Faster R-CNN method", - "Faster R-CNN method" + "documents", + "documents" ], [ "term", "single-term", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 16904814894749305757, - 5737021334745277149, + 16381206513098478539, + 8569522873910347573, 18446744073709551615, 18446744073709551615, - 541, - 555, - 541, - 555, - 98, - 100, + 1461, + 1467, + 1461, + 1467, + 253, + 254, true, - "recall numbers", - "recall numbers" + "tables", + "tables" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 4349380732135272089, - 16458298459980248480, + 15441160910541485865, + 4857876500092540787, 18446744073709551615, 18446744073709551615, - 593, - 619, - 593, - 619, - 107, - 110, + 474, + 476, + 474, + 476, + 88, + 89, true, - "selective search algorithm", - "selective search algorithm" + "to", + "to" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 2351536754407393176, - 12969141846351017301, + 8106478041484051995, + 2311188108209868134, 18446744073709551615, 18446744073709551615, - 698, - 725, - 698, - 725, - 126, - 129, + 681, + 688, + 681, + 688, + 121, + 122, true, - "typical photographic images", - "typical photographic images" + "through", + "through" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 18245848170103364623, - 3851473044777784430, + 15441160910541480354, + 4857876037199396344, 18446744073709551615, 18446744073709551615, - 751, - 772, - 751, - 772, - 136, - 138, + 607, + 609, + 607, + 609, + 109, + 110, true, - "layout visualisations", - "layout visualisations" + "In", + "In" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 4349380732135272089, - 16458298459980260537, + 752127337293867046, + 13713074507145666172, 18446744073709551615, 18446744073709551615, - 778, - 804, - 778, - 804, - 140, - 143, + 585, + 596, + 585, + 596, + 105, + 107, true, - "selective search algorithm", - "selective search algorithm" + "against the", + "against the" ], [ "term", "single-term", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 5327781098613689502, - 14889487484335627658, + 8106397759446161562, + 17038239979594063466, 18446744073709551615, 18446744073709551615, - 808, - 820, - 808, - 820, - 144, - 146, + 1452, + 1459, + 1452, + 1459, + 251, + 252, true, - "Faster R-CNN", - "Faster R-CNN" + "authors", + "authors" ], [ "term", "single-term", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 6165459236568103333, - 2812369711373771464, + 16381206560620045048, + 15910167584621803731, 18446744073709551615, 18446744073709551615, - 846, - 855, - 846, - 855, - 151, - 153, + 1444, + 1450, + 1444, + 1450, + 249, + 250, true, - "such type", - "such type" + "images", + "images" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 6184954595655792282, - 18387321712019319773, + 15441160910541486989, + 4857876114906482442, 18446744073709551615, 18446744073709551615, - 62, - 71, - 62, - 71, - 10, - 11, + 420, + 422, + 420, + 422, + 76, + 77, true, - "precision", - "precision" + "by", + "by" ], [ "term", "single-term", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 329104161571401725, - 15575423851065642052, + 6167933651658664291, + 3744443950142859841, 18446744073709551615, 18446744073709551615, - 137, - 142, - 137, - 142, - 21, - 22, + 1433, + 1442, + 1433, + 1442, + 247, + 248, true, - "order", - "order" + "documents", + "documents" ], [ "term", "single-term", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 14814151113413570861, - 12729204908894192489, + 14652282388618227426, + 14047491818249905874, 18446744073709551615, 18446744073709551615, - 178, - 186, - 178, - 186, - 30, - 31, + 1423, + 1431, + 1423, + 1431, + 245, + 246, true, - "networks", - "networks" + "concepts", + "concepts" ], [ "term", "single-term", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 6184954595655792282, - 18387321712019245881, + 12178341415895541463, + 14794406103722084656, 18446744073709551615, 18446744073709551615, - 204, - 213, - 204, - 213, - 35, - 36, + 1341, + 1344, + 1341, + 1344, + 226, + 227, true, - "precision", - "precision" + "XML", + "XML" ], [ "term", "single-term", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 16381206521526353544, - 16408450721845756506, + 389609625541450799, + 476546803986815687, 18446744073709551615, 18446744073709551615, - 238, - 244, - 238, - 244, - 40, - 41, + 1333, + 1337, + 1333, + 1337, + 224, + 225, true, - "regard", - "regard" + "JSON", + "JSON" ], [ - "term", - "single-term", - 2569392033451362672, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 2702871111219879214, - 2512541272008941381, + 14639581097006750428, + 4101766079705362430, 18446744073709551615, 18446744073709551615, - 262, - 272, - 262, - 272, - 44, - 45, + 226, + 234, + 226, + 234, + 42, + 43, true, - "confidence", - "confidence" + "learning", + "learning" ], [ - "term", - "single-term", - 2569392033451362672, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 16381206533950151485, - 198566132787583629, + 12178341415895601584, + 5841349058796574805, 18446744073709551615, 18446744073709551615, - 278, - 284, - 278, - 284, - 47, - 48, + 204, + 207, + 204, + 207, + 39, + 40, true, - "YOLOv2", - "YOLOv2" + "has", + "has" ], [ - "term", - "single-term", - 2569392033451362672, + "verb", + "compound-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 16381206521531485437, - 16408606466535231414, + 7780068026497460305, + 562602692899396130, 18446744073709551615, 18446744073709551615, - 305, - 311, - 305, - 311, - 52, - 53, + 760, + 772, + 760, + 772, + 133, + 136, true, - "recall", - "recall" + "allows to do", + "allows to do" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 6184954595655792282, - 18387321712019270016, + 13852121904094090198, + 14590995273314953312, 18446744073709551615, 18446744073709551615, - 330, - 339, - 330, - 339, - 57, - 58, + 376, + 389, + 376, + 389, + 69, + 72, true, - "precision", - "precision" + "Each of these", + "Each of these" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 2702871111219879214, - 2512541272008894019, + 15441160910541486538, + 4857876073127839401, 18446744073709551615, 18446744073709551615, - 355, - 365, - 355, - 365, - 62, - 63, + 351, + 353, + 351, + 353, + 64, + 65, true, - "confidence", - "confidence" + "in", + "in" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 6184954595655792282, - 18387321712019273929, + 15441160910541485670, + 4857876500911649386, 18446744073709551615, 18446744073709551615, - 527, - 536, - 527, - 536, - 96, - 97, + 324, + 326, + 324, + 326, + 61, + 62, true, - "precision", - "precision" + "of", + "of" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 8106478448964548679, - 12701825139671272799, + 389609625620237736, + 3672771697496836670, 18446744073709551615, 18446744073709551615, - 647, - 654, - 647, - 654, - 115, - 116, + 315, + 319, + 315, + 319, + 58, + 60, true, - "regions", - "regions" + "of a", + "of a" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 14637953883246475850, - 7956817731702541219, + 12178341415895623120, + 5842693827432037020, 18446744073709551615, 18446744073709551615, - 658, - 666, - 658, - 666, - 117, - 118, + 311, + 314, + 311, + 314, + 57, + 58, true, - "interest", - "interest" + "out", + "out" ], [ - "term", - "single-term", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 16381206560620045048, - 3784940468244328560, + 14638855195670894879, + 12124056112419286236, 18446744073709551615, 18446744073709551615, - 672, - 678, - 672, - 678, - 120, - 121, + 186, + 194, + 186, + 194, + 35, + 37, true, - "images", - "images" + "which at", + "which at" ], [ "term", @@ -46704,19 +47762,19 @@ "TEXT", "#/texts/54", 1.0, - 16381206563351041630, - 1952046848832586628, + 5327781098613689502, + 14889487484335627658, 18446744073709551615, 18446744073709551615, - 739, - 745, - 739, - 745, - 133, - 134, + 808, + 820, + 808, + 820, + 144, + 146, true, - "camera", - "camera" + "Faster R-CNN", + "Faster R-CNN" ], [ "term", @@ -46725,775 +47783,754 @@ "TEXT", "#/texts/54", 1.0, - 8106342034010873556, - 18238380662499221230, + 6165459236568103333, + 2812369711373771464, 18446744073709551615, 18446744073709551615, - 859, - 866, - 859, - 866, - 154, - 155, + 846, + 855, + 846, + 855, + 151, + 153, true, - "objects", - "objects" + "such type", + "such type" ], [ - "verb", - "compound-verb", - 2569392033451362672, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 11891944663675020942, - 13358251629780069780, + 15684933964106580812, + 12993940953903139083, 18446744073709551615, 18446744073709551615, - 72, - 89, - 72, - 89, - 11, - 14, + 208, + 225, + 208, + 225, + 40, + 42, true, - "are then computed", - "are then computed" + "trainable machine", + "trainable machine" ], [ - "verb", - "compound-verb", - 2569392033451362672, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 6183880245133195430, - 11375315636474919011, + 4066887494406769292, + 15944572553884562120, 18446744073709551615, 18446744073709551615, - 312, - 321, - 312, - 321, - 53, - 55, + 110, + 131, + 110, + 131, + 20, + 23, true, - "goes down", - "goes down" + "structured data files", + "structured data files" ], [ - "verb", - "compound-verb", - 2569392033451362672, + "term", + "single-term", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 2694830089385977061, - 235012322887490211, + 3741141293805179509, + 9675794815446093236, 18446744073709551615, 18446744073709551615, - 366, - 378, - 366, - 378, - 63, - 65, + 40, + 55, + 40, + 55, + 9, + 11, true, - "is increased", - "is increased" + "first component", + "first component" ], [ - "verb", - "compound-verb", - 2569392033451362672, + "sentence", + "", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 7743689594175537908, - 4826765463732452457, + 13863701154380798624, + 5607686807400793153, 18446744073709551615, 18446744073709551615, - 473, - 502, - 473, - 502, - 86, - 91, + 607, + 891, + 607, + 891, + 109, + 153, true, - "is also performing quite well", - "is also performing quite well" + "In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", + "In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures." ], [ - "verb", - "compound-verb", - 2569392033451362672, + "sentence", + "", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 14568989124066371477, - 1068965357575472568, + 210366145485171616, + 10779316463372138244, 18446744073709551615, 18446744073709551615, - 508, - 520, - 508, - 520, - 93, - 95, + 441, + 606, + 441, + 606, + 82, + 109, true, - "has slightly", - "has slightly" + "This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform.", + "This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform." ], [ - "verb", - "compound-verb", - 2569392033451362672, + "sentence", + "", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 16534452113033443144, - 7065494418204761025, + 1805453063572196406, + 15284543814810892665, 18446744073709551615, 18446744073709551615, - 626, - 646, - 626, - 646, - 111, - 115, + 376, + 440, + 376, + 440, + 69, + 82, true, - "is used to determine", - "is used to determine" + "Each of these microservices can be consumed by its own REST API.", + "Each of these microservices can be consumed by its own REST API." ], [ - "verb", - "compound-verb", - 2569392033451362672, + "sentence", + "", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 8106397797831668975, - 18220343756781523026, + 12064124790943537514, + 9632597734224986436, 18446744073709551615, 18446744073709551615, - 690, - 697, - 690, - 697, - 124, - 126, + 247, + 375, + 247, + 375, + 45, + 69, true, - "are not", - "are not" + "This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components.", + "This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components." ], [ - "verb", - "single-verb", - 2569392033451362672, + "sentence", + "", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 15441160910541486853, - 10477289391110259759, + 2369217517028793827, + 11890147189063173430, 18446744073709551615, 18446744073709551615, - 146, - 148, - 146, - 148, - 23, + 133, + 246, + 133, + 246, 24, + 45, true, - "do", - "do" + "The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms.", + "The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms." ], [ - "verb", - "single-verb", - 2569392033451362672, + "sentence", + "", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 14814150880980441564, - 5851167619774412175, + 5306542014856411002, + 14493189109864111156, 18446744073709551615, 18446744073709551615, - 191, - 199, - 191, - 199, - 33, - 34, + 0, + 132, + 0, + 132, + 0, + 24, true, - "optimise", - "optimise" + "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files.", + "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files." ], [ - "verb", - "single-verb", - 2569392033451362672, + "parenthesis", + "round brackets", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 6184954633443293966, - 15964917443528178420, + 329104053210116957, + 4933919093561563747, 18446744073709551615, 18446744073709551615, - 252, - 261, - 252, - 261, - 43, - 44, + 295, + 300, + 295, + 300, + 52, + 55, true, - "predicted", - "predicted" + "(CCS)", + "(CCS)" ], [ - "verb", - "single-verb", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 8106342033696543838, - 18232753974273180210, + 389609625700764258, + 3654402694655081504, 18446744073709551615, 18446744073709551615, - 288, - 295, - 288, - 295, - 49, - 50, + 171, + 175, + 171, + 175, + 31, + 33, true, - "observe", - "observe" + "as a", + "as a" ], [ - "verb", - "single-verb", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 389609625699055541, - 1239396878369861980, + 15441160910541485670, + 4857876500911708855, 18446744073709551615, 18446744073709551615, - 340, - 344, - 340, - 344, - 58, - 59, + 168, + 170, + 168, + 170, + 30, + 31, true, - "goes", - "goes" + "of", + "of" ], [ - "verb", - "single-verb", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 6168826060228989821, - 9992741985777267919, + 389609625698622943, + 3653991554605439637, 18446744073709551615, 18446744073709551615, - 380, - 389, - 380, - 389, - 66, - 67, + 105, + 109, + 105, + 109, + 19, + 20, true, - "obtaining", - "obtaining" + "into", + "into" ], [ - "verb", - "single-verb", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 8106397860663428876, - 16964248131253901291, + 15441160910541485670, + 4857876500911665887, 18446744073709551615, 18446744073709551615, - 560, - 567, - 560, - 567, - 102, - 103, + 71, + 73, + 71, + 73, + 14, + 15, true, - "believe", - "believe" + "of", + "of" ], [ - "verb", - "single-verb", - 2569392033451362672, + "conn", + "single-conn", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 13983620007877845674, - 12955352785275452378, + 16381206566339127348, + 15334506191466791715, 18446744073709551615, 18446744073709551615, - 573, - 583, - 573, - 583, - 104, - 105, + 33, + 39, + 33, + 39, + 7, + 9, true, - "originates", - "originates" + "on the", + "on the" ], [ - "verb", - "single-verb", + "term", + "single-term", 2569392033451362672, "TEXT", "#/texts/54", 1.0, - 389609625697838276, - 1239402610955961201, + 8106342034010873556, + 18238380662499221230, 18446744073709551615, 18446744073709551615, - 682, - 686, - 682, - 686, - 122, - 123, + 859, + 866, + 859, + 866, + 154, + 155, true, - "feed", - "feed" + "objects", + "objects" ], [ - "verb", - "single-verb", - 2569392033451362672, + "conn", + "single-conn", + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 389609625618411791, - 1242783662433971802, + 15441160910541485865, + 9094674369429209693, 18446744073709551615, 18446744073709551615, - 727, - 731, - 727, - 731, - 130, - 131, + 1391, + 1393, + 1391, + 1393, + 238, + 239, true, - "made", - "made" + "to", + "to" ], [ "verb", - "single-verb", - 2569392033451362672, + "compound-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 15441160910541487001, - 10477275230049640367, + 8944903948136983007, + 3100279804263702344, 18446744073709551615, 18446744073709551615, - 831, - 833, - 831, - 833, - 148, - 149, + 666, + 680, + 666, + 680, + 119, + 121, true, - "be", - "be" + "are integrated", + "are integrated" ], [ "conn", "single-conn", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 16553501753141503400, - 15045481503517904124, - 18446744073709551615, - 18446744073709551615, - 834, - 845, - 834, - 845, - 149, - 151, - true, - "optimal for", - "optimal for" - ], - [ - "conn", - "single-conn", - 2569392033451362672, - "TEXT", - "#/texts/54", - 1.0, - 14637917333165224513, - 10908983268505451281, + 5748787292106066554, + 4405126515520980867, 18446744073709551615, 18446744073709551615, - 90, - 98, - 90, - 98, - 14, - 16, + 1513, + 1523, + 1513, + 1523, + 263, + 265, true, - "for this", - "for this" + "these into", + "these into" ], [ "conn", "single-conn", - 2569392033451362672, + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 15441160910541480354, - 10477275240531848205, + 8106396862006371970, + 13009000795262405678, 18446744073709551615, 18446744073709551615, - 134, - 136, - 134, - 136, - 20, - 21, + 0, + 7, + 0, + 7, + 0, + 2, true, - "In", - "In" + "In this", + "In this" ], [ "conn", "single-conn", - 2569392033451362672, + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 16381206565712212855, - 1966173897978141572, + 1993790582685692910, + 3267300742396852093, 18446744073709551615, 18446744073709551615, - 167, - 173, - 167, - 173, - 27, - 29, + 855, + 869, + 855, + 869, + 147, + 149, true, - "of the", - "of the" + "robust against", + "robust against" ], [ - "conn", - "single-conn", - 2569392033451362672, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 389609625618037948, - 1242787593333487218, + 329104161505838030, + 13472448784809337111, 18446744073709551615, 18446744073709551615, - 233, - 237, - 233, - 237, - 39, - 40, + 836, + 841, + 836, + 841, + 144, + 145, true, - "with", - "with" + "makes", + "makes" ], [ - "conn", - "single-conn", - 2569392033451362672, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 12178341415896108722, - 156309885604541418, + 5305301449677211216, + 8681985492456152514, 18446744073709551615, 18446744073709551615, - 274, - 277, - 274, - 277, - 46, - 47, + 801, + 811, + 801, + 811, + 140, + 141, true, - "For", - "For" + "eliminates", + "eliminates" ], [ "conn", "single-conn", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 14634130761162415388, - 14288776936577427060, + 16057368201763467386, + 216739275376297295, 18446744073709551615, 18446744073709551615, - 296, - 304, - 296, - 304, - 50, - 52, + 1484, + 1494, + 1484, + 1494, + 258, + 260, true, - "that the", - "that the" + "from these", + "from these" ], [ "conn", "single-conn", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 16381206568455155979, - 1869095877123778211, + 15441160910541485670, + 9094674364219303614, 18446744073709551615, 18446744073709551615, - 348, - 354, - 348, - 354, - 60, - 62, + 1420, + 1422, + 1420, + 1422, + 244, + 245, true, - "as the", - "as the" + "of", + "of" ], [ "conn", "single-conn", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 15441160910541485670, - 10477275256518274646, + 16381206557726458966, + 16025464328456099242, 18446744073709551615, 18446744073709551615, - 409, - 411, - 409, - 411, - 71, - 72, + 1399, + 1405, + 1399, + 1405, + 240, + 242, true, - "of", - "of" + "with a", + "with a" ], [ - "conn", - "single-conn", - 2569392033451362672, + "verb", + "compound-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 389609625700792947, - 1238530397841875604, + 15903921305565697154, + 7448795222128154927, 18446744073709551615, 18446744073709551615, - 418, - 422, - 418, - 422, + 404, + 419, + 404, + 419, 73, - 75, + 76, true, - "at a", - "at a" + "can be consumed", + "can be consumed" ], [ - "conn", - "single-conn", - 2569392033451362672, + "verb", + "compound-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 15441160910541485670, - 10477275256518295884, + 5237537207757377628, + 6864205941272212007, 18446744073709551615, 18446744073709551615, - 440, - 442, - 440, - 442, - 77, - 78, + 149, + 167, + 149, + 167, + 27, + 30, true, - "of", - "of" + "propose is thought", + "propose is thought" ], [ - "conn", - "single-conn", - 2569392033451362672, + "verb", + "compound-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 14637917359887717745, - 1544745809668392834, + 8568388710680918302, + 1832540720065690143, 18446744073709551615, 18446744073709551615, - 584, - 592, - 584, - 592, - 105, - 107, + 18, + 32, + 18, + 32, + 5, + 7, true, - "from the", - "from the" + "focus entirely", + "focus entirely" ], [ "conn", "single-conn", - 2569392033451362672, + 16923207262044929933, "TEXT", - "#/texts/54", + "#/texts/11", 1.0, - 15441160910541485670, - 10477275256518301113, + 8106478685702231057, + 1428751967817183488, 18446744073709551615, 18446744073709551615, - 655, - 657, - 655, - 657, - 116, - 117, + 1325, + 1332, + 1325, + 1332, + 222, + 224, true, - "of", - "of" + "such as", + "such as" ], [ - "conn", - "single-conn", - 2569392033451362672, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 16381206557726458966, - 3788832551851477825, + 329104159209890617, + 13606843864069204390, 18446744073709551615, 18446744073709551615, - 732, + 733, 738, - 732, + 733, 738, - 131, - 133, + 127, + 128, true, - "with a", - "with a" + "gives", + "gives" ], [ - "conn", - "single-conn", - 2569392033451362672, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 15441160910541486538, - 10477275205185242704, + 389609625618412480, + 3672855485569275414, 18446744073709551615, 18446744073709551615, - 805, - 807, - 805, - 807, - 143, - 144, + 619, + 623, + 619, + 623, + 112, + 113, true, - "in", - "in" + "make", + "make" ], [ - "conn", - "single-conn", - 2569392033451362672, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 15441160910541485670, - 10477275256518310244, + 8106396517344986388, + 5854485364096172979, 18446744073709551615, 18446744073709551615, - 856, - 858, - 856, - 858, - 153, - 154, + 559, + 566, + 559, + 566, + 102, + 103, true, - "of", - "of" + "develop", + "develop" ], [ - "conn", - "single-conn", - 2569392033451362672, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/54", + "#/texts/12", 1.0, - 15441160910541485865, - 10477275215095288698, + 16381206569317834029, + 15127822949531294179, 18446744073709551615, 18446744073709551615, - 143, - 145, - 143, - 145, - 22, - 23, + 546, + 552, + 546, + 552, + 99, + 100, true, - "to", - "to" + "allows", + "allows" ], [ - "conn", - "single-conn", + "verb", + "single-verb", 2569392033451362672, "TEXT", "#/texts/54", 1.0, - 16381206519425733256, - 370344314517327407, + 15441160910541487001, + 10477275230049640367, 18446744073709551615, 18446744073709551615, - 245, - 251, - 245, - 251, - 41, - 43, + 831, + 833, + 831, + 833, + 148, + 149, true, - "to the", - "to the" + "be", + "be" ], [ "conn", @@ -47502,880 +48539,1237 @@ "TEXT", "#/texts/54", 1.0, - 15441160910541485865, - 10477275215095322459, + 16553501753141503400, + 15045481503517904124, 18446744073709551615, 18446744073709551615, - 634, - 636, - 634, - 636, - 113, - 114, + 834, + 845, + 834, + 845, + 149, + 151, true, - "to", - "to" + "optimal for", + "optimal for" ], [ - "expression", - "wtoken-concatenation", - 14539041145469267811, + "verb", + "single-verb", + 16923207262044929933, "TEXT", - "#/texts/55", + "#/texts/11", 1.0, - 329104147725158908, - 18028372742913290156, + 12178341415895640485, + 14799993819747716499, 18446744073709551615, 18446744073709551615, - 0, - 5, - 0, - 5, - 0, - 1, + 1509, + 1512, + 1509, + 1512, + 262, + 263, true, - "3.4.3", - "3.4.3" + "put", + "put" ], [ - "sentence", - "", - 14539041145469267811, + "verb", + "single-verb", + 16923207262044929933, "TEXT", - "#/texts/55", + "#/texts/11", 1.0, - 7718133462399744108, - 17823198661305637266, + 6168374324562720592, + 185665609222125727, 18446744073709551615, 18446744073709551615, - 0, - 31, - 0, - 31, - 0, - 5, + 1474, + 1483, + 1474, + 1483, + 257, + 258, true, - "3.4.3 Template specific Models.", - "3.4.3 Template specific Models." + "extracted", + "extracted" ], [ - "sentence", - "", - 14539041145469267811, + "verb", + "single-verb", + 16923207262044929933, "TEXT", - "#/texts/55", + "#/texts/11", 1.0, - 10092485441396158590, - 1921679794908306598, + 389609625696287852, + 497722139527509467, 18446744073709551615, 18446744073709551615, - 32, - 159, - 32, - 159, - 5, - 27, + 1394, + 1398, + 1394, + 1398, + 239, + 240, true, - "The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template.", - "The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template." + "deal", + "deal" ], [ - "sentence", - "", - 14539041145469267811, + "verb", + "single-verb", + 16923207262044929933, "TEXT", - "#/texts/55", + "#/texts/11", 1.0, - 15812734743858168044, - 5104988671183900609, + 15441160910541486535, + 9094674367323996407, 18446744073709551615, 18446744073709551615, - 160, - 272, - 160, - 272, - 27, - 47, + 1383, + 1385, + 1383, + 1385, + 236, + 237, true, - "This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance.", - "This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance." + "is", + "is" ], [ - "sentence", - "", - 14539041145469267811, + "verb", + "single-verb", + 16923207262044929933, "TEXT", - "#/texts/55", + "#/texts/11", 1.0, - 551135567978634707, - 9805137836117614428, + 389609625621532398, + 554816074249930520, 18446744073709551615, 18446744073709551615, - 273, - 460, - 273, - 460, - 47, - 78, + 1358, + 1362, + 1358, + 1362, + 231, + 232, true, - "Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", - "Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality." + "need", + "need" ], [ "term", "single-term", - 14539041145469267811, + 16923207262044929933, "TEXT", - "#/texts/55", + "#/texts/11", 1.0, - 11907907877741579530, - 940094317087021995, + 11805624357079379862, + 2927818536118337064, 18446744073709551615, 18446744073709551615, - 6, - 30, - 6, - 30, - 1, - 4, + 1406, + 1419, + 1406, + 1419, + 242, + 244, true, - "Template specific Models", - "Template specific Models" + "large variety", + "large variety" ], [ "term", "single-term", - 14539041145469267811, + 16923207262044929933, "TEXT", - "#/texts/55", + "#/texts/11", 1.0, - 3663813169945470735, - 17139564151051767194, + 13018076357583391135, + 18265178771346204830, 18446744073709551615, 18446744073709551615, - 44, - 68, - 44, - 68, - 8, - 11, + 1365, + 1377, + 1365, + 1377, + 233, + 235, true, - "template specific models", - "template specific models" + "query engine", + "query engine" ], [ "term", "single-term", - 14539041145469267811, + 16923207262044929933, "TEXT", - "#/texts/55", + "#/texts/11", 1.0, - 16960645913427248555, - 7662141651479474713, + 14630472899120924944, + 15550065915551638064, 18446744073709551615, 18446744073709551615, - 91, - 109, - 91, - 109, - 16, - 18, + 1307, + 1324, + 1307, + 1324, + 220, + 222, true, - "extraction quality", - "extraction quality" + "structured format", + "structured format" ], [ "term", "single-term", - 14539041145469267811, + 3749305213430885773, "TEXT", - "#/texts/55", + "#/texts/12", 1.0, - 10137510760641589283, - 15174113578041628274, + 14814125365076808131, + 9349977279496695017, 18446744073709551615, 18446744073709551615, - 141, - 158, - 141, - 158, - 24, - 26, + 846, + 854, + 846, + 854, + 146, + 147, true, - "specific template", - "specific template" + "platform", + "platform" ], [ "term", "single-term", - 14539041145469267811, + 3749305213430885773, "TEXT", - "#/texts/55", + "#/texts/12", 1.0, - 7342862043108457350, - 10866470711373289678, + 990358581043194791, + 393905999985528944, 18446744073709551615, 18446744073709551615, - 181, - 202, - 181, - 202, - 31, - 34, + 652, + 665, + 652, + 665, + 118, + 119, true, - "many technical fields", - "many technical fields" + "microservices", + "microservices" ], [ "term", "single-term", - 14539041145469267811, + 3749305213430885773, "TEXT", - "#/texts/55", + "#/texts/12", 1.0, - 3376407656379762908, - 17651500245932752692, + 14814125365076808131, + 9349977279496698149, 18446744073709551615, 18446744073709551615, - 251, - 271, - 251, - 271, - 44, - 46, + 629, + 637, + 629, + 637, + 114, + 115, true, - "paramount importance", - "paramount importance" + "platform", + "platform" ], [ - "term", - "single-term", - 14539041145469267811, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/55", + "#/texts/12", 1.0, - 879437392081459464, - 10698589901478685905, + 8106476000254393164, + 1725287517912256023, 18446744073709551615, 18446744073709551615, - 286, - 310, - 286, - 310, - 49, - 52, + 504, + 511, + 504, + 511, + 93, + 94, true, - "many technical documents", - "many technical documents" + "process", + "process" ], [ - "term", - "single-term", - 14539041145469267811, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/55", + "#/texts/12", 1.0, - 15130402050161305835, - 1457144697725364176, + 329104159303279946, + 13502145352581782916, 18446744073709551615, 18446744073709551615, - 316, - 330, - 316, - 330, - 54, - 56, + 477, + 482, + 477, + 482, + 89, + 90, true, - "specific field", - "specific field" + "build", + "build" ], [ - "term", - "single-term", - 14539041145469267811, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/55", + "#/texts/12", 1.0, - 5723400002059657755, - 8384905200420629131, + 16381206569317834029, + 15127822949531520780, 18446744073709551615, 18446744073709551615, - 353, - 369, - 353, - 369, - 60, - 62, + 464, + 470, + 464, + 470, + 86, + 87, true, - "certain template", - "certain template" + "allows", + "allows" ], [ - "term", - "single-term", - 14539041145469267811, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/55", + "#/texts/54", 1.0, - 16960645913427248555, - 7662141651479431440, + 15441160910541486538, + 10477275205185242704, 18446744073709551615, 18446744073709551615, - 441, - 459, - 441, - 459, - 75, - 77, + 805, + 807, + 805, + 807, + 143, + 144, true, - "extraction quality", - "extraction quality" + "in", + "in" ], [ - "term", - "single-term", - 14539041145469267811, + "conn", + "single-conn", + 2569392033451362672, "TEXT", - "#/texts/55", + "#/texts/54", 1.0, - 389609625699055241, - 14883359024073212478, + 15441160910541485670, + 10477275256518310244, 18446744073709551615, 18446744073709551615, - 36, - 40, - 36, - 40, - 6, - 7, + 856, + 858, + 856, + 858, + 153, + 154, true, - "goal", - "goal" + "of", + "of" ], [ "term", "single-term", - 14539041145469267811, + 3749305213430885773, "TEXT", - "#/texts/55", + "#/texts/12", 1.0, - 329104161610777240, - 15370809836743986311, + 329104161571401725, + 13426123714444340915, 18446744073709551615, 18446744073709551615, - 130, - 135, - 130, - 135, - 21, - 22, + 610, + 615, + 610, + 615, + 110, + 111, true, - "model", - "model" + "order", + "order" ], [ "term", "single-term", - 14539041145469267811, + 3749305213430885773, "TEXT", - "#/texts/55", + "#/texts/12", 1.0, - 14650440612701450082, - 10632661340355574917, + 14814125365076808131, + 9349977279496610029, 18446744073709551615, 18446744073709551615, - 214, - 222, - 214, - 222, - 37, - 38, + 597, + 605, + 597, + 605, + 107, + 108, true, - "accuracy", - "accuracy" + "platform", + "platform" ], [ - "term", - "single-term", - 14539041145469267811, + "verb", + "single-verb", + 3749305213430885773, "TEXT", - "#/texts/55", + "#/texts/12", 1.0, - 389609625696431489, - 14876459829455684771, + 6167774653473311671, + 8932714637044289580, 18446744073709551615, 18446744073709551615, - 240, - 244, - 240, - 244, - 41, - 42, + 341, + 350, + 341, + 350, + 63, + 64, true, - "data", - "data" + "organized", + "organized" ], [ "term", "single-term", - 14539041145469267811, + 17187299362680072378, "TEXT", - "#/texts/55", + "#/texts/14", 1.0, - 329104161787480235, - 15382185116652927163, + 14634109233387695059, + 13015968863509180771, 18446744073709551615, 18446744073709551615, - 389, - 394, - 389, - 394, - 66, + 341, + 349, + 341, + 349, 67, + 68, true, - "sense", - "sense" + "research", + "research" ], [ "term", "single-term", - 14539041145469267811, + 17187299362680072378, "TEXT", - "#/texts/55", + "#/texts/14", 1.0, - 5946904284821171904, - 7436968498862967568, + 8106352240078799135, + 10120178145746003140, 18446744073709551615, 18446744073709551615, - 403, - 412, - 403, - 412, - 69, - 70, + 293, + 300, + 293, + 300, + 57, + 58, true, - "advantage", - "advantage" + "Section", + "Section" ], [ - "term", - "single-term", - 14539041145469267811, + "sentence", + "", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 14634130803848280536, - 13102933406250746055, + 9067254065901696428, + 10388894100200420496, 18446744073709551615, 18446744073709551615, - 421, - 429, - 421, - 429, - 72, - 73, + 781, + 955, + 781, + 955, + 142, + 173, true, - "template", - "template" + "Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", + "Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution." ], [ - "verb", - "compound-verb", - 14539041145469267811, + "sentence", + "", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 6623118764989562485, - 9686528964214635468, + 9906268904976001851, + 12420000417227776440, 18446744073709551615, 18446744073709551615, - 69, - 81, - 69, - 81, - 11, - 14, + 712, + 780, + 712, + 780, + 128, + 142, true, - "is to obtain", - "is to obtain" + "For example, this could be a JSON/XML file with a particular schema.", + "For example, this could be a JSON/XML file with a particular schema." ], [ - "verb", - "single-verb", - 14539041145469267811, + "sentence", + "", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 15180748647375949898, - 15041926949817059678, + 7959677268287021834, + 6471587455066159969, 18446744073709551615, 18446744073709551615, - 113, - 125, - 113, - 125, - 19, - 20, + 444, + 711, + 444, + 711, + 83, + 128, true, - "specializing", - "specializing" + "The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format.", + "The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format." ], [ - "verb", - "single-verb", - 14539041145469267811, + "sentence", + "", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 15441160910541486535, - 1662040640859036333, + 1323125914001755357, + 2708959011598697473, 18446744073709551615, 18446744073709551615, - 165, - 167, - 165, - 167, - 28, - 29, + 360, + 443, + 360, + 443, + 66, + 83, true, - "is", - "is" + "This can be done through a conversion from PDF towards HTML or MS Word for example.", + "This can be done through a conversion from PDF towards HTML or MS Word for example." ], [ - "verb", - "single-verb", - 14539041145469267811, + "sentence", + "", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 6168374324562720592, - 8408511475472730744, + 6270962906961324285, + 7804853914853957296, 18446744073709551615, 18446744073709551615, - 230, - 239, - 230, - 239, - 40, - 41, + 206, + 359, + 206, + 359, + 38, + 66, true, - "extracted", - "extracted" + "In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document.", + "In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document." ], [ - "verb", - "single-verb", - 14539041145469267811, + "sentence", + "", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 15441160910541486535, - 1662040640859038873, + 5916877018351655351, + 15887401132590714495, 18446744073709551615, 18446744073709551615, - 245, - 247, - 245, - 247, - 42, - 43, + 137, + 205, + 137, + 205, + 25, + 38, true, - "is", - "is" + "Broadly speaking, there are two types of approaches to this problem.", + "Broadly speaking, there are two types of approaches to this problem." ], [ - "verb", - "single-verb", - 14539041145469267811, + "sentence", + "", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 16381206574684919940, - 8690278604869594595, + 7413762744011699502, + 5112650843480238838, 18446744073709551615, 18446744073709551615, - 341, - 347, - 341, - 347, - 57, - 58, + 0, + 136, + 0, + 136, + 0, + 25, true, - "appear", - "appear" + "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4].", + "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]." ], [ - "verb", - "single-verb", - 14539041145469267811, + "expression", + "word-concatenation", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 329104161505838030, - 15370325700124998836, + 14650469740546809126, + 13134297167790756810, 18446744073709551615, 18446744073709551615, - 383, - 388, - 383, - 388, - 65, - 66, + 741, + 749, + 741, + 749, + 135, + 136, true, - "makes", - "makes" + "JSON/XML", + "JSON/XML" ], [ - "verb", - "single-verb", - 14539041145469267811, + "expression", + "common", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 389609625631208371, - 14878114134196888026, + 15441160910541486545, + 16301782680726802891, 18446744073709551615, 18446744073709551615, - 398, - 402, - 398, - 402, - 68, - 69, + 558, + 562, + 558, + 562, + 101, + 102, true, - "take", - "take" + "ie", + "i.e." ], [ - "verb", - "single-verb", - 14539041145469267811, + "term", + "single-term", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 8106398106568099440, - 4690670493670021785, + 3982493928589580498, + 8690888332062541868, 18446744073709551615, 18446744073709551615, - 433, - 440, - 433, - 440, - 74, - 75, + 762, + 779, + 762, + 779, + 139, + 141, true, - "improve", - "improve" + "particular schema", + "particular schema" ], [ - "conn", - "single-conn", - 14539041145469267811, + "term", + "single-term", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 3701312585595488544, - 14499465500010376427, + 673611805924135293, + 4470122145607424586, 18446744073709551615, 18446744073709551615, - 168, - 180, - 168, - 180, - 29, - 31, + 741, + 754, + 741, + 754, + 135, + 137, true, - "necessary in", - "necessary in" + "JSON/XML file", + "JSON/XML file" ], [ - "conn", - "single-conn", - 14539041145469267811, + "term", + "single-term", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 15441160910541485670, - 1662040798765251967, + 14630472899120924944, + 11642528133024722414, 18446744073709551615, 18446744073709551615, - 41, - 43, - 41, - 43, - 7, - 8, + 693, + 710, + 693, + 710, + 125, + 127, true, - "of", - "of" + "structured format", + "structured format" ], [ - "conn", - "single-conn", + "term", + "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, - 15441160910541486989, - 1662040951000079940, + 16960645913427248555, + 7662141651479431440, 18446744073709551615, 18446744073709551615, - 110, - 112, - 110, - 112, - 18, - 19, + 441, + 459, + 441, + 459, + 75, + 77, true, - "by", - "by" + "extraction quality", + "extraction quality" ], [ - "conn", - "single-conn", - 14539041145469267811, + "parenthesis", + "square brackets", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 389609625618762887, - 14878547061345061059, + 16381206577288742091, + 6894361769431189204, 18446744073709551615, 18446744073709551615, - 136, - 140, - 136, - 140, - 22, + 129, + 135, + 129, + 135, + 19, 24, true, - "on a", - "on a" + "[3, 4]", + "[3, 4]" ], [ - "conn", - "single-conn", - 14539041145469267811, + "term", + "single-term", + 17187299362680072378, "TEXT", - "#/texts/55", + "#/texts/14", 1.0, - 16381206565712212855, - 5026312373792128532, + 8935218952277810589, + 8068414338192124611, 18446744073709551615, 18446744073709551615, - 223, - 229, - 223, - 229, - 38, - 40, + 354, + 373, + 354, + 373, + 69, + 72, true, - "of the", - "of the" + "possible next steps", + "possible next steps" ], [ - "conn", - "single-conn", - 14539041145469267811, + "term", + "single-term", + 17187299362680072378, "TEXT", - "#/texts/55", + "#/texts/14", 1.0, - 15441160910541485670, - 1662040798765106998, + 8051609034415273401, + 10487247228020021805, 18446744073709551615, 18446744073709551615, - 248, - 250, - 248, - 250, - 43, - 44, + 319, + 333, + 319, + 333, + 63, + 65, true, - "of", - "of" + "open questions", + "open questions" ], [ - "conn", - "single-conn", - 14539041145469267811, + "numval", + "ival", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 389609625698530964, - 14883385687690770855, + 17767354399704235156, + 2552838057671732941, 18446744073709551615, 18446744073709551615, - 311, - 315, - 311, - 315, - 52, - 54, + 133, + 134, + 133, + 134, + 22, + 23, true, - "in a", - "in a" + "4", + "4" ], [ - "conn", - "single-conn", - 14539041145469267811, + "term", + "single-term", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 389609625698530964, - 14883385687690756753, + 11738704476441755021, + 15052719376970997774, 18446744073709551615, 18446744073709551615, - 348, - 352, - 348, - 352, - 58, - 60, + 670, + 687, + 670, + 687, + 121, + 123, true, - "in a", - "in a" + "original document", + "original document" ], [ - "conn", - "single-conn", - 14539041145469267811, + "term", + "single-term", + 7935233310532930917, "TEXT", - "#/texts/55", + "#/texts/16", 1.0, - 8106342927224204628, - 15389357728224894046, + 10632085908481842480, + 3848207310545898370, 18446744073709551615, 18446744073709551615, - 413, - 420, - 413, - 420, - 70, - 72, + 448, + 472, + 448, + 472, + 84, + 87, true, - "of this", - "of this" + "second approach attempts", + "second approach attempts" ], [ - "conn", - "single-conn", + "term", + "single-term", 14539041145469267811, "TEXT", "#/texts/55", 1.0, - 15441160910541485865, - 1662040545925493605, + 14634130803848280536, + 13102933406250746055, 18446744073709551615, 18446744073709551615, + 421, + 429, + 421, + 429, 72, - 74, + 73, + true, + "template", + "template" + ], + [ + "numval", + "ival", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 17767354399704235163, + 2552838057434759723, + 18446744073709551615, + 18446744073709551615, + 130, + 131, + 130, + 131, + 20, + 21, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 697648145931166262, + "TEXT", + "#/texts/15", + 1.0, + 17767354399704235162, + 7083995155582974975, + 18446744073709551615, + 18446744073709551615, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "2", + "2" + ], + [ + "conn", + "single-conn", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 16381206565712212855, + 8774362010989370225, + 18446744073709551615, + 18446744073709551615, + 393, + 399, + 393, + 399, + 75, + 77, + true, + "of the", + "of the" + ], + [ + "sentence", + "", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 18017856606572388707, + 11119000415778134338, + 18446744073709551615, + 18446744073709551615, + 281, + 340, + 281, + 340, + 54, + 67, + true, + "Finally, in Section 5, we discuss the open questions w.r.t.", + "Finally, in Section 5, we discuss the open questions w.r.t." + ], + [ + "conn", + "single-conn", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 16381206560518651853, + 668849598704261767, + 18446744073709551615, + 18446744073709551615, + 374, + 380, + 374, + 380, 72, 74, - 12, + true, + "in the", + "in the" + ], + [ + "term", + "single-term", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 8106471324341093100, + 10896171766474086033, + 18446744073709551615, + 18446744073709551615, + 423, + 430, + 423, + 430, + 78, + 80, + true, + "MS Word", + "MS Word" + ], + [ + "term", + "single-term", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 5396697874491186037, + 9700463201577231321, + 18446744073709551615, + 18446744073709551615, + 320, + 342, + 320, + 342, + 59, + 62, + true, + "original visual layout", + "original visual layout" + ], + [ + "term", + "single-term", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 9088977435888678827, + 7025359603537163328, + 18446744073709551615, + 18446744073709551615, + 213, + 227, + 213, + 227, + 40, + 42, + true, + "first approach", + "first approach" + ], + [ + "verb", + "single-verb", + 14539041145469267811, + "TEXT", + "#/texts/55", + 1.0, + 8106398106568099440, + 4690670493670021785, + 18446744073709551615, + 18446744073709551615, + 433, + 440, + 433, + 440, + 74, + 75, + true, + "improve", + "improve" + ], + [ + "conn", + "single-conn", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 15441160910541486538, + 13110916059597983243, + 18446744073709551615, + 18446744073709551615, + 290, + 292, + 290, + 292, + 56, + 57, + true, + "in", + "in" + ], + [ + "expression", + "wtoken-concatenation", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 329104161622136223, + 9304407318657891408, + 18446744073709551615, + 18446744073709551615, + 334, + 339, + 334, + 339, + 65, + 66, + true, + "w.r.t", + "w.r.t" + ], + [ + "verb", + "single-verb", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 329104161622136223, + 9304407318657891408, + 18446744073709551615, + 18446744073709551615, + 334, + 339, + 334, + 339, + 65, + 66, + true, + "w.r.t", + "w.r.t" + ], + [ + "numval", + "ival", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 17767354399704235157, + 1719697440128552642, + 18446744073709551615, + 18446744073709551615, + 301, + 302, + 301, + 302, + 58, + 59, + true, + "5", + "5" + ], + [ + "verb", + "single-verb", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 8106397868479560363, + 8170627791941832362, + 18446744073709551615, + 18446744073709551615, + 307, + 314, + 307, + 314, + 61, + 62, + true, + "discuss", + "discuss" + ], + [ + "term", + "single-term", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 14814125365076808131, + 2092259178040758447, + 18446744073709551615, + 18446744073709551615, + 400, + 408, + 400, + 408, + 77, + 78, + true, + "platform", + "platform" + ], + [ + "term", + "single-term", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 4649638595618642234, + 17675128594551486840, + 18446744073709551615, + 18446744073709551615, + 86, + 105, + 86, + 105, 13, + 15, true, - "to", - "to" + "outstanding problem", + "outstanding problem" + ], + [ + "term", + "single-term", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 1649772470814702484, + 1849781250727403708, + 18446744073709551615, + 18446744073709551615, + 41, + 73, + 41, + 73, + 7, + 10, + true, + "automatic content reconstruction", + "automatic content reconstruction" ], [ "conn", @@ -48384,19 +49778,61 @@ "TEXT", "#/texts/55", 1.0, - 15441160910541485865, - 1662040545925472456, + 8106342927224204628, + 15389357728224894046, 18446744073709551615, 18446744073709551615, - 395, - 397, - 395, - 397, - 67, - 68, + 413, + 420, + 413, + 420, + 70, + 72, true, - "to", - "to" + "of this", + "of this" + ], + [ + "expression", + "word-concatenation", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 5044385734724420019, + 14795950652192688492, + 18446744073709551615, + 18446744073709551615, + 175, + 191, + 175, + 191, + 34, + 35, + true, + "state-of-the-art", + "state-of-the-art" + ], + [ + "term", + "single-term", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 12653831733608918357, + 1251885133784117773, + 18446744073709551615, + 18446744073709551615, + 23, + 36, + 23, + 36, + 4, + 6, + true, + "PDF documents", + "PDF documents" ], [ "conn", @@ -50120,6 +51556,27 @@ "algorithm", "algorithm" ], + [ + "term", + "single-term", + 1994904537764312371, + "TEXT", + "#/texts/57", + 1.0, + 389609625695123443, + 6542771302846269141, + 18446744073709551615, + 18446744073709551615, + 251, + 255, + 251, + 255, + 44, + 45, + true, + "case", + "case" + ], [ "verb", "compound-verb", @@ -50288,6 +51745,27 @@ "on", "on" ], + [ + "conn", + "single-conn", + 1994904537764312371, + "TEXT", + "#/texts/57", + 1.0, + 15441160910541480354, + 2424967642558730888, + 18446744073709551615, + 18446744073709551615, + 244, + 246, + 244, + 246, + 42, + 43, + true, + "In", + "In" + ], [ "conn", "single-conn", @@ -50351,6 +51829,27 @@ "As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", "As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised." ], + [ + "term", + "single-term", + 7742256726079628058, + "TEXT", + "#/texts/58", + 1.0, + 3327207230779122172, + 14880303134672427170, + 18446744073709551615, + 18446744073709551615, + 5, + 25, + 5, + 25, + 1, + 3, + true, + "structure originates", + "structure originates" + ], [ "term", "single-term", @@ -50456,6 +51955,48 @@ "distribution functions", "distribution functions" ], + [ + "term", + "single-term", + 7742256726079628058, + "TEXT", + "#/texts/58", + 1.0, + 16381206562412792821, + 5747237974239301187, + 18446744073709551615, + 18446744073709551615, + 29, + 35, + 29, + 35, + 4, + 5, + true, + "course", + "course" + ], + [ + "term", + "single-term", + 7742256726079628058, + "TEXT", + "#/texts/58", + 1.0, + 14634130803848280536, + 6539927821134743025, + 18446744073709551615, + 18446744073709551615, + 45, + 53, + 45, + 53, + 7, + 8, + true, + "template", + "template" + ], [ "term", "single-term", @@ -50687,6 +52228,48 @@ "robust against", "robust against" ], + [ + "conn", + "single-conn", + 7742256726079628058, + "TEXT", + "#/texts/58", + 1.0, + 15441160910541485670, + 2182812832524328350, + 18446744073709551615, + 18446744073709551615, + 26, + 28, + 26, + 28, + 3, + 4, + true, + "of", + "of" + ], + [ + "conn", + "single-conn", + 7742256726079628058, + "TEXT", + "#/texts/58", + 1.0, + 14637917359887717745, + 4260221633890204909, + 18446744073709551615, + 18446744073709551615, + 36, + 44, + 36, + 44, + 5, + 7, + true, + "from the", + "from the" + ], [ "conn", "single-conn", @@ -55097,6 +56680,48 @@ "layout semantics", "layout semantics" ], + [ + "term", + "single-term", + 15253541252152665681, + "TEXT", + "#/texts/64", + 1.0, + 3435211303988053560, + 11739782976916952568, + 18446744073709551615, + 18446744073709551615, + 193, + 213, + 193, + 213, + 38, + 41, + true, + "structured data file", + "structured data file" + ], + [ + "term", + "single-term", + 15253541252152665681, + "TEXT", + "#/texts/64", + 1.0, + 18223109722832599454, + 8892683979055541833, + 18446744073709551615, + 18446744073709551615, + 266, + 277, + 266, + 277, + 50, + 52, + true, + "parsed file", + "parsed file" + ], [ "term", "single-term", @@ -55181,6 +56806,48 @@ "objects", "objects" ], + [ + "term", + "single-term", + 15253541252152665681, + "TEXT", + "#/texts/64", + 1.0, + 329104161531686411, + 1734187893073550921, + 18446744073709551615, + 18446744073709551615, + 251, + 256, + 251, + 256, + 47, + 48, + true, + "cells", + "cells" + ], + [ + "verb", + "compound-verb", + 15253541252152665681, + "TEXT", + "#/texts/64", + 1.0, + 13995199617429053628, + 17399804092489638859, + 18446744073709551615, + 18446744073709551615, + 214, + 228, + 214, + 228, + 41, + 43, + true, + "is constructed", + "is constructed" + ], [ "verb", "single-verb", @@ -55244,6 +56911,27 @@ "retaining", "retaining" ], + [ + "verb", + "single-verb", + 15253541252152665681, + "TEXT", + "#/texts/64", + 1.0, + 5615554093848987331, + 567272589042250740, + 18446744073709551615, + 18446744073709551615, + 232, + 242, + 232, + 242, + 44, + 45, + true, + "assembling", + "assembling" + ], [ "conn", "single-conn", @@ -55307,6 +56995,48 @@ "from the", "from the" ], + [ + "conn", + "single-conn", + 15253541252152665681, + "TEXT", + "#/texts/64", + 1.0, + 15441160910541486989, + 7536681162200555270, + 18446744073709551615, + 18446744073709551615, + 229, + 231, + 229, + 231, + 43, + 44, + true, + "by", + "by" + ], + [ + "conn", + "single-conn", + 15253541252152665681, + "TEXT", + "#/texts/64", + 1.0, + 14637917359887717745, + 313033658418932224, + 18446744073709551615, + 18446744073709551615, + 257, + 265, + 257, + 265, + 48, + 50, + true, + "from the", + "from the" + ], [ "numval", "ival", @@ -56588,6 +58318,153 @@ "Corpus Conversion Service", "Corpus Conversion Service" ], + [ + "term", + "single-term", + 6410818076508661508, + "TEXT", + "#/texts/66", + 1.0, + 16807436920751143074, + 39376237413306192, + 18446744073709551615, + 18446744073709551615, + 147, + 163, + 145, + 161, + 36, + 39, + true, + "past few decades", + "past few decades" + ], + [ + "term", + "single-term", + 6410818076508661508, + "TEXT", + "#/texts/66", + 1.0, + 7863808487922385366, + 585004759158412684, + 18446744073709551615, + 18446744073709551615, + 179, + 198, + 177, + 196, + 43, + 45, + true, + "scientific articles", + "scientific articles" + ], + [ + "term", + "single-term", + 6410818076508661508, + "TEXT", + "#/texts/66", + 1.0, + 2748712640397819667, + 15735607294232033038, + 18446744073709551615, + 18446744073709551615, + 231, + 255, + 227, + 251, + 55, + 58, + true, + "IBM Research Rueschlikon", + "IBM Research Rueschlikon" + ], + [ + "term", + "single-term", + 6410818076508661508, + "TEXT", + "#/texts/66", + 1.0, + 4686361850733567621, + 1467369190559998124, + 18446744073709551615, + 18446744073709551615, + 285, + 300, + 281, + 296, + 67, + 71, + true, + "Peter W J Staar", + "Peter W J Staar" + ], + [ + "term", + "single-term", + 6410818076508661508, + "TEXT", + "#/texts/66", + 1.0, + 1571808557594152175, + 14918332605162209401, + 18446744073709551615, + 18446744073709551615, + 302, + 315, + 298, + 311, + 72, + 74, + true, + "Michele Dolfi", + "Michele Dolfi" + ], + [ + "term", + "single-term", + 6410818076508661508, + "TEXT", + "#/texts/66", + 1.0, + 9737597816447750448, + 9883948295774882902, + 18446744073709551615, + 18446744073709551615, + 317, + 331, + 313, + 327, + 75, + 77, + true, + "Christoph Auer", + "Christoph Auer" + ], + [ + "term", + "single-term", + 6410818076508661508, + "TEXT", + "#/texts/66", + 1.0, + 10999349626623612055, + 12887976605120007677, + 18446744073709551615, + 18446744073709551615, + 333, + 345, + 329, + 341, + 78, + 80, + true, + "Costas Bekas", + "Costas Bekas" + ], [ "term", "single-term", @@ -56715,92 +58592,50 @@ "scale" ], [ - "verb", - "single-verb", - 6410818076508661508, - "TEXT", - "#/texts/66", - 1.0, - 14639581097006750428, - 657388776835985868, - 18446744073709551615, - 18446744073709551615, - 68, - 76, - 68, - 76, - 17, - 18, - true, - "learning", - "learning" - ], - [ - "verb", - "single-verb", - 6410818076508661508, - "TEXT", - "#/texts/66", - 1.0, - 16381206560503286032, - 6900030997592563197, - 18446744073709551615, - 18446744073709551615, - 89, - 95, - 89, - 95, - 20, - 21, - true, - "ingest", - "ingest" - ], - [ - "conn", - "single-conn", + "term", + "single-term", 6410818076508661508, "TEXT", "#/texts/66", 1.0, - 15441160910541487054, - 13499006115035219017, + 16381206569333693762, + 6644323518426738955, 18446744073709551615, 18446744073709551615, - 106, - 108, - 106, - 108, - 22, - 23, + 169, + 175, + 167, + 173, + 41, + 42, true, - "at", - "at" + "amount", + "amount" ], [ - "conn", - "single-conn", + "term", + "single-term", 6410818076508661508, "TEXT", "#/texts/66", 1.0, - 15441160910541485865, - 13499010106333859091, + 10789690912301060760, + 11402402460262101791, 18446744073709551615, 18446744073709551615, - 86, - 88, - 86, - 88, - 19, - 20, + 214, + 226, + 210, + 222, + 51, + 52, true, - "to", - "to" + "affiliations", + "affiliations" ], [ - "geoloc", - "country", + "term", + "single-term", 6410818076508661508, "TEXT", "#/texts/66", @@ -56820,8989 +58655,9010 @@ "Switzerland" ], [ - "parenthesis", - "round brackets", - 12813875992986832439, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 11589998698201685701, - 2108045663283293889, + 8106397759446161562, + 18418816159488809177, 18446744073709551615, 18446744073709551615, - 47, - 67, - 47, - 67, - 6, - 12, + 273, + 280, + 269, + 276, + 63, + 64, true, - "(or human-annotated)", - "(or human-annotated)" + "authors", + "authors" ], [ - "sentence", - "", - 12813875992986832439, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 3678194766815209883, - 5187453010508481258, + 389609625632495660, + 11825518189017132096, 18446744073709551615, 18446744073709551615, + 374, + 378, + 368, + 372, 92, - 162, - 92, - 162, - 16, - 30, + 93, true, - "It should be noted that no machine learning is used in this component.", - "It should be noted that no machine learning is used in this component." + "prov", + "prov" ], [ - "sentence", - "", - 12813875992986832439, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 6629453114376390697, - 8094247588633397965, + 389609625686435799, + 11831829466319151350, 18446744073709551615, 18446744073709551615, - 163, - 226, - 163, - 226, - 30, - 40, + 386, + 390, + 380, + 384, + 98, + 99, true, - "It is purely rule based and therefore completely deterministic.", - "It is purely rule based and therefore completely deterministic." + "bbox", + "bbox" ], [ "term", "single-term", - 12813875992986832439, + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 16568806906391567217, - 17793791609084484746, + 389609625632301461, + 11826274251713036201, 18446744073709551615, 18446744073709551615, - 119, - 135, - 119, - 135, - 22, - 24, + 432, + 436, + 426, + 430, + 112, + 113, true, - "machine learning", - "machine learning" + "page", + "page" ], [ "term", "single-term", - 12813875992986832439, + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 5947879501615734370, - 3445417967466009645, + 389609625631434316, + 11825532397025369026, 18446744073709551615, 18446744073709551615, - 152, - 161, - 152, - 161, - 28, - 29, + 447, + 451, + 441, + 445, + 120, + 121, true, - "component", - "component" + "type", + "type" ], [ "term", "single-term", - 12813875992986832439, + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 389609625633008101, - 12949720961570224958, + 389609625631325904, + 11825471963320823231, 18446744073709551615, 18446744073709551615, - 176, - 180, - 176, - 180, - 33, - 34, + 477, + 481, + 471, + 475, + 128, + 129, true, - "rule", - "rule" + "text", + "text" ], [ - "verb", - "compound-verb", - 12813875992986832439, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 10453859466047522884, - 10239611338580811250, + 8523954622022126279, + 1560220356059846107, 18446744073709551615, 18446744073709551615, - 95, - 110, - 95, - 110, - 17, - 20, + 488, + 500, + 482, + 494, + 133, + 134, true, - "should be noted", - "should be noted" + "INTRODUCTION", + "INTRODUCTION" ], [ - "verb", - "compound-verb", - 12813875992986832439, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 8106398132977396513, - 9427955354524457620, + 389609625632495660, + 11825518189017115188, 18446744073709551615, 18446744073709551615, - 136, - 143, - 136, - 143, - 24, - 26, + 509, + 513, + 503, + 507, + 139, + 140, true, - "is used", - "is used" + "prov", + "prov" ], [ - "verb", - "compound-verb", - 12813875992986832439, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 6181919770894982462, - 17141845908897276483, + 389609625686435799, + 11831829466319043998, 18446744073709551615, 18446744073709551615, - 166, - 175, - 166, - 175, - 31, - 33, + 521, + 525, + 515, + 519, + 145, + 146, true, - "is purely", - "is purely" + "bbox", + "bbox" ], [ - "verb", - "single-verb", - 12813875992986832439, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 329104159219515955, - 12104426966588498612, + 389609625632301461, + 11826274251713027840, 18446744073709551615, 18446744073709551615, - 181, - 186, - 181, - 186, - 34, - 35, + 567, + 571, + 561, + 565, + 159, + 160, true, - "based", - "based" + "page", + "page" ], [ - "conn", - "single-conn", - 12813875992986832439, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 8106351186178321347, - 18145291139271698703, + 389609625631434316, + 11825532397025491822, 18446744073709551615, 18446744073709551615, - 111, - 118, - 111, - 118, - 20, - 22, + 582, + 586, + 576, + 580, + 167, + 168, true, - "that no", - "that no" + "type", + "type" ], [ - "conn", - "single-conn", - 12813875992986832439, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/67", + "#/texts/66", 1.0, - 8106398107541152403, - 17574223839716805875, + 6169141668427353082, + 15820812710042168185, 18446744073709551615, 18446744073709551615, - 144, - 151, - 144, - 151, - 26, - 28, - true, - "in this", - "in this" + 591, + 600, + 585, + 594, + 171, + 172, + true, + "paragraph", + "paragraph" ], [ - "numval", - "ival", - 11030869010407626539, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 17767354399704235161, - 5543555095985442958, + 389609625631325904, + 11825471963320873695, 18446744073709551615, 18446744073709551615, - 528, - 529, - 528, - 529, - 97, - 98, + 605, + 609, + 599, + 603, + 175, + 176, true, - "1", - "1" + "text", + "text" ], [ - "sentence", - "", - 11030869010407626539, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 10627520535034650380, - 17531239345629359200, + 8106398484416909789, + 13304813118405536345, 18446744073709551615, 18446744073709551615, - 0, - 41, - 0, - 41, - 0, - 9, + 656, + 663, + 650, + 657, + 187, + 188, true, - "The assembly phase is a two step process.", - "The assembly phase is a two step process." + "context", + "context" ], [ - "sentence", - "", - 11030869010407626539, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 16727770016948924314, - 14992801873823104190, + 16381206513098478539, + 6385837633590220909, 18446744073709551615, 18446744073709551615, - 42, - 161, - 42, - 161, - 9, - 30, + 676, + 682, + 670, + 676, + 196, + 197, true, - "First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order.", - "First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order." + "tables", + "tables" ], [ - "sentence", - "", - 11030869010407626539, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 14844075509771675718, - 14837722875086120924, + 3719221080692272657, + 10124958637147004614, 18446744073709551615, 18446744073709551615, - 162, - 263, - 162, - 263, - 30, - 50, + 686, + 697, + 680, + 691, + 199, + 200, true, - "Then, the text of all cells that have the same label is contracted into a temporary document objects.", - "Then, the text of all cells that have the same label is contracted into a temporary document objects." + "[{etc},etc]", + "[{...},...]" ], [ - "sentence", - "", - 11030869010407626539, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 13321150786190303145, - 15098908199975296360, + 16381206560620045048, + 6780363823004583122, 18446744073709551615, 18446744073709551615, - 264, - 386, - 264, - 386, - 50, - 72, + 700, + 706, + 694, + 700, + 202, + 203, true, - "Third, we build the internal structure of the temporary document objects, based on the information provided by the models.", - "Third, we build the internal structure of the temporary document objects, based on the information provided by the models." + "images", + "images" ], [ - "sentence", - "", - 11030869010407626539, + "term", + "single-term", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 13420423012039011390, - 1170303508595995079, + 3719221080692272657, + 10124958637146997952, 18446744073709551615, 18446744073709551615, - 387, - 467, - 387, - 467, - 72, - 86, + 710, + 721, + 704, + 715, + 205, + 206, true, - "The latter is only applicable for internally structured objects, such as tables.", - "The latter is only applicable for internally structured objects, such as tables." + "[{etc},etc]", + "[{...},...]" ], [ - "sentence", - "", - 11030869010407626539, + "verb", + "compound-verb", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 12664594763449438938, - 4431678268083815697, + 17551793109234931072, + 2438234792868575090, 18446744073709551615, 18446744073709551615, - 468, - 530, - 468, - 530, - 86, - 99, + 617, + 629, + 611, + 623, + 180, + 182, true, - "An example of the generated JSON output is shown in Listing 1.", - "An example of the generated JSON output is shown in Listing 1." + "is estimated", + "is estimated" ], [ - "term", - "single-term", - 11030869010407626539, + "verb", + "single-verb", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 14290303081280478932, - 16534232039347859570, + 14639581097006750428, + 657388776835985868, 18446744073709551615, 18446744073709551615, - 4, - 18, - 4, + 68, + 76, + 68, + 76, + 17, 18, - 1, - 3, true, - "assembly phase", - "assembly phase" + "learning", + "learning" ], [ - "term", - "single-term", - 11030869010407626539, + "verb", + "single-verb", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 17347109100190648605, - 3336910274907778664, + 16381206560503286032, + 6900030997592563197, 18446744073709551615, 18446744073709551615, - 28, - 40, - 28, - 40, - 6, - 8, + 89, + 95, + 89, + 95, + 20, + 21, true, - "step process", - "step process" + "ingest", + "ingest" ], [ - "term", - "single-term", - 11030869010407626539, + "verb", + "single-verb", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 2317020437411802284, - 2943170210053648815, + 12178341415895640485, + 31501442084677883, 18446744073709551615, 18446744073709551615, - 97, - 118, - 97, - 118, - 19, - 22, + 641, + 644, + 635, + 638, + 184, + 185, true, - "layout semantic label", - "layout semantic label" + "put", + "put" ], [ - "term", - "single-term", - 11030869010407626539, + "conn", + "single-conn", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 15944815540688621742, - 12538918597954147758, + 15441160910541487054, + 13499006115035219017, 18446744073709551615, 18446744073709551615, - 204, - 214, - 204, - 214, - 40, - 42, + 106, + 108, + 106, + 108, + 22, + 23, true, - "same label", - "same label" + "at", + "at" ], [ - "term", - "single-term", - 11030869010407626539, + "conn", + "single-conn", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 16002692145973620163, - 4639543490213745049, + 14650945419058940869, + 14589130895763263298, 18446744073709551615, 18446744073709551615, - 236, - 262, - 236, - 262, - 46, - 49, + 138, + 146, + 136, + 144, + 34, + 36, true, - "temporary document objects", - "temporary document objects" + "Over the", + "Over the" ], [ - "term", - "single-term", - 11030869010407626539, + "conn", + "single-conn", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 10566132640081128, - 4599927001618331381, + 15441160910541485670, + 13499010107336150957, 18446744073709551615, 18446744073709551615, - 284, - 302, - 284, - 302, - 55, - 57, + 176, + 178, + 174, + 176, + 42, + 43, true, - "internal structure", - "internal structure" + "of", + "of" ], [ - "term", - "single-term", - 11030869010407626539, + "conn", + "single-conn", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 16002692145973620163, - 4639543490213757407, + 389609625631229034, + 11825499861021689519, 18446744073709551615, 18446744073709551615, - 310, - 336, - 310, - 336, - 59, - 62, + 630, + 634, + 624, + 628, + 182, + 183, true, - "temporary document objects", - "temporary document objects" + "that", + "that" ], [ - "term", - "single-term", - 11030869010407626539, + "conn", + "single-conn", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 7430002429723240008, - 11164050789656870747, + 5748787292106066554, + 7811517258161395358, 18446744073709551615, 18446744073709551615, - 486, - 507, - 486, - 507, - 90, - 93, + 645, + 655, + 639, + 649, + 185, + 187, true, - "generated JSON output", - "generated JSON output" + "these into", + "these into" ], [ - "term", - "single-term", - 11030869010407626539, + "conn", + "single-conn", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 8106397416725855571, - 6968734469406140499, + 15441160910541485865, + 13499010106333859091, 18446744073709551615, 18446744073709551615, - 53, - 60, - 53, - 60, - 12, - 13, + 86, + 88, + 86, + 88, + 19, + 20, true, - "gathers", - "gathers" + "to", + "to" ], [ - "term", - "single-term", - 11030869010407626539, + "geoloc", + "country", + 6410818076508661508, "TEXT", - "#/texts/68", + "#/texts/66", 1.0, - 329104161531686411, - 14389223814653826808, + 2664439525053388608, + 9116367361930621434, 18446744073709551615, 18446744073709551615, - 69, - 74, - 69, - 74, - 15, - 16, + 257, + 268, + 253, + 264, + 59, + 60, true, - "cells", - "cells" + "Switzerland", + "Switzerland" ], [ - "term", - "single-term", - 11030869010407626539, + "parenthesis", + "round brackets", + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 329104161571401725, - 14130794494432512208, + 11589998698201685701, + 2108045663283293889, 18446744073709551615, 18446744073709551615, - 155, - 160, - 155, - 160, - 28, - 29, + 47, + 67, + 47, + 67, + 6, + 12, true, - "order", - "order" + "(or human-annotated)", + "(or human-annotated)" ], [ - "term", - "single-term", - 11030869010407626539, + "sentence", + "", + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 389609625631325904, - 11472131617453103029, + 3678194766815209883, + 5187453010508481258, 18446744073709551615, 18446744073709551615, - 172, - 176, - 172, - 176, - 33, - 34, + 92, + 162, + 92, + 162, + 16, + 30, true, - "text", - "text" + "It should be noted that no machine learning is used in this component.", + "It should be noted that no machine learning is used in this component." ], [ - "term", - "single-term", - 11030869010407626539, + "sentence", + "", + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 329104161531686411, - 14389223814653817543, + 6629453114376390697, + 8094247588633397965, 18446744073709551615, 18446744073709551615, - 184, - 189, - 184, - 189, - 36, - 37, + 163, + 226, + 163, + 226, + 30, + 40, true, - "cells", - "cells" + "It is purely rule based and therefore completely deterministic.", + "It is purely rule based and therefore completely deterministic." ], [ "term", "single-term", - 11030869010407626539, + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 329104161844229707, - 14389961847775103244, + 7093698410186659732, + 515265999464842782, 18446744073709551615, 18446744073709551615, - 264, - 269, - 264, - 269, - 50, - 51, + 68, + 90, + 68, + 90, + 12, + 15, true, - "Third", - "Third" + "layout semantic labels", + "layout semantic labels" ], [ "term", "single-term", - 11030869010407626539, + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 14388065630035882329, - 10056850550847032004, + 16568806906391567217, + 17793791609084484746, 18446744073709551615, 18446744073709551615, - 351, - 362, - 351, - 362, - 66, - 67, + 119, + 135, + 119, + 135, + 22, + 24, true, - "information", - "information" + "machine learning", + "machine learning" ], [ "term", "single-term", - 11030869010407626539, + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 16381206567230470443, - 12055872324404544162, + 2989796650905950968, + 9358591355722397397, 18446744073709551615, 18446744073709551615, - 379, - 385, - 379, - 385, - 70, - 71, + 3, + 14, + 3, + 14, + 1, + 2, true, - "models", - "models" + "combination", + "combination" ], [ "term", "single-term", - 11030869010407626539, + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 16381206590630461421, - 13437572129232177666, + 5947879501615734370, + 3445417967466009645, 18446744073709551615, 18446744073709551615, - 391, - 397, - 391, - 397, - 73, - 74, + 152, + 161, + 152, + 161, + 28, + 29, true, - "latter", - "latter" + "component", + "component" ], [ "term", "single-term", - 11030869010407626539, + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 8106342034010873556, - 5767697418284233272, + 389609625633008101, + 12949720961570224958, 18446744073709551615, 18446744073709551615, - 443, - 450, - 443, - 450, - 80, - 81, + 176, + 180, + 176, + 180, + 33, + 34, true, - "objects", - "objects" + "rule", + "rule" ], [ - "term", - "single-term", - 11030869010407626539, + "verb", + "compound-verb", + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 16381206513098478539, - 14656247513331790784, + 14972061082429253578, + 7992280381584678423, 18446744073709551615, 18446744073709551615, - 460, - 466, - 460, - 466, - 84, - 85, + 26, + 46, + 26, + 46, + 4, + 6, true, - "tables", - "tables" + "associated predicted", + "associated predicted" ], [ - "term", - "single-term", - 11030869010407626539, + "verb", + "compound-verb", + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 8106397496085150773, - 7053203505372722327, + 10453859466047522884, + 10239611338580811250, 18446744073709551615, 18446744073709551615, - 471, - 478, - 471, - 478, - 87, - 88, + 95, + 110, + 95, + 110, + 17, + 20, true, - "example", - "example" + "should be noted", + "should be noted" ], [ "verb", "compound-verb", - 11030869010407626539, + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 17902514739826327922, - 7529083656148566052, + 8106398132977396513, + 9427955354524457620, 18446744073709551615, 18446744073709551615, - 134, - 154, - 134, - 154, - 25, - 28, + 136, + 143, + 136, + 143, + 24, + 26, true, - "according to reading", - "according to reading" + "is used", + "is used" ], [ "verb", "compound-verb", - 11030869010407626539, + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 12000496086994902479, - 13430903801362966440, + 6181919770894982462, + 17141845908897276483, 18446744073709551615, 18446744073709551615, - 215, - 228, - 215, - 228, - 42, - 44, + 166, + 175, + 166, + 175, + 31, + 33, true, - "is contracted", - "is contracted" + "is purely", + "is purely" ], [ "verb", - "compound-verb", - 11030869010407626539, + "single-verb", + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 8106398132970509785, - 7717436398183375812, + 5946726816546568286, + 14981204101613166078, 18446744073709551615, 18446744073709551615, - 398, - 405, - 398, - 405, - 74, - 76, + 57, + 66, + 57, + 66, + 10, + 11, true, - "is only", - "is only" + "annotated", + "annotated" ], [ "verb", - "compound-verb", - 11030869010407626539, + "single-verb", + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 14637951881518043285, - 9028879385327672482, + 329104159219515955, + 12104426966588498612, 18446744073709551615, 18446744073709551615, - 508, - 516, - 508, - 516, - 93, - 95, + 181, + 186, + 181, + 186, + 34, + 35, true, - "is shown", - "is shown" + "based", + "based" ], [ - "verb", - "single-verb", - 11030869010407626539, + "conn", + "single-conn", + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 15441160910541486535, - 10491326776662798407, + 15441160910541486538, + 13969787273736284569, + 18446744073709551615, + 18446744073709551615, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "in", + "in" + ], + [ + "conn", + "single-conn", + 12813875992986832439, + "TEXT", + "#/texts/67", + 1.0, + 389609625618037948, + 12948291122374484621, 18446744073709551615, 18446744073709551615, + 15, 19, - 21, + 15, 19, - 21, + 2, 3, - 4, true, - "is", - "is" + "with", + "with" ], [ - "verb", - "single-verb", - 11030869010407626539, + "conn", + "single-conn", + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 5615021626537608757, - 17464799388347342780, + 8106351186178321347, + 18145291139271698703, 18446744073709551615, 18446744073709551615, - 86, - 96, - 86, - 96, - 18, - 19, + 111, + 118, + 111, + 118, + 20, + 22, true, - "associated", - "associated" + "that no", + "that no" ], [ - "verb", - "single-verb", - 11030869010407626539, + "conn", + "single-conn", + 12813875992986832439, "TEXT", - "#/texts/68", + "#/texts/67", 1.0, - 329104161786092648, - 14156877157946491894, + 8106398107541152403, + 17574223839716805875, 18446744073709551615, 18446744073709551615, - 123, - 128, - 123, - 128, - 23, - 24, + 144, + 151, + 144, + 151, + 26, + 28, true, - "sorts", - "sorts" + "in this", + "in this" ], [ - "verb", - "single-verb", + "numval", + "ival", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 389609625695387621, - 11482694222264222746, + 17767354399704235161, + 5543555095985442958, 18446744073709551615, 18446744073709551615, - 195, - 199, - 195, - 199, - 38, - 39, + 528, + 529, + 528, + 529, + 97, + 98, true, - "have", - "have" + "1", + "1" ], [ - "verb", - "single-verb", + "sentence", + "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 329104159303279946, - 14234820330716235313, + 10627520535034650380, + 17531239345629359200, 18446744073709551615, 18446744073709551615, - 274, - 279, - 274, - 279, - 53, - 54, + 0, + 41, + 0, + 41, + 0, + 9, true, - "build", - "build" + "The assembly phase is a two step process.", + "The assembly phase is a two step process." ], [ - "verb", - "single-verb", + "sentence", + "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 329104159219515955, - 13988686724284707554, + 16727770016948924314, + 14992801873823104190, 18446744073709551615, 18446744073709551615, - 338, - 343, - 338, - 343, - 63, - 64, + 42, + 161, + 42, + 161, + 9, + 30, true, - "based", - "based" + "First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order.", + "First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order." ], [ - "verb", - "single-verb", + "sentence", + "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 14814125838089603136, - 10841486009179486453, + 14844075509771675718, + 14837722875086120924, 18446744073709551615, 18446744073709551615, - 363, - 371, - 363, - 371, - 67, - 68, + 162, + 263, + 162, + 263, + 30, + 50, true, - "provided", - "provided" + "Then, the text of all cells that have the same label is contracted into a temporary document objects.", + "Then, the text of all cells that have the same label is contracted into a temporary document objects." ], [ - "verb", - "single-verb", + "sentence", + "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 14120356269929906423, - 17410768018743515205, + 13321150786190303145, + 15098908199975296360, 18446744073709551615, 18446744073709551615, - 432, - 442, - 432, - 442, - 79, - 80, + 264, + 386, + 264, + 386, + 50, + 72, true, - "structured", - "structured" + "Third, we build the internal structure of the temporary document objects, based on the information provided by the models.", + "Third, we build the internal structure of the temporary document objects, based on the information provided by the models." ], [ - "verb", - "single-verb", + "sentence", + "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 8106471806274607440, - 1670327284070813530, + 13420423012039011390, + 1170303508595995079, 18446744073709551615, 18446744073709551615, - 520, - 527, - 520, - 527, - 96, - 97, + 387, + 467, + 387, + 467, + 72, + 86, true, - "Listing", - "Listing" + "The latter is only applicable for internally structured objects, such as tables.", + "The latter is only applicable for internally structured objects, such as tables." ], [ - "conn", - "single-conn", + "sentence", + "", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 901249285509952446, - 4327457466414367608, + 12664594763449438938, + 4431678268083815697, 18446744073709551615, 18446744073709551615, - 406, - 420, - 406, - 420, - 76, - 78, + 468, + 530, + 468, + 530, + 86, + 99, true, - "applicable for", - "applicable for" + "An example of the generated JSON output is shown in Listing 1.", + "An example of the generated JSON output is shown in Listing 1." ], [ - "conn", - "single-conn", + "term", + "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 8106478685702231057, - 8652417891385661854, + 14290303081280478932, + 16534232039347859570, 18446744073709551615, 18446744073709551615, - 452, - 459, - 452, - 459, - 82, - 84, + 4, + 18, + 4, + 18, + 1, + 3, true, - "such as", - "such as" + "assembly phase", + "assembly phase" ], [ - "conn", - "single-conn", + "term", + "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 389609625618037948, - 11467344892940421528, + 17347109100190648605, + 3336910274907778664, 18446744073709551615, 18446744073709551615, - 75, - 79, - 75, - 79, - 16, - 17, + 28, + 40, + 28, + 40, + 6, + 8, true, - "with", - "with" + "step process", + "step process" ], [ - "conn", - "single-conn", + "term", + "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 16381206565712007226, - 11723414488362140611, + 2317020437411802284, + 2943170210053648815, 18446744073709551615, 18446744073709551615, - 177, - 183, - 177, - 183, - 34, - 36, + 97, + 118, + 97, + 118, + 19, + 22, true, - "of all", - "of all" + "layout semantic label", + "layout semantic label" ], [ - "conn", - "single-conn", + "term", + "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 16381206560517276114, - 11772523745347271510, + 15944815540688621742, + 12538918597954147758, 18446744073709551615, 18446744073709551615, - 229, - 235, - 229, - 235, - 44, - 46, + 204, + 214, + 204, + 214, + 40, + 42, true, - "into a", - "into a" + "same label", + "same label" ], [ - "conn", - "single-conn", + "term", + "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 16381206565712212855, - 11847078438284722432, + 16002692145973620163, + 4639543490213745049, 18446744073709551615, 18446744073709551615, - 303, - 309, - 303, - 309, - 57, - 59, + 236, + 262, + 236, + 262, + 46, + 49, true, - "of the", - "of the" + "temporary document objects", + "temporary document objects" ], [ - "conn", - "single-conn", + "term", + "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 16381206566339127348, - 11698215495646926996, + 10566132640081128, + 4599927001618331381, 18446744073709551615, 18446744073709551615, - 344, - 350, - 344, - 350, - 64, - 66, + 284, + 302, + 284, + 302, + 55, + 57, true, - "on the", - "on the" + "internal structure", + "internal structure" ], [ - "conn", - "single-conn", + "term", + "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 16381206574363061705, - 17340129670882622867, + 16002692145973620163, + 4639543490213757407, 18446744073709551615, 18446744073709551615, - 372, - 378, - 372, - 378, - 68, - 70, + 310, + 336, + 310, + 336, + 59, + 62, true, - "by the", - "by the" + "temporary document objects", + "temporary document objects" ], [ - "conn", - "single-conn", + "term", + "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 16381206565712212855, - 11847078438284787116, + 7430002429723240008, + 11164050789656870747, 18446744073709551615, 18446744073709551615, - 479, - 485, - 479, - 485, - 88, + 486, + 507, + 486, + 507, 90, + 93, true, - "of the", - "of the" + "generated JSON output", + "generated JSON output" ], [ - "conn", - "single-conn", + "term", + "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 15441160910541486538, - 10491326776470778829, + 8106397416725855571, + 6968734469406140499, 18446744073709551615, 18446744073709551615, - 517, - 519, - 517, - 519, - 95, - 96, + 53, + 60, + 53, + 60, + 12, + 13, true, - "in", - "in" + "gathers", + "gathers" ], [ - "conn", - "single-conn", + "term", + "single-term", 11030869010407626539, "TEXT", "#/texts/68", 1.0, - 15441160910541485865, - 10491326711005526490, + 329104161531686411, + 14389223814653826808, 18446744073709551615, 18446744073709551615, - 144, - 146, - 144, - 146, - 26, - 27, + 69, + 74, + 69, + 74, + 15, + 16, true, - "to", - "to" + "cells", + "cells" ], [ - "numval", - "ival", - 2142320548375900929, + "term", + "single-term", + 11030869010407626539, "TEXT", - "#/texts/69", + "#/texts/68", 1.0, - 17767354399704235156, - 16458659285473085163, + 329104161571401725, + 14130794494432512208, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 155, + 160, + 155, + 160, + 28, + 29, true, - "4", - "4" + "order", + "order" ], [ - "expression", - "word-concatenation", - 12747011194397783283, + "term", + "single-term", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 6285955549867796622, - 12192460564545960229, + 389609625631325904, + 11472131617453103029, 18446744073709551615, 18446744073709551615, - 618, - 634, - 618, - 634, - 111, - 112, + 172, + 176, + 172, + 176, + 33, + 34, true, - "time-to-solution", - "time-to-solution" + "text", + "text" ], [ - "sentence", - "", - 12747011194397783283, + "term", + "single-term", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 11044655914692672378, - 2888733359687006370, + 329104161531686411, + 14389223814653817543, 18446744073709551615, 18446744073709551615, - 0, - 123, - 0, - 123, - 0, - 22, + 184, + 189, + 184, + 189, + 36, + 37, true, - "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated.", - "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated." + "cells", + "cells" ], [ - "sentence", - "", - 12747011194397783283, + "term", + "single-term", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 9774189456888168740, - 4152543508246757256, + 329104161844229707, + 14389961847775103244, 18446744073709551615, 18446744073709551615, - 124, - 246, - 124, - 246, - 22, - 43, + 264, + 269, + 264, + 269, + 50, + 51, true, - "Before discussing the technical details, we would like to point out our requirements for the architecture of the platform.", - "Before discussing the technical details, we would like to point out our requirements for the architecture of the platform." + "Third", + "Third" ], [ - "sentence", - "", - 12747011194397783283, + "term", + "single-term", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 12407957798033762804, - 13470604212648561724, + 14388065630035882329, + 10056850550847032004, 18446744073709551615, 18446744073709551615, - 247, - 293, - 247, - 293, - 43, - 51, + 351, + 362, + 351, + 362, + 66, + 67, true, - "These requirements are all related to scaling.", - "These requirements are all related to scaling." + "information", + "information" ], [ - "sentence", - "", - 12747011194397783283, + "term", + "single-term", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 4653964671317425985, - 17216044985232325101, + 16381206567230470443, + 12055872324404544162, 18446744073709551615, 18446744073709551615, - 294, - 461, - 294, - 461, - 51, - 83, + 379, + 385, + 379, + 385, + 70, + 71, true, - "Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources.", - "Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources." + "models", + "models" ], [ - "sentence", - "", - 12747011194397783283, + "term", + "single-term", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 17228622883758304054, - 15113971675963977401, + 16381206590630461421, + 13437572129232177666, 18446744073709551615, 18446744073709551615, - 462, - 680, - 462, - 680, - 83, - 121, + 391, + 397, + 391, + 397, + 73, + 74, true, - "In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation.", - "In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation." + "latter", + "latter" ], [ - "sentence", - "", - 12747011194397783283, + "term", + "single-term", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 6820290209528918513, - 3633182920105543370, - 18446744073709551615, - 18446744073709551615, - 681, - 777, - 681, - 777, - 121, - 138, - true, - "It is clear that the architecture of such a service is heavily influenced by these requirements.", - "It is clear that the architecture of such a service is heavily influenced by these requirements." - ], - [ - "term", - "single-term", - 12747011194397783283, - "TEXT", - "#/texts/70", - 1.0, - 11289641670498948963, - 4109634796027215399, - 18446744073709551615, - 18446744073709551615, - 146, - 163, - 146, - 163, - 25, - 27, - true, - "technical details", - "technical details" - ], - [ - "term", - "single-term", - 12747011194397783283, - "TEXT", - "#/texts/70", - 1.0, - 4421383392096991748, - 4820655472322214248, + 8106342034010873556, + 5767697418284233272, 18446744073709551615, 18446744073709551615, 443, - 460, + 450, 443, - 460, + 450, 80, - 82, + 81, true, - "compute resources", - "compute resources" + "objects", + "objects" ], [ "term", "single-term", - 12747011194397783283, + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 16088126245064377604, - 12842078242820415728, + 16381206513098478539, + 14656247513331790784, 18446744073709551615, 18446744073709551615, - 465, - 476, - 465, - 476, + 460, + 466, + 460, + 466, 84, - 86, + 85, true, - "other words", - "other words" + "tables", + "tables" ], [ "term", "single-term", - 12747011194397783283, + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 4421383392096991748, - 4820655472321830361, + 8106397496085150773, + 7053203505372722327, 18446744073709551615, 18446744073709551615, - 586, - 603, - 586, - 603, - 106, - 108, + 471, + 478, + 471, + 478, + 87, + 88, true, - "compute resources", - "compute resources" + "example", + "example" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "compound-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 8106478708629288965, - 853306226471699405, + 17902514739826327922, + 7529083656148566052, 18446744073709551615, 18446744073709551615, - 8, - 15, - 8, - 15, - 2, - 3, + 134, + 154, + 134, + 154, + 25, + 28, true, - "section", - "section" + "according to reading", + "according to reading" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "compound-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 990358581043194791, - 2414189034056929402, + 12000496086994902479, + 13430903801362966440, 18446744073709551615, 18446744073709551615, - 37, - 50, - 37, - 50, - 8, - 9, + 215, + 228, + 215, + 228, + 42, + 44, true, - "microservices", - "microservices" + "is contracted", + "is contracted" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "compound-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 2703018952916355661, - 17317252314622786864, + 8106398132970509785, + 7717436398183375812, 18446744073709551615, 18446744073709551615, - 66, - 76, - 66, + 398, + 405, + 398, + 405, + 74, 76, - 13, - 14, true, - "components", - "components" + "is only", + "is only" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "compound-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 14814125365076808131, - 4170838424915628816, + 14637951881518043285, + 9028879385327672482, 18446744073709551615, 18446744073709551615, - 84, - 92, - 84, - 92, - 16, - 17, + 508, + 516, + 508, + 516, + 93, + 95, true, - "platform", - "platform" + "is shown", + "is shown" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "single-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 13240311013633905449, - 11928407068432787608, + 15441160910541486535, + 10491326776662798407, 18446744073709551615, 18446744073709551615, - 196, - 208, - 196, - 208, - 35, - 36, + 19, + 21, + 19, + 21, + 3, + 4, true, - "requirements", - "requirements" + "is", + "is" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "single-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 11899564443746965611, - 1669599917395635316, + 5615021626537608757, + 17464799388347342780, 18446744073709551615, 18446744073709551615, - 217, - 229, - 217, - 229, - 38, - 39, + 86, + 96, + 86, + 96, + 18, + 19, true, - "architecture", - "architecture" + "associated", + "associated" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "single-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 14814125365076808131, - 4170838424915634854, + 329104161786092648, + 14156877157946491894, 18446744073709551615, 18446744073709551615, - 237, - 245, - 237, - 245, - 41, - 42, + 123, + 128, + 123, + 128, + 23, + 24, true, - "platform", - "platform" + "sorts", + "sorts" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "single-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 13240311013633905449, - 11928407068432751416, + 389609625695387621, + 11482694222264222746, 18446744073709551615, 18446744073709551615, - 253, - 265, - 253, - 265, - 44, - 45, + 195, + 199, + 195, + 199, + 38, + 39, true, - "requirements", - "requirements" + "have", + "have" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "single-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 14814125365076808131, - 4170838424915633248, + 329104159303279946, + 14234820330716235313, 18446744073709551615, 18446744073709551615, - 326, - 334, - 326, - 334, - 57, - 58, + 274, + 279, + 274, + 279, + 53, + 54, true, - "platform", - "platform" + "build", + "build" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "single-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 16381206574973295053, - 579996873747921936, + 329104159219515955, + 13988686724284707554, 18446744073709551615, 18446744073709551615, - 353, - 359, - 353, - 359, - 62, + 338, + 343, + 338, + 343, 63, + 64, true, - "number", - "number" + "based", + "based" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "single-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 6167933651658664291, - 7440866408497827921, + 14814125838089603136, + 10841486009179486453, 18446744073709551615, 18446744073709551615, 363, - 372, + 371, 363, - 372, - 64, - 65, + 371, + 67, + 68, true, - "documents", - "documents" + "provided", + "provided" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "single-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 16381206574973295053, - 579996873747911416, + 14120356269929906423, + 17410768018743515205, 18446744073709551615, 18446744073709551615, - 378, - 384, - 378, - 384, - 67, - 68, + 432, + 442, + 432, + 442, + 79, + 80, true, - "number", - "number" + "structured", + "structured" ], [ - "term", - "single-term", - 12747011194397783283, + "verb", + "single-verb", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 329104159157820437, - 15600004509778203866, + 8106471806274607440, + 1670327284070813530, 18446744073709551615, 18446744073709551615, - 388, - 393, - 388, - 393, - 69, - 70, + 520, + 527, + 520, + 527, + 96, + 97, true, - "users", - "users" + "Listing", + "Listing" ], [ - "term", - "single-term", - 12747011194397783283, + "conn", + "single-conn", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 16381206574973295053, - 579996873747912575, + 901249285509952446, + 4327457466414367608, 18446744073709551615, 18446744073709551615, - 421, - 427, - 421, - 427, + 406, + 420, + 406, + 420, 76, - 77, + 78, true, - "number", - "number" + "applicable for", + "applicable for" ], [ - "term", - "single-term", - 12747011194397783283, + "conn", + "single-conn", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 329104161517016668, - 13957283097469922549, + 8106478685702231057, + 8652417891385661854, 18446744073709551615, 18446744073709551615, - 431, - 436, - 431, - 436, - 78, - 79, + 452, + 459, + 452, + 459, + 82, + 84, true, - "cloud", - "cloud" + "such as", + "such as" ], [ - "term", - "single-term", - 12747011194397783283, + "conn", + "single-conn", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 8106478708506632112, - 1549478568074441550, - 18446744073709551615, + 389609625618037948, + 11467344892940421528, 18446744073709551615, - 488, - 495, - 488, - 495, - 90, - 91, + 18446744073709551615, + 75, + 79, + 75, + 79, + 16, + 17, true, - "service", - "service" + "with", + "with" ], [ - "term", - "single-term", - 12747011194397783283, + "conn", + "single-conn", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 14638289822750178210, - 16529051670404838156, + 16381206565712007226, + 11723414488362140611, 18446744073709551615, 18446744073709551615, - 512, - 520, - 512, - 520, - 94, - 95, + 177, + 183, + 177, + 183, + 34, + 36, true, - "millions", - "millions" + "of all", + "of all" ], [ - "term", - "single-term", - 12747011194397783283, + "conn", + "single-conn", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 6167933651658664291, - 7440866408497716574, + 16381206560517276114, + 11772523745347271510, 18446744073709551615, 18446744073709551615, - 524, - 533, - 524, - 533, - 96, - 97, + 229, + 235, + 229, + 235, + 44, + 46, true, - "documents", - "documents" + "into a", + "into a" ], [ - "term", - "single-term", - 12747011194397783283, + "conn", + "single-conn", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 3504070246238334482, - 7971751554704088263, + 16381206565712212855, + 11847078438284722432, 18446744073709551615, 18446744073709551615, - 553, - 562, - 553, - 562, - 100, - 101, + 303, + 309, + 303, + 309, + 57, + 59, true, - "thousands", - "thousands" + "of the", + "of the" ], [ - "term", - "single-term", - 12747011194397783283, + "conn", + "single-conn", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 329104159157820437, - 15600004509778174339, + 16381206566339127348, + 11698215495646926996, 18446744073709551615, 18446744073709551615, - 566, - 571, - 566, - 571, - 102, - 103, + 344, + 350, + 344, + 350, + 64, + 66, true, - "users", - "users" + "on the", + "on the" ], [ - "term", - "single-term", - 12747011194397783283, + "conn", + "single-conn", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 6285955549867796622, - 12192460564545960229, + 16381206574363061705, + 17340129670882622867, 18446744073709551615, 18446744073709551615, - 618, - 634, - 618, - 634, - 111, - 112, + 372, + 378, + 372, + 378, + 68, + 70, true, - "time-to-solution", - "time-to-solution" + "by the", + "by the" ], [ - "term", - "single-term", - 12747011194397783283, + "conn", + "single-conn", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 329104159219994925, - 15605472043071850604, + 16381206565712212855, + 11847078438284787116, 18446744073709551615, 18446744073709551615, - 656, - 661, - 656, - 661, - 116, - 117, + 479, + 485, + 479, + 485, + 88, + 90, true, - "times", - "times" + "of the", + "of the" ], [ - "term", - "single-term", - 12747011194397783283, + "conn", + "single-conn", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 6167836358624304835, - 12533972813433648220, + 15441160910541486538, + 10491326776470778829, 18446744073709551615, 18446744073709551615, - 670, - 679, - 670, - 679, - 119, - 120, + 517, + 519, + 517, + 519, + 95, + 96, true, - "operation", - "operation" + "in", + "in" ], [ - "term", - "single-term", - 12747011194397783283, + "conn", + "single-conn", + 11030869010407626539, "TEXT", - "#/texts/70", + "#/texts/68", 1.0, - 11899564443746965611, - 1669599917395666812, + 15441160910541485865, + 10491326711005526490, 18446744073709551615, 18446744073709551615, - 702, - 714, - 702, - 714, - 126, - 127, + 144, + 146, + 144, + 146, + 26, + 27, true, - "architecture", - "architecture" + "to", + "to" ], [ - "term", - "single-term", - 12747011194397783283, + "numval", + "ival", + 2142320548375900929, "TEXT", - "#/texts/70", + "#/texts/69", 1.0, - 8106478708506632112, - 1549478568074460237, + 17767354399704235156, + 16458659285473085163, 18446744073709551615, 18446744073709551615, - 725, - 732, - 725, - 732, - 130, - 131, + 0, + 1, + 0, + 1, + 0, + 1, true, - "service", - "service" + "4", + "4" ], [ - "term", - "single-term", + "expression", + "word-concatenation", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 13240311013633905449, - 11928407068432784250, + 6285955549867796622, + 12192460564545960229, 18446744073709551615, 18446744073709551615, - 764, - 776, - 764, - 776, - 136, - 137, + 618, + 634, + 618, + 634, + 111, + 112, true, - "requirements", - "requirements" + "time-to-solution", + "time-to-solution" ], [ - "verb", - "compound-verb", + "sentence", + "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 12669508327642496792, - 11272358114773168348, + 11044655914692672378, + 2888733359687006370, 18446744073709551615, 18446744073709551615, - 93, - 105, - 93, - 105, - 17, - 19, + 0, + 123, + 0, + 123, + 0, + 22, true, - "are deployed", - "are deployed" + "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated.", + "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated." ], [ - "verb", - "compound-verb", + "sentence", + "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 17737636265695672887, - 8822130707725823076, + 9774189456888168740, + 4152543508246757256, 18446744073709551615, 18446744073709551615, - 168, - 187, - 168, - 187, - 29, - 33, + 124, + 246, + 124, + 246, + 22, + 43, true, - "would like to point", - "would like to point" + "Before discussing the technical details, we would like to point out our requirements for the architecture of the platform.", + "Before discussing the technical details, we would like to point out our requirements for the architecture of the platform." ], [ - "verb", - "compound-verb", + "sentence", + "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 4717893903194484574, - 13497868670598652853, + 12407957798033762804, + 13470604212648561724, 18446744073709551615, 18446744073709551615, - 274, - 292, - 274, - 292, - 47, - 50, + 247, + 293, + 247, + 293, + 43, + 51, true, - "related to scaling", - "related to scaling" + "These requirements are all related to scaling.", + "These requirements are all related to scaling." ], [ - "verb", - "compound-verb", + "sentence", + "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 9576455331508001963, - 2005151878314602116, + 4653964671317425985, + 17216044985232325101, 18446744073709551615, 18446744073709551615, - 535, - 552, - 535, - 552, - 98, - 100, + 294, + 461, + 294, + 461, + 51, + 83, true, - "serve potentially", - "serve potentially" + "Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources.", + "Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources." ], [ - "verb", - "compound-verb", + "sentence", + "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 6062403169006746003, - 8883787506358796560, + 17228622883758304054, + 15113971675963977401, 18446744073709551615, 18446744073709551615, - 733, - 754, - 733, - 754, - 131, - 134, + 462, + 680, + 462, + 680, + 83, + 121, true, - "is heavily influenced", - "is heavily influenced" + "In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation.", + "In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation." ], [ - "verb", - "single-verb", + "sentence", + "", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 14652261806242873016, - 7890494648004461696, + 6820290209528918513, + 3633182920105543370, 18446744073709551615, 18446744073709551615, - 20, - 28, - 20, - 28, - 5, - 6, + 681, + 777, + 681, + 777, + 121, + 138, true, - "describe", - "describe" + "It is clear that the architecture of such a service is heavily influenced by these requirements.", + "It is clear that the architecture of such a service is heavily influenced by these requirements." ], [ - "verb", - "single-verb", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 13632574162947055061, - 147315883317329044, + 11289641670498948963, + 4109634796027215399, 18446744073709551615, 18446744073709551615, - 110, - 122, - 110, - 122, - 20, - 21, + 146, + 163, + 146, + 163, + 25, + 27, true, - "orchestrated", - "orchestrated" + "technical details", + "technical details" ], [ - "verb", - "single-verb", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 5314857828561765555, - 11123792899717439144, + 4421383392096991748, + 4820655472322214248, 18446744073709551615, 18446744073709551615, - 131, - 141, - 131, - 141, - 23, - 24, + 443, + 460, + 443, + 460, + 80, + 82, true, - "discussing", - "discussing" + "compute resources", + "compute resources" ], [ - "verb", - "single-verb", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 12178341415895564896, - 16193825294775180695, + 16088126245064377604, + 12842078242820415728, 18446744073709551615, 18446744073709551615, - 266, - 269, - 266, - 269, - 45, - 46, + 465, + 476, + 465, + 476, + 84, + 86, true, - "are", - "are" + "other words", + "other words" ], [ - "verb", - "single-verb", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 8380894560351698162, - 15803013507579142869, + 4421383392096991748, + 4820655472321830361, 18446744073709551615, 18446744073709551615, - 311, - 321, - 311, - 321, - 54, - 56, + 586, + 603, + 586, + 603, + 106, + 108, true, - "would like", - "would like" + "compute resources", + "compute resources" ], [ - "verb", - "single-verb", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 329104161785194305, - 13942660614226268092, + 8106478708629288965, + 853306226471699405, 18446744073709551615, 18446744073709551615, - 338, - 343, - 338, - 343, - 59, - 60, + 8, + 15, + 8, + 15, + 2, + 3, true, - "scale", - "scale" + "section", + "section" ], [ - "verb", - "single-verb", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 329104159219515955, - 15594698497900091739, + 990358581043194791, + 2414189034056929402, 18446744073709551615, 18446744073709551615, - 437, - 442, - 437, - 442, - 79, - 80, + 37, + 50, + 37, + 50, + 8, + 9, true, - "based", - "based" + "microservices", + "microservices" ], [ - "verb", - "single-verb", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 389609625633595931, - 15688211806062539958, + 2703018952916355661, + 17317252314622786864, 18446744073709551615, 18446744073709551615, - 481, - 485, - 481, - 485, - 88, - 89, + 66, + 76, + 66, + 76, + 13, + 14, true, - "want", - "want" + "components", + "components" ], [ - "verb", - "single-verb", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 2873440693780286732, - 10449764614793007239, + 14814125365076808131, + 4170838424915628816, 18446744073709551615, 18446744073709551615, - 501, - 511, - 501, - 511, + 84, 92, - 94, + 84, + 92, + 16, + 17, true, - "can ingest", - "can ingest" + "platform", + "platform" ], [ - "verb", - "single-verb", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 329104161785194305, - 13942660614225758865, + 13240311013633905449, + 11928407068432787608, 18446744073709551615, 18446744073709551615, - 576, - 581, - 576, - 581, - 104, - 105, + 196, + 208, + 196, + 208, + 35, + 36, true, - "scale", - "scale" + "requirements", + "requirements" ], [ - "verb", - "single-verb", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541486535, - 2048505449065788699, + 11899564443746965611, + 1669599917395635316, 18446744073709551615, 18446744073709551615, - 635, - 637, - 635, - 637, - 112, - 113, + 217, + 229, + 217, + 229, + 38, + 39, true, - "is", - "is" + "architecture", + "architecture" ], [ - "verb", - "single-verb", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541486535, - 2048505449065787833, + 14814125365076808131, + 4170838424915634854, 18446744073709551615, 18446744073709551615, - 684, - 686, - 684, - 686, - 122, - 123, + 237, + 245, + 237, + 245, + 41, + 42, true, - "is", - "is" + "platform", + "platform" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 6165459236568015364, - 497035845389833334, + 13240311013633905449, + 11928407068432751416, 18446744073709551615, 18446744073709551615, - 604, - 613, - 604, - 613, - 108, - 110, + 253, + 265, + 253, + 265, + 44, + 45, true, - "such that", - "such that" + "requirements", + "requirements" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 16386233399945118620, - 6139299107000348345, + 14814125365076808131, + 4170838424915633248, 18446744073709551615, 18446744073709551615, - 638, - 651, - 638, - 651, - 113, - 115, + 326, + 334, + 326, + 334, + 57, + 58, true, - "reasonable at", - "reasonable at" + "platform", + "platform" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 2617690495147367356, - 5753489008096455564, + 16381206574973295053, + 579996873747921936, 18446744073709551615, 18446744073709551615, - 687, - 697, - 687, - 697, - 123, - 125, + 353, + 359, + 353, + 359, + 62, + 63, true, - "clear that", - "clear that" + "number", + "number" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 8106396862006371970, - 10149877881189646287, + 6167933651658664291, + 7440866408497827921, 18446744073709551615, 18446744073709551615, - 0, - 7, - 0, - 7, - 0, - 2, + 363, + 372, + 363, + 372, + 64, + 65, true, - "In this", - "In this" + "documents", + "documents" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 8106398107541243064, - 4725564592462762947, + 16381206574973295053, + 579996873747911416, 18446744073709551615, 18446744073709551615, - 51, - 58, - 51, - 58, - 9, - 11, + 378, + 384, + 378, + 384, + 67, + 68, true, - "in each", - "in each" + "number", + "number" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 16381206565712212855, - 16630894630023874072, + 329104159157820437, + 15600004509778203866, 18446744073709551615, 18446744073709551615, - 59, - 65, - 59, - 65, - 11, - 13, + 388, + 393, + 388, + 393, + 69, + 70, true, - "of the", - "of the" + "users", + "users" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 16381206565712212855, - 16630894630023888451, + 16381206574973295053, + 579996873747912575, 18446744073709551615, 18446744073709551615, + 421, + 427, + 421, + 427, + 76, 77, - 83, - 77, - 83, - 14, - 16, true, - "of the", - "of the" + "number", + "number" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 16381206535679983326, - 14828520614292756444, + 329104161517016668, + 13957283097469922549, 18446744073709551615, 18446744073709551615, - 124, - 130, - 124, - 130, - 22, - 23, + 431, + 436, + 431, + 436, + 78, + 79, true, - "Before", - "Before" + "cloud", + "cloud" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 8106397727991264470, - 15908125160341103167, + 8106478708506632112, + 1549478568074441550, 18446744073709551615, 18446744073709551615, - 209, - 216, - 209, - 216, - 36, - 38, + 488, + 495, + 488, + 495, + 90, + 91, true, - "for the", - "for the" + "service", + "service" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 16381206565712212855, - 16630894630015899877, + 14638289822750178210, + 16529051670404838156, 18446744073709551615, 18446744073709551615, - 230, - 236, - 230, - 236, - 39, - 41, + 512, + 520, + 512, + 520, + 94, + 95, true, - "of the", - "of the" + "millions", + "millions" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 14638857868319795209, - 4352025152199097228, + 6167933651658664291, + 7440866408497716574, 18446744073709551615, 18446744073709551615, - 344, - 352, - 344, - 352, - 60, - 62, + 524, + 533, + 524, + 533, + 96, + 97, true, - "with the", - "with the" + "documents", + "documents" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541485670, - 2048505603752346374, + 3504070246238334482, + 7971751554704088263, 18446744073709551615, 18446744073709551615, - 360, - 362, - 360, - 362, - 63, - 64, + 553, + 562, + 553, + 562, + 100, + 101, true, - "of", - "of" + "thousands", + "thousands" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541485670, - 2048505603752352577, + 329104159157820437, + 15600004509778174339, 18446744073709551615, 18446744073709551615, - 385, - 387, - 385, - 387, - 68, - 69, + 566, + 571, + 566, + 571, + 102, + 103, true, - "of", - "of" + "users", + "users" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541485670, - 2048505603752367355, + 6285955549867796622, + 12192460564545960229, 18446744073709551615, 18446744073709551615, - 428, - 430, - 428, - 430, - 77, - 78, + 618, + 634, + 618, + 634, + 111, + 112, true, - "of", - "of" + "time-to-solution", + "time-to-solution" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541480354, - 2048505281272838279, + 329104159219994925, + 15605472043071850604, 18446744073709551615, 18446744073709551615, - 462, - 464, - 462, - 464, - 83, - 84, + 656, + 661, + 656, + 661, + 116, + 117, true, - "In", - "In" + "times", + "times" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541485670, - 2048505603752360531, + 6167836358624304835, + 12533972813433648220, 18446744073709551615, 18446744073709551615, - 521, - 523, - 521, - 523, - 95, - 96, + 670, + 679, + 670, + 679, + 119, + 120, true, - "of", - "of" + "operation", + "operation" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541485670, - 2048505603752440089, + 11899564443746965611, + 1669599917395666812, 18446744073709551615, 18446744073709551615, - 563, - 565, - 563, - 565, - 101, - 102, + 702, + 714, + 702, + 714, + 126, + 127, true, - "of", - "of" + "architecture", + "architecture" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 8106397728094825258, - 15814643395009540075, + 8106478708506632112, + 1549478568074460237, 18446744073709551615, 18446744073709551615, - 662, - 669, - 662, - 669, - 117, - 119, + 725, + 732, + 725, + 732, + 130, + 131, true, - "for any", - "for any" + "service", + "service" ], [ - "conn", - "single-conn", + "term", + "single-term", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541485670, - 2048505603752438307, + 13240311013633905449, + 11928407068432784250, 18446744073709551615, 18446744073709551615, - 715, - 717, - 715, - 717, - 127, - 128, + 764, + 776, + 764, + 776, + 136, + 137, true, - "of", - "of" + "requirements", + "requirements" ], [ - "conn", - "single-conn", + "verb", + "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 14652255025526908904, - 16607703748518201877, + 12669508327642496792, + 11272358114773168348, 18446744073709551615, 18446744073709551615, - 755, - 763, - 755, - 763, - 134, - 136, + 93, + 105, + 93, + 105, + 17, + 19, true, - "by these", - "by these" + "are deployed", + "are deployed" ], [ - "conn", - "single-conn", + "verb", + "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541485865, - 2048505449664077172, + 17737636265695672887, + 8822130707725823076, 18446744073709551615, 18446744073709551615, - 179, - 181, - 179, - 181, - 31, - 32, + 168, + 187, + 168, + 187, + 29, + 33, true, - "to", - "to" + "would like to point", + "would like to point" ], [ - "conn", - "single-conn", + "verb", + "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541485865, - 2048505449663964299, + 4717893903194484574, + 13497868670598652853, 18446744073709551615, 18446744073709551615, - 282, - 284, - 282, - 284, - 48, - 49, + 274, + 292, + 274, + 292, + 47, + 50, true, - "to", - "to" + "related to scaling", + "related to scaling" ], [ - "conn", - "single-conn", + "verb", + "compound-verb", 12747011194397783283, "TEXT", "#/texts/70", 1.0, - 15441160910541485865, - 2048505449663959877, + 9576455331508001963, + 2005151878314602116, 18446744073709551615, 18446744073709551615, - 335, - 337, - 335, - 337, - 58, - 59, + 535, + 552, + 535, + 552, + 98, + 100, true, - "to", - "to" + "serve potentially", + "serve potentially" ], [ - "numval", - "fval", - 174789262945188010, + "verb", + "compound-verb", + 12747011194397783283, "TEXT", - "#/texts/71", + "#/texts/70", 1.0, - 12178341415896306585, - 8581499132904184537, + 6062403169006746003, + 8883787506358796560, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 733, + 754, + 733, + 754, + 131, + 134, true, - "4.1", - "4.1" + "is heavily influenced", + "is heavily influenced" ], [ - "numval", - "ival", - 7228893318503650455, + "verb", + "single-verb", + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 17767354399704235161, - 5235953771215622646, + 14652261806242873016, + 7890494648004461696, 18446744073709551615, 18446744073709551615, - 10, - 11, - 10, - 11, - 2, - 3, + 20, + 28, + 20, + 28, + 5, + 6, true, - "1", - "1" + "describe", + "describe" ], [ - "numval", - "ival", - 7228893318503650455, + "verb", + "single-verb", + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 17767354399704235158, - 5235953771432357895, + 13632574162947055061, + 147315883317329044, 18446744073709551615, 18446744073709551615, - 101, - 102, - 101, - 102, + 110, + 122, + 110, + 122, + 20, 21, - 22, true, - "6", - "6" + "orchestrated", + "orchestrated" ], [ - "sentence", - "", - 7228893318503650455, + "verb", + "single-verb", + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 17007226042152908832, - 18331404462945221276, + 5314857828561765555, + 11123792899717439144, 18446744073709551615, 18446744073709551615, - 0, - 90, - 0, - 90, - 0, - 19, + 131, + 141, + 131, + 141, + 23, + 24, true, - "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents.", - "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents." + "discussing", + "discussing" ], [ - "term", - "single-term", - 7228893318503650455, + "verb", + "single-verb", + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 16381206514091025767, - 10265022769446664856, + 12178341415895564896, + 16193825294775180695, 18446744073709551615, 18446744073709551615, - 3, - 9, - 3, - 9, - 1, - 2, + 266, + 269, + 266, + 269, + 45, + 46, true, - "Figure", - "Figure" + "are", + "are" ], [ - "term", - "single-term", - 7228893318503650455, + "verb", + "single-verb", + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 8106396896178898697, - 4219910857709835922, + 8380894560351698162, + 15803013507579142869, 18446744073709551615, 18446744073709551615, - 29, - 36, - 29, - 36, - 8, - 9, + 311, + 321, + 311, + 321, + 54, + 56, true, - "diagram", - "diagram" + "would like", + "would like" ], [ - "term", - "single-term", - 7228893318503650455, + "verb", + "single-verb", + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 14814125852840540191, - 5403353526375880725, + 329104161785194305, + 13942660614226268092, 18446744073709551615, 18446744073709551615, - 44, - 52, - 44, - 52, - 11, - 12, + 338, + 343, + 338, + 343, + 59, + 60, true, - "pipeline", - "pipeline" + "scale", + "scale" ], [ - "term", - "single-term", - 7228893318503650455, + "verb", + "single-verb", + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 14814125365076808131, - 1502793658629529948, + 329104159219515955, + 15594698497900091739, 18446744073709551615, 18446744073709551615, - 60, - 68, - 60, - 68, - 14, - 15, + 437, + 442, + 437, + 442, + 79, + 80, true, - "platform", - "platform" + "based", + "based" ], [ - "term", - "single-term", - 7228893318503650455, + "verb", + "single-verb", + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 6167933651658664291, - 2252968926517446007, + 389609625633595931, + 15688211806062539958, 18446744073709551615, 18446744073709551615, - 80, - 89, - 80, + 481, + 485, + 481, + 485, + 88, 89, - 17, - 18, true, - "documents", - "documents" + "want", + "want" ], [ "verb", - "compound-verb", - 7228893318503650455, + "single-verb", + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 5518720687765131523, - 9686268265492720351, + 2873440693780286732, + 10449764614793007239, 18446744073709551615, 18446744073709551615, - 16, - 26, - 16, - 26, - 5, - 7, + 501, + 511, + 501, + 511, + 92, + 94, true, - "have shown", - "have shown" + "can ingest", + "can ingest" ], [ "verb", "single-verb", - 7228893318503650455, + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 8106476000254393164, - 1942641588307467677, + 329104161785194305, + 13942660614225758865, 18446744073709551615, 18446744073709551615, - 72, - 79, - 72, - 79, - 16, - 17, + 576, + 581, + 576, + 581, + 104, + 105, true, - "process", - "process" + "scale", + "scale" ], [ - "conn", - "single-conn", - 7228893318503650455, + "verb", + "single-verb", + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 15441160910541480354, - 17121927414994045497, + 15441160910541486535, + 2048505449065788699, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 635, + 637, + 635, + 637, + 112, + 113, true, - "In", - "In" + "is", + "is" ], [ - "conn", - "single-conn", - 7228893318503650455, + "verb", + "single-verb", + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 15441160910541485670, - 17121926226924974742, + 15441160910541486535, + 2048505449065787833, 18446744073709551615, 18446744073709551615, - 37, - 39, - 37, - 39, - 9, - 10, + 684, + 686, + 684, + 686, + 122, + 123, true, - "of", - "of" + "is", + "is" ], [ "conn", "single-conn", - 7228893318503650455, + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 16381206566339127348, - 9987704510349709695, + 6165459236568015364, + 497035845389833334, 18446744073709551615, 18446744073709551615, - 53, - 59, - 53, - 59, - 12, - 14, + 604, + 613, + 604, + 613, + 108, + 110, true, - "on the", - "on the" + "such that", + "such that" ], [ "conn", "single-conn", - 7228893318503650455, + 12747011194397783283, "TEXT", - "#/texts/72", + "#/texts/70", 1.0, - 15441160910541485865, - 17121926292459753487, + 16386233399945118620, + 6139299107000348345, 18446744073709551615, 18446744073709551615, - 69, - 71, - 69, - 71, - 15, - 16, + 638, + 651, + 638, + 651, + 113, + 115, true, - "to", - "to" + "reasonable at", + "reasonable at" ], [ - "sentence", - "", - 9230667184712205690, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/73", + "#/texts/70", 1.0, - 105368025718952442, - 5450071664030950078, + 2617690495147367356, + 5753489008096455564, 18446744073709551615, 18446744073709551615, - 14, - 79, - 14, - 79, - 2, - 16, + 687, + 697, + 687, + 697, + 123, + 125, true, - "As one can observe, we have grouped the service into four layers.", - "As one can observe, we have grouped the service into four layers." + "clear that", + "clear that" ], [ - "term", - "single-term", - 9230667184712205690, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/73", + "#/texts/70", 1.0, - 8106478708506632112, - 6233289218919425562, + 8106396862006371970, + 10149877881189646287, 18446744073709551615, 18446744073709551615, - 54, - 61, - 54, - 61, - 11, - 12, + 0, + 7, + 0, + 7, + 0, + 2, true, - "service", - "service" + "In this", + "In this" ], [ - "term", - "single-term", - 9230667184712205690, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/73", + "#/texts/70", 1.0, - 16381206590620802860, - 16233116481575014775, + 8106398107541243064, + 4725564592462762947, 18446744073709551615, 18446744073709551615, - 72, - 78, - 72, - 78, - 14, - 15, + 51, + 58, + 51, + 58, + 9, + 11, true, - "layers", - "layers" + "in each", + "in each" ], [ - "verb", - "compound-verb", - 9230667184712205690, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/73", + "#/texts/70", 1.0, - 189925242426617641, - 13895959288404047356, + 16381206565712212855, + 16630894630023874072, 18446744073709551615, 18446744073709551615, - 37, - 49, - 37, - 49, - 8, - 10, + 59, + 65, + 59, + 65, + 11, + 13, true, - "have grouped", - "have grouped" + "of the", + "of the" ], [ - "verb", - "single-verb", - 9230667184712205690, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/73", + "#/texts/70", 1.0, - 14892726175400695403, - 16590583946158903014, + 16381206565712212855, + 16630894630023888451, 18446744073709551615, 18446744073709551615, - 21, - 32, - 21, - 32, - 4, - 6, + 77, + 83, + 77, + 83, + 14, + 16, true, - "can observe", - "can observe" + "of the", + "of the" ], [ "conn", "single-conn", - 9230667184712205690, + 12747011194397783283, "TEXT", - "#/texts/73", + "#/texts/70", 1.0, - 15441160910541480533, - 4819755269055644271, + 16381206535679983326, + 14828520614292756444, 18446744073709551615, 18446744073709551615, - 14, - 16, - 14, - 16, - 2, - 3, + 124, + 130, + 124, + 130, + 22, + 23, true, - "As", - "As" + "Before", + "Before" ], [ "conn", "single-conn", - 9230667184712205690, + 12747011194397783283, "TEXT", - "#/texts/73", + "#/texts/70", 1.0, - 389609625698622943, - 11283567657878655855, + 8106397727991264470, + 15908125160341103167, 18446744073709551615, 18446744073709551615, - 62, - 66, - 62, - 66, - 12, - 13, + 209, + 216, + 209, + 216, + 36, + 38, true, - "into", - "into" + "for the", + "for the" ], [ - "numval", - "ival", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 17767354399704235161, - 17804011231002177177, + 16381206565712212855, + 16630894630015899877, 18446744073709551615, 18446744073709551615, - 1, - 2, - 1, - 2, - 1, - 2, + 230, + 236, + 230, + 236, + 39, + 41, true, - "1", - "1" + "of the", + "of the" ], [ - "numval", - "ival", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 15441160910541481977, - 15610781920844557983, + 14638857868319795209, + 4352025152199097228, 18446744073709551615, 18446744073709551615, - 275, - 277, - 275, - 277, - 46, - 47, + 344, + 352, + 344, + 352, + 60, + 62, true, - "13", - "13" + "with the", + "with the" ], [ - "parenthesis", - "reference", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 12178341415896395122, - 13204308870015609887, + 15441160910541485670, + 2048505603752346374, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 3, + 360, + 362, + 360, + 362, + 63, + 64, true, - "(1)", - "(1)" + "of", + "of" ], [ - "expression", - "word-concatenation", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 14652188385287077849, - 15073411726952517228, + 15441160910541485670, + 2048505603752352577, 18446744073709551615, 18446744073709551615, - 42, - 50, - 42, - 50, - 9, - 10, + 385, + 387, + 385, + 387, + 68, + 69, true, - "REST-API", - "REST-API" + "of", + "of" ], [ - "expression", - "word-concatenation", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 14652188385287077849, - 15073411726952601508, + 15441160910541485670, + 2048505603752367355, 18446744073709551615, 18446744073709551615, - 138, - 146, - 138, - 146, - 27, - 28, + 428, + 430, + 428, + 430, + 77, + 78, true, - "REST-API", - "REST-API" + "of", + "of" ], [ - "expression", - "word-concatenation", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 3753411203337468488, - 5100377154689721404, + 15441160910541480354, + 2048505281272838279, 18446744073709551615, 18446744073709551615, - 181, - 193, - 181, - 193, - 33, - 34, + 462, + 464, + 462, + 464, + 83, + 84, true, - "ground-truth", - "ground-truth" + "In", + "In" ], [ - "expression", - "word-concatenation", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 14652188385287077849, - 15073411726952522114, + 15441160910541485670, + 2048505603752360531, 18446744073709551615, 18446744073709551615, - 209, - 217, - 209, - 217, - 37, - 38, + 521, + 523, + 521, + 523, + 95, + 96, true, - "REST-API", - "REST-API" + "of", + "of" ], [ - "sentence", - "", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 5975418266041050143, - 7900187415786058174, - 18446744073709551615, + 15441160910541485670, + 2048505603752440089, 18446744073709551615, - 4, - 204, - 4, - 204, - 3, - 36, + 18446744073709551615, + 563, + 565, + 563, + 565, + 101, + 102, true, - "An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering.", - "An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering." + "of", + "of" ], [ - "sentence", - "", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 16292425914778879272, - 3552138339604334986, + 8106397728094825258, + 15814643395009540075, 18446744073709551615, 18446744073709551615, - 205, - 307, - 205, - 307, - 36, - 53, + 662, + 669, + 662, + 669, + 117, + 119, true, - "The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", - "The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python." + "for any", + "for any" ], [ - "term", - "single-term", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 8692614377683751894, - 6543009394914129596, + 15441160910541485670, + 2048505603752438307, 18446744073709551615, 18446744073709551615, - 7, - 22, - 7, - 22, - 4, - 6, + 715, + 717, + 715, + 717, + 127, + 128, true, - "interface layer", - "interface layer" + "of", + "of" ], [ - "term", - "single-term", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 11968118699453218413, - 16853549282351953165, + 14652255025526908904, + 16607703748518201877, 18446744073709551615, 18446744073709551615, - 57, - 70, - 57, - 70, - 12, - 14, + 755, + 763, + 755, + 763, + 134, + 136, true, - "user frontend", - "user frontend" + "by these", + "by these" ], [ - "term", - "single-term", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 11968118699453218413, - 16853549282351954610, + 15441160910541485865, + 2048505449664077172, 18446744073709551615, 18446744073709551615, - 76, - 89, - 76, - 89, - 16, - 18, + 179, + 181, + 179, + 181, + 31, + 32, true, - "user frontend", - "user frontend" + "to", + "to" ], [ - "term", - "single-term", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 17244914598722202958, - 15499641398155697595, + 15441160910541485865, + 2048505449663964299, 18446744073709551615, 18446744073709551615, - 96, - 117, - 96, - 117, - 20, - 22, + 282, + 284, + 282, + 284, + 48, + 49, true, - "AngularJS application", - "AngularJS application" + "to", + "to" ], [ - "term", - "single-term", - 17419815751432442882, + "conn", + "single-conn", + 12747011194397783283, "TEXT", - "#/texts/74", + "#/texts/70", 1.0, - 1591019414094504294, - 17465194081095954832, + 15441160910541485865, + 2048505449663959877, 18446744073709551615, 18446744073709551615, - 181, - 203, - 181, - 203, - 33, - 35, + 335, + 337, + 335, + 337, + 58, + 59, true, - "ground-truth gathering", - "ground-truth gathering" + "to", + "to" ], [ - "term", - "single-term", - 17419815751432442882, + "numval", + "fval", + 174789262945188010, "TEXT", - "#/texts/74", + "#/texts/71", 1.0, - 17622757252402159492, - 18169862670430999681, + 12178341415896306585, + 8581499132904184537, 18446744073709551615, 18446744073709551615, - 252, - 274, - 252, - 274, - 44, - 46, + 0, + 3, + 0, + 3, + 0, + 1, true, - "OpenAPI specifications", - "OpenAPI specifications" + "4.1", + "4.1" ], [ - "term", - "single-term", - 17419815751432442882, + "numval", + "ival", + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 14652188385287077849, - 15073411726952517228, + 17767354399704235161, + 5235953771215622646, 18446744073709551615, 18446744073709551615, - 42, - 50, - 42, - 50, - 9, 10, + 11, + 10, + 11, + 2, + 3, true, - "REST-API", - "REST-API" + "1", + "1" ], [ - "term", - "single-term", - 17419815751432442882, + "numval", + "ival", + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 12178341415895527965, - 13202575941196575545, + 17767354399704235158, + 5235953771432357895, 18446744073709551615, 18446744073709551615, - 127, - 130, - 127, - 130, - 24, - 25, + 101, + 102, + 101, + 102, + 21, + 22, true, - "top", - "top" + "6", + "6" ], [ - "term", - "single-term", - 17419815751432442882, + "sentence", + "", + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 14652188385287077849, - 15073411726952601508, + 17007226042152908832, + 18331404462945221276, 18446744073709551615, 18446744073709551615, - 138, - 146, - 138, - 146, - 27, - 28, + 0, + 90, + 0, + 90, + 0, + 19, true, - "REST-API", - "REST-API" + "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents.", + "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents." ], [ "term", "single-term", - 17419815751432442882, + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 15359807916847569012, - 12690297070768585539, + 16381206514091025767, + 10265022769446664856, 18446744073709551615, 18446744073709551615, - 166, - 176, - 166, - 176, - 31, - 32, + 3, + 9, + 3, + 9, + 1, + 2, true, - "annotators", - "annotators" + "Figure", + "Figure" ], [ "term", "single-term", - 17419815751432442882, + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 14652188385287077849, - 15073411726952522114, + 8106396896178898697, + 4219910857709835922, 18446744073709551615, 18446744073709551615, - 209, - 217, - 209, - 217, - 37, - 38, + 29, + 36, + 29, + 36, + 8, + 9, true, - "REST-API", - "REST-API" + "diagram", + "diagram" ], [ "term", "single-term", - 17419815751432442882, + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 16381206485156459004, - 2190123068096885489, + 14814125852840540191, + 5403353526375880725, 18446744073709551615, 18446744073709551615, - 300, - 306, - 300, - 306, - 51, + 44, + 52, + 44, 52, + 11, + 12, true, - "Python", - "Python" + "pipeline", + "pipeline" ], [ - "verb", - "compound-verb", - 17419815751432442882, + "term", + "single-term", + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 14637952033947516066, - 1170598421847841274, + 14814125365076808131, + 1502793658629529948, 18446744073709551615, 18446744073709551615, - 218, - 226, - 218, - 226, - 38, - 40, + 60, + 68, + 60, + 68, + 14, + 15, true, - "is built", - "is built" + "platform", + "platform" ], [ - "verb", - "compound-verb", - 17419815751432442882, + "term", + "single-term", + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 116696039858106091, - 15266057091493719963, + 6167933651658664291, + 2252968926517446007, 18446744073709551615, 18446744073709551615, - 231, - 247, - 231, - 247, - 41, - 43, + 80, + 89, + 80, + 89, + 17, + 18, true, - "documented using", - "documented using" + "documents", + "documents" ], [ - "verb", - "compound-verb", - 17419815751432442882, + "term", + "single-term", + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 37170045853396780, - 10862211224580790545, + 16381206514091025767, + 10265022769446670193, 18446744073709551615, 18446744073709551615, - 282, - 296, - 282, - 296, - 48, - 50, + 94, + 100, + 94, + 100, + 20, + 21, true, - "is implemented", - "is implemented" + "Figure", + "Figure" ], [ - "verb", - "single-verb", - 17419815751432442882, + "term", + "single-term", + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 5584174880054122043, - 13797832223961649041, + 16381206578503830159, + 17949034927811561938, 18446744073709551615, 18446744073709551615, - 29, - 39, - 29, - 39, - 7, - 8, + 114, + 120, + 114, + 120, + 26, + 27, true, - "implements", - "implements" + "sketch", + "sketch" ], [ "verb", - "single-verb", - 17419815751432442882, + "compound-verb", + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 15441160910541486535, - 15610783856184804840, + 5518720687765131523, + 9686268265492720351, 18446744073709551615, 18446744073709551615, - 90, - 92, - 90, - 92, - 18, - 19, + 16, + 26, + 16, + 26, + 5, + 7, true, - "is", - "is" + "have shown", + "have shown" ], [ "verb", "single-verb", - 17419815751432442882, + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 329104159303279946, - 14770817403596920463, + 8106476000254393164, + 1942641588307467677, 18446744073709551615, 18446744073709551615, - 118, - 123, - 118, - 123, - 22, - 23, + 72, + 79, + 72, + 79, + 16, + 17, true, - "build", - "build" + "process", + "process" ], [ "verb", "single-verb", - 17419815751432442882, + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 5584174880054122043, - 13797832223961674239, + 389609625741152123, + 12332880687353575352, 18446744073709551615, 18446744073709551615, - 151, - 161, - 151, - 161, - 29, - 30, + 107, + 111, + 107, + 111, + 24, + 25, true, - "implements", - "implements" + "show", + "show" ], [ "conn", "single-conn", - 17419815751432442882, + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 15441160910541485678, - 15610783856720662618, + 15441160910541480354, + 17121927414994045497, 18446744073709551615, 18446744073709551615, - 124, - 126, - 124, - 126, - 23, - 24, + 0, + 2, + 0, + 2, + 0, + 1, true, - "on", - "on" + "In", + "In" ], [ "conn", "single-conn", - 17419815751432442882, + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 16381206565712212855, - 3629200545768582333, + 15441160910541485670, + 17121926226924974742, 18446744073709551615, 18446744073709551615, - 131, - 137, - 131, - 137, - 25, - 27, + 37, + 39, + 37, + 39, + 9, + 10, true, - "of the", - "of the" + "of", + "of" ], [ "conn", "single-conn", - 17419815751432442882, + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 12178341415895625940, - 13202525365648469119, + 16381206566339127348, + 9987704510349709695, 18446744073709551615, 18446744073709551615, - 177, - 180, - 177, - 180, - 32, - 33, + 53, + 59, + 53, + 59, + 12, + 14, true, - "for", - "for" + "on the", + "on the" ], [ "conn", "single-conn", - 17419815751432442882, + 7228893318503650455, "TEXT", - "#/texts/74", + "#/texts/72", 1.0, - 15441160910541486538, - 15610783856135866232, + 15441160910541480354, + 17121927414994051922, 18446744073709551615, 18446744073709551615, - 297, - 299, - 297, - 299, - 50, - 51, + 91, + 93, + 91, + 93, + 19, + 20, true, - "in", - "in" + "In", + "In" ], [ - "numval", - "ival", - 11194226403360998426, + "conn", + "single-conn", + 7228893318503650455, "TEXT", - "#/texts/75", + "#/texts/72", 1.0, - 17767354399704235162, - 766019618037252930, + 15441160910541485670, + 17121926226924973211, 18446744073709551615, 18446744073709551615, - 1, - 2, - 1, - 2, - 1, - 2, + 121, + 123, + 121, + 123, + 27, + 28, true, - "2", - "2" + "of", + "of" ], [ - "parenthesis", - "reference", - 11194226403360998426, + "conn", + "single-conn", + 7228893318503650455, "TEXT", - "#/texts/75", + "#/texts/72", 1.0, - 12178341415896395187, - 17029329038495000300, + 15441160910541485865, + 17121926292459753487, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 3, + 69, + 71, + 69, + 71, + 15, + 16, true, - "(2)", - "(2)" + "to", + "to" ], [ - "parenthesis", - "round brackets", - 11194226403360998426, + "sentence", + "", + 9230667184712205690, "TEXT", - "#/texts/75", + "#/texts/73", 1.0, - 1812897535394120128, - 3870874385890063204, + 105368025718952442, + 5450071664030950078, 18446744073709551615, 18446744073709551615, - 303, - 497, - 303, - 497, - 51, - 87, + 14, + 79, + 14, + 79, + 2, + 16, true, - "(e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images)", - "(e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images)" + "As one can observe, we have grouped the service into four layers.", + "As one can observe, we have grouped the service into four layers." ], [ - "expression", - "common", - 11194226403360998426, + "term", + "single-term", + 9230667184712205690, "TEXT", - "#/texts/75", + "#/texts/73", 1.0, - 15441160910541487324, - 5910392785272575830, + 11899564443746965611, + 16284427380227364102, 18446744073709551615, 18446744073709551615, - 304, - 309, - 304, - 309, - 52, - 53, + 0, + 12, + 0, + 12, + 0, + 1, true, - "eg", - "e. g." + "architecture", + "architecture" ], [ - "expression", - "word-concatenation", - 11194226403360998426, + "term", + "single-term", + 9230667184712205690, "TEXT", - "#/texts/75", + "#/texts/73", 1.0, - 6187817560337829240, - 10074786117573267255, + 8106478708506632112, + 6233289218919425562, 18446744073709551615, 18446744073709551615, - 222, - 231, - 222, - 231, - 39, - 40, + 54, + 61, + 54, + 61, + 11, + 12, true, - "in-memory", - "in-memory" + "service", + "service" ], [ - "expression", - "word-concatenation", - 11194226403360998426, + "term", + "single-term", + 9230667184712205690, "TEXT", - "#/texts/75", + "#/texts/73", 1.0, - 10210587797782980674, - 269685690895573883, + 16381206590620802860, + 16233116481575014775, 18446744073709551615, 18446744073709551615, - 653, - 667, - 653, - 667, - 114, - 115, + 72, + 78, + 72, + 78, + 14, + 15, true, - "fault-tolerant", - "fault-tolerant" + "layers", + "layers" ], [ - "expression", - "wtoken-concatenation", - 11194226403360998426, + "term", + "single-term", + 9230667184712205690, "TEXT", - "#/texts/75", + "#/texts/73", 1.0, - 15503455610017494293, - 14071574465570134500, + 16381206590620802860, + 16233116481574945048, 18446744073709551615, 18446744073709551615, - 175, - 190, - 175, - 190, - 31, - 32, + 86, + 92, + 86, + 92, + 17, + 18, true, - "RabbitMQ^{14}", - "RabbitMQ$^{14}$" + "layers", + "layers" ], [ - "expression", - "wtoken-concatenation", - 11194226403360998426, + "verb", + "compound-verb", + 9230667184712205690, "TEXT", - "#/texts/75", + "#/texts/73", 1.0, - 9275871508895795608, - 13145605651607786139, + 189925242426617641, + 13895959288404047356, 18446744073709551615, 18446744073709551615, - 243, - 255, - 243, - 255, - 42, - 43, + 37, + 49, + 37, + 49, + 8, + 10, true, - "Redis^{15}", - "Redis$^{15}$" + "have grouped", + "have grouped" ], [ - "sentence", - "", - 11194226403360998426, + "verb", + "single-verb", + 9230667184712205690, "TEXT", - "#/texts/75", + "#/texts/73", 1.0, - 4025108859080697854, - 7684932098811697541, + 14892726175400695403, + 16590583946158903014, 18446744073709551615, 18446744073709551615, + 21, + 32, + 21, + 32, 4, - 122, - 4, - 122, - 3, - 22, + 6, true, - "An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result.", - "An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result." + "can observe", + "can observe" ], [ - "sentence", - "", - 11194226403360998426, + "verb", + "single-verb", + 9230667184712205690, "TEXT", - "#/texts/75", + "#/texts/73", 1.0, - 10578140482875773017, - 5422705622766817180, + 12178341415895564896, + 1634290879494977673, 18446744073709551615, 18446744073709551615, - 123, - 191, - 123, - 191, - 22, - 33, + 93, + 96, + 93, + 96, + 18, + 19, true, - "The task scheduling is done with the Message Broker RabbitMQ$^{14}$.", - "The task scheduling is done with the Message Broker RabbitMQ$^{14}$." + "are", + "are" ], [ - "sentence", - "", - 11194226403360998426, + "conn", + "single-conn", + 9230667184712205690, "TEXT", - "#/texts/75", + "#/texts/73", 1.0, - 13063193318125667342, - 9479480819005320541, + 15441160910541480533, + 4819755269055644271, 18446744073709551615, 18446744073709551615, - 192, - 256, - 192, - 256, - 33, - 44, + 14, + 16, + 14, + 16, + 2, + 3, true, - "The results are stored in the in-memory data store Redis$^{15}$.", - "The results are stored in the in-memory data store Redis$^{15}$." + "As", + "As" ], [ - "sentence", - "", - 11194226403360998426, + "conn", + "single-conn", + 9230667184712205690, "TEXT", - "#/texts/75", + "#/texts/73", 1.0, - 1495400135141364806, - 13254541674959843841, + 389609625698622943, + 11283567657878655855, 18446744073709551615, 18446744073709551615, - 257, - 612, - 257, - 612, - 44, - 106, + 62, + 66, + 62, + 66, + 12, + 13, true, - "In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully.", - "In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully." + "into", + "into" ], [ - "sentence", - "", - 11194226403360998426, + "numval", + "ival", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 1841397930198716309, - 11891118918529386819, + 17767354399704235161, + 17804011231002177177, 18446744073709551615, 18446744073709551615, - 613, - 702, - 613, - 702, - 106, - 121, + 1, + 2, + 1, + 2, + 1, + 2, true, - "This approach allows for a very robust, fault-tolerant service with very little downtime.", - "This approach allows for a very robust, fault-tolerant service with very little downtime." + "1", + "1" ], [ - "term", - "single-term", - 11194226403360998426, + "numval", + "ival", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 881931955171775830, - 1850213153382221251, + 15441160910541481977, + 15610781920844557983, 18446744073709551615, 18446744073709551615, - 7, - 26, - 7, - 26, - 4, - 6, - true, - "orchestration layer", - "orchestration layer" + 275, + 277, + 275, + 277, + 46, + 47, + true, + "13", + "13" ], [ - "term", - "single-term", - 11194226403360998426, + "parenthesis", + "reference", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 16569031532297427649, - 2288991119528845313, + 12178341415896395122, + 13204308870015609887, 18446744073709551615, 18446744073709551615, - 88, - 104, - 88, - 104, - 16, - 18, + 0, + 3, + 0, + 3, + 0, + 3, true, - "execution status", - "execution status" + "(1)", + "(1)" ], [ - "term", - "single-term", - 11194226403360998426, + "expression", + "word-concatenation", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 12318137194760091867, - 7829156630170179123, + 14652188385287077849, + 15073411726952517228, 18446744073709551615, 18446744073709551615, - 109, - 121, - 109, - 121, - 19, - 21, + 42, + 50, + 42, + 50, + 9, + 10, true, - "final result", - "final result" + "REST-API", + "REST-API" ], [ - "term", - "single-term", - 11194226403360998426, + "expression", + "word-concatenation", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 6315348039533026141, - 3213548333245122529, + 14652188385287077849, + 15073411726952601508, 18446744073709551615, 18446744073709551615, - 127, - 142, - 127, - 142, - 23, - 25, + 138, + 146, + 138, + 146, + 27, + 28, true, - "task scheduling", - "task scheduling" + "REST-API", + "REST-API" ], [ - "term", - "single-term", - 11194226403360998426, + "expression", + "word-concatenation", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 8295209353697935236, - 9538738240789817737, + 3753411203337468488, + 5100377154689721404, 18446744073709551615, 18446744073709551615, - 160, - 190, - 160, - 190, - 29, - 32, + 181, + 193, + 181, + 193, + 33, + 34, true, - "Message Broker RabbitMQ^{14}", - "Message Broker RabbitMQ$^{14}$" + "ground-truth", + "ground-truth" ], [ - "term", - "single-term", - 11194226403360998426, + "expression", + "word-concatenation", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 10696383395384997690, - 12295999164377771107, + 14652188385287077849, + 15073411726952522114, 18446744073709551615, 18446744073709551615, - 222, - 255, - 222, - 255, - 39, - 43, + 209, + 217, + 209, + 217, + 37, + 38, true, - "in-memory data store Redis^{15}", - "in-memory data store Redis$^{15}$" + "REST-API", + "REST-API" ], [ - "term", - "single-term", - 11194226403360998426, + "sentence", + "", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 1969668578613914549, - 8249504550464603474, + 5975418266041050143, + 7900187415786058174, 18446744073709551615, 18446744073709551615, - 277, - 302, - 277, - 302, - 48, - 51, + 4, + 204, + 4, + 204, + 3, + 36, true, - "certain consecutive tasks", - "certain consecutive tasks" + "An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering.", + "An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering." ], [ - "term", - "single-term", - 11194226403360998426, + "sentence", + "", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 14650937348812924036, - 6147429119200407258, + 16292425914778879272, + 3552138339604334986, 18446744073709551615, 18446744073709551615, - 320, - 328, - 320, - 328, - 55, - 57, + 205, + 307, + 205, + 307, + 36, + 53, true, - "PDF page", - "PDF page" + "The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", + "The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python." ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 15096203362930329687, - 5146287085934249298, + 8692614377683751894, + 6543009394914129596, 18446744073709551615, 18446744073709551615, - 390, - 411, - 390, - 411, - 67, - 70, + 7, + 22, + 7, + 22, + 4, + 6, true, - "programmatic PDF page", - "programmatic PDF page" + "interface layer", + "interface layer" ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 9914165367421220601, - 17615564355797482202, + 11968118699453218413, + 16853549282351953165, 18446744073709551615, 18446744073709551615, - 446, - 457, - 446, - 457, - 77, - 79, + 57, + 70, + 57, + 70, + 12, + 14, true, - "OCR service", - "OCR service" + "user frontend", + "user frontend" ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 10873436773834842694, - 9664366626516883504, + 11968118699453218413, + 16853549282351954610, 18446744073709551615, 18446744073709551615, - 537, - 553, - 537, - 553, - 95, - 97, + 76, + 89, + 76, + 89, + 16, + 18, true, - "subsequent steps", - "subsequent steps" + "user frontend", + "user frontend" ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 16455430351063957858, - 8578040699139249120, + 17244914598722202958, + 15499641398155697595, 18446744073709551615, 18446744073709551615, - 653, - 675, - 653, - 675, - 114, - 116, + 96, + 117, + 96, + 117, + 20, + 22, true, - "fault-tolerant service", - "fault-tolerant service" + "AngularJS application", + "AngularJS application" ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 3478107702264293237, - 8540445734424787667, + 1591019414094504294, + 17465194081095954832, 18446744073709551615, 18446744073709551615, - 686, - 701, - 686, - 701, - 118, - 120, + 181, + 203, + 181, + 203, + 33, + 35, true, - "little downtime", - "little downtime" + "ground-truth gathering", + "ground-truth gathering" ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 329104159214088329, - 8194156825567116069, + 17622757252402159492, + 18169862670430999681, 18446744073709551615, 18446744073709551615, + 252, + 274, + 252, + 274, + 44, 46, - 51, - 46, - 51, - 9, - 10, true, - "tasks", - "tasks" + "OpenAPI specifications", + "OpenAPI specifications" ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 990358581043194791, - 1684395431220408370, + 14652188385287077849, + 15073411726952517228, 18446744073709551615, 18446744073709551615, - 60, - 73, - 60, - 73, - 12, - 13, + 42, + 50, + 42, + 50, + 9, + 10, true, - "microservices", - "microservices" + "REST-API", + "REST-API" ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 16381206578935372333, - 8703937210789941258, + 12178341415895527965, + 13202575941196575545, 18446744073709551615, 18446744073709551615, - 75, - 81, - 75, - 81, - 14, - 15, + 127, + 130, + 127, + 130, + 24, + 25, true, - "stores", - "stores" + "top", + "top" ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 8106478445190161533, - 10842974023255663515, + 14652188385287077849, + 15073411726952601508, 18446744073709551615, 18446744073709551615, - 196, - 203, - 196, - 203, - 34, - 35, + 138, + 146, + 138, + 146, + 27, + 28, true, - "results", - "results" + "REST-API", + "REST-API" ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 329104161571401725, - 8064733993734324746, + 15359807916847569012, + 12690297070768585539, 18446744073709551615, 18446744073709551615, - 260, - 265, - 260, - 265, - 45, - 46, + 166, + 176, + 166, + 176, + 31, + 32, true, - "order", - "order" + "annotators", + "annotators" ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 15441160910541487324, - 5910392785272575830, + 14652188385287077849, + 15073411726952522114, 18446744073709551615, 18446744073709551615, - 304, - 309, - 304, - 309, - 52, - 53, + 209, + 217, + 209, + 217, + 37, + 38, true, - "eg", - "e. g." + "REST-API", + "REST-API" ], [ "term", "single-term", - 11194226403360998426, + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 16381206560620045048, - 3483212362973437467, + 16381206485156459004, + 2190123068096885489, 18446744073709551615, 18446744073709551615, - 351, - 357, - 351, - 357, - 60, - 61, + 300, + 306, + 300, + 306, + 51, + 52, true, - "images", - "images" + "Python", + "Python" ], [ - "term", - "single-term", - 11194226403360998426, + "verb", + "compound-verb", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 8106479143794098783, - 12365847001997486008, + 14637952033947516066, + 1170598421847841274, 18446744073709551615, 18446744073709551615, - 375, - 382, - 375, - 382, - 64, - 65, - true, - "parsing", - "parsing" + 218, + 226, + 218, + 226, + 38, + 40, + true, + "is built", + "is built" ], [ - "term", - "single-term", - 11194226403360998426, + "verb", + "compound-verb", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 16381206560620045048, - 3483212362973413702, + 116696039858106091, + 15266057091493719963, 18446744073709551615, 18446744073709551615, - 427, - 433, - 427, - 433, - 73, - 74, + 231, + 247, + 231, + 247, + 41, + 43, true, - "images", - "images" + "documented using", + "documented using" ], [ - "term", - "single-term", - 11194226403360998426, + "verb", + "compound-verb", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 329104161531686411, - 8019458249201971616, + 37170045853396780, + 10862211224580790545, 18446744073709551615, 18446744073709551615, - 473, - 478, - 473, - 478, - 82, - 83, + 282, + 296, + 282, + 296, + 48, + 50, true, - "cells", - "cells" + "is implemented", + "is implemented" ], [ - "term", - "single-term", - 11194226403360998426, + "verb", + "single-verb", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 16381206560620045048, - 3483212362973442358, + 5584174880054122043, + 13797832223961649041, 18446744073709551615, 18446744073709551615, - 490, - 496, - 490, - 496, - 85, - 86, + 29, + 39, + 29, + 39, + 7, + 8, true, - "images", - "images" + "implements", + "implements" ], [ - "term", - "single-term", - 11194226403360998426, + "verb", + "single-verb", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 329104159214088329, - 8194156825567072634, + 15441160910541486535, + 15610783856184804840, 18446744073709551615, 18446744073709551615, - 520, - 525, - 520, - 525, - 91, + 90, + 92, + 90, 92, + 18, + 19, true, - "tasks", - "tasks" + "is", + "is" ], [ - "term", - "single-term", - 11194226403360998426, + "verb", + "single-verb", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 14650448032998792781, - 16582753633148168921, + 329104159303279946, + 14770817403596920463, 18446744073709551615, 18446744073709551615, - 618, - 626, - 618, - 626, - 107, - 108, + 118, + 123, + 118, + 123, + 22, + 23, true, - "approach", - "approach" + "build", + "build" ], [ "verb", - "compound-verb", - 11194226403360998426, + "single-verb", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 8106398132958436429, - 6971888441348882646, + 5584174880054122043, + 13797832223961674239, 18446744073709551615, 18446744073709551615, - 143, - 150, - 143, - 150, - 25, - 27, + 151, + 161, + 151, + 161, + 29, + 30, true, - "is done", - "is done" + "implements", + "implements" ], [ - "verb", - "compound-verb", - 11194226403360998426, + "conn", + "single-conn", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 15388942590337907789, - 4280950829150221216, + 15441160910541485678, + 15610783856720662618, 18446744073709551615, 18446744073709551615, - 204, - 214, - 204, - 214, - 35, - 37, + 124, + 126, + 124, + 126, + 23, + 24, true, - "are stored", - "are stored" + "on", + "on" ], [ - "verb", - "compound-verb", - 11194226403360998426, + "conn", + "single-conn", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 11296839647862937485, - 4164176593204929584, + 16381206565712212855, + 3629200545768582333, 18446744073709551615, 18446744073709551615, - 334, - 350, - 334, - 350, - 58, - 60, + 131, + 137, + 131, + 137, + 25, + 27, true, - "embedded scanned", - "embedded scanned" + "of the", + "of the" ], [ - "verb", - "compound-verb", - 11194226403360998426, + "conn", + "single-conn", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 13646629520376931899, - 8519320198473003477, + 12178341415895625940, + 13202525365648469119, 18446744073709551615, 18446744073709551615, - 358, - 372, - 358, - 372, - 61, - 63, + 177, + 180, + 177, + 180, + 32, + 33, true, - "requires first", - "requires first" + "for", + "for" ], [ - "verb", - "compound-verb", - 11194226403360998426, + "conn", + "single-conn", + 17419815751432442882, "TEXT", - "#/texts/75", + "#/texts/74", 1.0, - 17982373942613951464, - 3937427824351288252, + 15441160910541486538, + 15610783856135866232, 18446744073709551615, 18446744073709551615, - 554, - 571, - 554, - 571, - 97, - 100, + 297, + 299, + 297, + 299, + 50, + 51, true, - "are only executed", - "are only executed" + "in", + "in" ], [ - "verb", - "compound-verb", + "numval", + "ival", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 12714940176042944879, - 15936925534730964046, + 17767354399704235162, + 766019618037252930, 18446744073709551615, 18446744073709551615, - 588, - 611, - 588, - 611, - 103, - 105, + 1, + 2, + 1, + 2, + 1, + 2, true, - "terminated successfully", - "terminated successfully" + "2", + "2" ], [ - "verb", - "single-verb", + "parenthesis", + "reference", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 6168537129726002426, - 152533883599703009, + 12178341415896395187, + 17029329038495000300, 18446744073709551615, 18446744073709551615, - 32, - 41, - 32, - 41, - 7, - 8, + 0, + 3, + 0, + 3, + 0, + 3, true, - "schedules", - "schedules" + "(2)", + "(2)" ], [ - "verb", - "single-verb", + "parenthesis", + "round brackets", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 8106475907566715134, - 4681167473274342910, + 1812897535394120128, + 3870874385890063204, 18446744073709551615, 18446744073709551615, - 269, - 276, - 269, - 276, - 47, - 48, + 303, + 497, + 303, + 497, + 51, + 87, true, - "perform", - "perform" + "(e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images)", + "(e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images)" ], [ - "verb", - "single-verb", + "expression", + "common", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 8106479143794098783, - 12365847001997489621, + 15441160910541487324, + 5910392785272575830, 18446744073709551615, 18446744073709551615, - 310, - 317, - 310, - 317, + 304, + 309, + 304, + 309, + 52, 53, - 54, true, - "parsing", - "parsing" + "eg", + "e. g." ], [ - "verb", - "single-verb", + "expression", + "word-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 8106397496930289884, - 8772117640065310876, + 6187817560337829240, + 10074786117573267255, 18446744073709551615, 18446744073709551615, - 415, - 422, - 415, - 422, - 71, - 72, + 222, + 231, + 222, + 231, + 39, + 40, true, - "extract", - "extract" + "in-memory", + "in-memory" ], [ - "verb", - "single-verb", + "expression", + "word-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 8106397496930289884, - 8772117640065258856, + 10210587797782980674, + 269685690895573883, 18446744073709551615, 18446744073709551615, - 461, - 468, - 461, - 468, - 80, - 81, + 653, + 667, + 653, + 667, + 114, + 115, true, - "extract", - "extract" + "fault-tolerant", + "fault-tolerant" ], [ - "verb", - "single-verb", + "expression", + "wtoken-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 329104161556625920, - 8062732787574674007, + 15503455610017494293, + 14071574465570134500, 18446744073709551615, 18446744073709551615, - 514, - 519, - 514, - 519, - 90, - 91, + 175, + 190, + 175, + 190, + 31, + 32, true, - "chain", - "chain" + "RabbitMQ^{14}", + "RabbitMQ$^{14}$" ], [ - "verb", - "single-verb", + "expression", + "wtoken-concatenation", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 16381206569317834029, - 14824105829638492947, + 9275871508895795608, + 13145605651607786139, 18446744073709551615, 18446744073709551615, - 627, - 633, - 627, - 633, - 108, - 109, + 243, + 255, + 243, + 255, + 42, + 43, true, - "allows", - "allows" + "Redis^{15}", + "Redis$^{15}$" ], [ - "conn", - "single-conn", + "sentence", + "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 6165459236568015364, - 2354429920637518397, + 4025108859080697854, + 7684932098811697541, 18446744073709551615, 18446744073709551615, - 527, - 536, - 527, - 536, - 93, - 95, + 4, + 122, + 4, + 122, + 3, + 22, true, - "such that", - "such that" + "An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result.", + "An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result." ], [ - "conn", - "single-conn", + "sentence", + "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 8106397727991264470, - 2795094083464015695, + 10578140482875773017, + 5422705622766817180, 18446744073709551615, 18446744073709551615, - 52, - 59, - 52, - 59, - 10, - 12, + 123, + 191, + 123, + 191, + 22, + 33, true, - "for the", - "for the" + "The task scheduling is done with the Message Broker RabbitMQ$^{14}$.", + "The task scheduling is done with the Message Broker RabbitMQ$^{14}$." ], [ - "conn", - "single-conn", + "sentence", + "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 14638857868319795209, - 456352319925648448, + 13063193318125667342, + 9479480819005320541, 18446744073709551615, 18446744073709551615, - 151, - 159, - 151, - 159, - 27, - 29, + 192, + 256, + 192, + 256, + 33, + 44, true, - "with the", - "with the" + "The results are stored in the in-memory data store Redis$^{15}$.", + "The results are stored in the in-memory data store Redis$^{15}$." ], [ - "conn", - "single-conn", + "sentence", + "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 16381206560518651853, - 3478068517407956556, + 1495400135141364806, + 13254541674959843841, 18446744073709551615, 18446744073709551615, - 215, - 221, - 215, - 221, - 37, - 39, + 257, + 612, + 257, + 612, + 44, + 106, true, - "in the", - "in the" + "In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully.", + "In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully." ], [ - "conn", - "single-conn", + "sentence", + "", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 15441160910541480354, - 5910392698548049506, + 1841397930198716309, + 11891118918529386819, 18446744073709551615, 18446744073709551615, - 257, - 259, - 257, - 259, - 44, - 45, + 613, + 702, + 613, + 702, + 106, + 121, true, - "In", - "In" + "This approach allows for a very robust, fault-tolerant service with very little downtime.", + "This approach allows for a very robust, fault-tolerant service with very little downtime." ], [ - "conn", - "single-conn", + "term", + "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 389609625618037948, - 4452313474781359012, + 881931955171775830, + 1850213153382221251, 18446744073709551615, 18446744073709551615, - 329, - 333, - 329, - 333, - 57, - 58, + 7, + 26, + 7, + 26, + 4, + 6, true, - "with", - "with" + "orchestration layer", + "orchestration layer" ], [ - "conn", - "single-conn", + "term", + "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 16381206565712212855, - 14812884387065852796, + 16569031532297427649, + 2288991119528845313, 18446744073709551615, 18446744073709551615, - 383, - 389, - 383, - 389, - 65, - 67, + 88, + 104, + 88, + 104, + 16, + 18, true, - "of the", - "of the" + "execution status", + "execution status" ], [ - "conn", - "single-conn", + "term", + "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 16057368201763467386, - 5837363433449722185, + 12318137194760091867, + 7829156630170179123, 18446744073709551615, 18446744073709551615, - 479, - 489, - 479, - 489, - 83, - 85, + 109, + 121, + 109, + 121, + 19, + 21, true, - "from these", - "from these" + "final result", + "final result" ], [ - "conn", - "single-conn", + "term", + "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 16381206478470086874, - 3195302339494553733, + 6315348039533026141, + 3213548333245122529, 18446744073709551615, 18446744073709551615, - 572, - 578, - 572, - 578, - 100, - 102, + 127, + 142, + 127, + 142, + 23, + 25, true, - "if the", - "if the" + "task scheduling", + "task scheduling" ], [ - "conn", - "single-conn", + "term", + "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 329104161711024499, - 8003597956646423596, + 8295209353697935236, + 9538738240789817737, 18446744073709551615, 18446744073709551615, - 634, - 639, - 634, - 639, - 109, - 111, + 160, + 190, + 160, + 190, + 29, + 32, true, - "for a", - "for a" + "Message Broker RabbitMQ^{14}", + "Message Broker RabbitMQ$^{14}$" ], [ - "conn", - "single-conn", + "term", + "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 389609625618037948, - 4452313474781410633, + 10696383395384997690, + 12295999164377771107, 18446744073709551615, 18446744073709551615, - 676, - 680, - 676, - 680, - 116, - 117, + 222, + 255, + 222, + 255, + 39, + 43, true, - "with", - "with" + "in-memory data store Redis^{15}", + "in-memory data store Redis$^{15}$" ], [ - "conn", - "single-conn", + "term", + "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 15441160910541485865, - 5910392672594327793, + 1969668578613914549, + 8249504550464603474, 18446744073709551615, 18446744073709551615, - 266, - 268, - 266, - 268, - 46, - 47, + 277, + 302, + 277, + 302, + 48, + 51, true, - "to", - "to" + "certain consecutive tasks", + "certain consecutive tasks" ], [ - "conn", - "single-conn", + "term", + "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 15441160910541485865, - 5910392672594322553, + 14650937348812924036, + 6147429119200407258, 18446744073709551615, 18446744073709551615, - 412, - 414, - 412, - 414, - 70, - 71, + 320, + 328, + 320, + 328, + 55, + 57, true, - "to", - "to" + "PDF page", + "PDF page" ], [ - "conn", - "single-conn", + "term", + "single-term", 11194226403360998426, "TEXT", "#/texts/75", 1.0, - 15441160910541485865, - 5910392672594307233, + 15096203362930329687, + 5146287085934249298, 18446744073709551615, 18446744073709551615, - 458, - 460, - 458, - 460, - 79, - 80, + 390, + 411, + 390, + 411, + 67, + 70, true, - "to", - "to" + "programmatic PDF page", + "programmatic PDF page" ], [ - "numval", - "ival", - 9005324696118733701, + "term", + "single-term", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 17767354399704235163, - 6623757277320803060, + 9914165367421220601, + 17615564355797482202, 18446744073709551615, 18446744073709551615, - 1, - 2, - 1, - 2, - 1, - 2, + 446, + 457, + 446, + 457, + 77, + 79, true, - "3", - "3" + "OCR service", + "OCR service" ], [ - "numval", - "ival", - 9005324696118733701, + "term", + "single-term", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 17767354399704235163, - 6623757277320810717, + 10873436773834842694, + 9664366626516883504, 18446744073709551615, 18446744073709551615, - 74, - 75, - 74, - 75, - 13, - 14, + 537, + 553, + 537, + 553, + 95, + 97, true, - "3", - "3" + "subsequent steps", + "subsequent steps" ], [ - "parenthesis", - "reference", - 9005324696118733701, + "term", + "single-term", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 12178341415896394992, - 10915561974328134756, + 16455430351063957858, + 8578040699139249120, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 3, + 653, + 675, + 653, + 675, + 114, + 116, true, - "(3)", - "(3)" + "fault-tolerant service", + "fault-tolerant service" ], [ - "parenthesis", - "round brackets", - 9005324696118733701, + "term", + "single-term", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 9121140803212188746, - 5062856742962380004, + 3478107702264293237, + 8540445734424787667, 18446744073709551615, 18446744073709551615, - 148, - 200, - 148, - 200, - 26, - 38, + 686, + 701, + 686, + 701, + 118, + 120, true, - "(e.g. parsing, training, predictions, assembly, etc)", - "(e.g. parsing, training, predictions, assembly, etc)" + "little downtime", + "little downtime" ], [ - "expression", - "common", - 9005324696118733701, + "term", + "single-term", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 15441160910541487324, - 13616139199714584790, + 329104159214088329, + 8194156825567116069, 18446744073709551615, 18446744073709551615, - 149, - 153, - 149, - 153, - 27, - 28, + 46, + 51, + 46, + 51, + 9, + 10, true, - "eg", - "e.g." + "tasks", + "tasks" ], [ - "expression", - "wtoken-concatenation", - 9005324696118733701, + "term", + "single-term", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 10737622929664928958, - 8651677615518322335, + 990358581043194791, + 1684395431220408370, 18446744073709551615, 18446744073709551615, - 332, - 346, - 332, - 346, - 61, - 62, + 60, + 73, + 60, + 73, + 12, + 13, true, - "library^{16}", - "library$^{16}$" + "microservices", + "microservices" ], [ - "sentence", - "", - 9005324696118733701, + "term", + "single-term", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 2864828402709972468, - 11332229229505390779, + 16381206578935372333, + 8703937210789941258, 18446744073709551615, 18446744073709551615, - 4, - 201, - 4, - 201, - 3, - 39, + 75, + 81, + 75, + 81, + 14, + 15, true, - "A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc).", - "A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc)." + "stores", + "stores" ], [ - "sentence", - "", - 9005324696118733701, + "term", + "single-term", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 15435795280083407866, - 4623434773980135134, + 8106478445190161533, + 10842974023255663515, 18446744073709551615, 18446744073709551615, - 202, - 347, - 202, - 347, - 39, - 63, + 196, + 203, + 196, + 203, + 34, + 35, true, - "In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$.", - "In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$." + "results", + "results" ], [ - "sentence", - "", - 9005324696118733701, + "term", + "single-term", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 11964442506319969125, - 6249550519038030170, + 329104161571401725, + 8064733993734324746, 18446744073709551615, 18446744073709551615, - 348, - 503, - 348, - 503, - 63, - 90, + 260, + 265, + 260, + 265, + 45, + 46, true, - "This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker.", - "This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker." + "order", + "order" ], [ - "sentence", - "", - 9005324696118733701, + "term", + "single-term", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 17777896679674985198, - 11534624099623021000, + 15441160910541487324, + 5910392785272575830, 18446744073709551615, 18446744073709551615, - 504, - 579, - 504, - 579, - 90, - 106, + 304, + 309, + 304, + 309, + 52, + 53, true, - "The workers are not only consumers of tasks, but may also produce new ones.", - "The workers are not only consumers of tasks, but may also produce new ones." + "eg", + "e. g." ], [ "term", "single-term", - 9005324696118733701, + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 5470814617574924291, - 724407251113024, + 16381206560620045048, + 3483212362973437467, 18446744073709551615, 18446744073709551615, - 6, - 19, - 6, - 19, - 4, - 6, + 351, + 357, + 351, + 357, + 60, + 61, true, - "compute layer", - "compute layer" + "images", + "images" ], [ "term", "single-term", - 9005324696118733701, + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 9909278470053653981, - 12829744493301079322, + 8106479143794098783, + 12365847001997486008, 18446744073709551615, 18446744073709551615, - 124, - 147, - 124, - 147, - 24, - 26, + 375, + 382, + 375, + 382, + 64, + 65, true, - "available microservices", - "available microservices" + "parsing", + "parsing" ], [ "term", "single-term", - 9005324696118733701, + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 14058638345038458245, - 12275078423904715575, + 16381206560620045048, + 3483212362973413702, 18446744073709551615, 18446744073709551615, - 149, - 161, - 149, - 161, - 27, - 29, + 427, + 433, + 427, + 433, + 73, + 74, true, - "eg parsing", - "e.g. parsing" + "images", + "images" ], [ "term", "single-term", - 9005324696118733701, + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 4681591099663460584, - 16278920567637920045, + 329104161531686411, + 8019458249201971616, 18446744073709551615, 18446744073709551615, - 304, - 314, - 304, - 314, - 56, - 58, + 473, + 478, + 473, + 478, + 82, + 83, true, - "task queue", - "task queue" + "cells", + "cells" ], [ "term", "single-term", - 9005324696118733701, + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 11198085877607539434, - 9973077180559472567, + 16381206560620045048, + 3483212362973442358, 18446744073709551615, 18446744073709551615, - 325, - 346, - 325, - 346, - 60, - 62, + 490, + 496, + 490, + 496, + 85, + 86, true, - "Celery library^{16}", - "Celery library$^{16}$" + "images", + "images" ], [ "term", "single-term", - 9005324696118733701, + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 4421383392096991748, - 16024629256340455225, + 329104159214088329, + 8194156825567072634, 18446744073709551615, 18446744073709551615, - 388, - 405, - 388, - 405, - 70, - 72, + 520, + 525, + 520, + 525, + 91, + 92, true, - "compute resources", - "compute resources" + "tasks", + "tasks" ], [ "term", "single-term", - 9005324696118733701, + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 8115411903316729668, - 9549090779302063453, + 14650448032998792781, + 16582753633148168921, 18446744073709551615, 18446744073709551615, - 524, - 538, - 524, - 538, - 94, - 96, + 618, + 626, + 618, + 626, + 107, + 108, true, - "only consumers", - "only consumers" + "approach", + "approach" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "compound-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 14814151107139696752, - 1255249427891598545, + 8106398132958436429, + 6971888441348882646, 18446744073709551615, 18446744073709551615, - 570, - 578, - 570, - 578, - 103, - 105, + 143, + 150, + 143, + 150, + 25, + 27, true, - "new ones", - "new ones" + "is done", + "is done" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "compound-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 990358581043194791, - 13405780939829855380, + 15388942590337907789, + 4280950829150221216, 18446744073709551615, 18446744073709551615, - 40, - 53, - 40, - 53, - 9, - 10, + 204, + 214, + 204, + 214, + 35, + 37, true, - "microservices", - "microservices" + "are stored", + "are stored" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "compound-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 8106478708629288965, - 12667054332205292279, + 11296839647862937485, + 4164176593204929584, 18446744073709551615, 18446744073709551615, - 66, - 73, - 66, - 73, - 12, - 13, + 334, + 350, + 334, + 350, + 58, + 60, true, - "section", - "section" + "embedded scanned", + "embedded scanned" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "compound-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 8106478059506484182, - 10697147794249982519, + 13646629520376931899, + 8519320198473003477, 18446744073709551615, 18446744073709551615, - 89, - 96, - 89, - 96, - 18, - 19, + 358, + 372, + 358, + 372, + 61, + 63, true, - "workers", - "workers" + "requires first", + "requires first" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "compound-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 329104161624475862, - 13848348646967812138, + 17982373942613951464, + 3937427824351288252, 18446744073709551615, 18446744073709551615, - 105, - 110, - 105, - 110, - 21, - 22, + 554, + 571, + 554, + 571, + 97, + 100, true, - "layer", - "layer" + "are only executed", + "are only executed" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "compound-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 14634153919632515335, - 7524102672994522753, + 12714940176042944879, + 15936925534730964046, 18446744073709551615, 18446744073709551615, - 163, - 171, - 163, - 171, - 30, - 31, + 588, + 611, + 588, + 611, + 103, + 105, true, - "training", - "training" + "terminated successfully", + "terminated successfully" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "single-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 15175963360124346573, - 14989804171272821450, + 6168537129726002426, + 152533883599703009, 18446744073709551615, 18446744073709551615, - 173, - 184, - 173, - 184, 32, - 33, + 41, + 32, + 41, + 7, + 8, true, - "predictions", - "predictions" + "schedules", + "schedules" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "single-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 14650448171968968290, - 3790776944315438052, + 8106475907566715134, + 4681167473274342910, 18446744073709551615, 18446744073709551615, - 186, - 194, - 186, - 194, - 34, - 35, + 269, + 276, + 269, + 276, + 47, + 48, true, - "assembly", - "assembly" + "perform", + "perform" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "single-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 329104161571401725, - 13849737740624165279, + 8106479143794098783, + 12365847001997489621, 18446744073709551615, 18446744073709551615, - 205, - 210, - 205, - 210, - 40, - 41, + 310, + 317, + 310, + 317, + 53, + 54, true, - "order", - "order" + "parsing", + "parsing" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "single-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 16381206521526353544, - 1782906868391855387, + 8106397496930289884, + 8772117640065310876, 18446744073709551615, 18446744073709551615, - 225, - 231, - 225, - 231, - 44, - 45, + 415, + 422, + 415, + 422, + 71, + 72, true, - "regard", - "regard" - ], + "extract", + "extract" + ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "single-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 6168338487309432467, - 5928008866800453885, + 8106397496930289884, + 8772117640065258856, 18446744073709551615, 18446744073709551615, - 235, - 244, - 235, - 244, - 46, - 47, + 461, + 468, + 461, + 468, + 80, + 81, true, - "resources", - "resources" + "extract", + "extract" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "single-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 16682817150367627875, - 17485526265701101, + 329104161556625920, + 8062732787574674007, 18446744073709551615, 18446744073709551615, - 272, - 284, - 272, - 284, - 52, - 53, + 514, + 519, + 514, + 519, + 90, + 91, true, - "microservice", - "microservice" + "chain", + "chain" ], [ - "term", - "single-term", - 9005324696118733701, + "verb", + "single-verb", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 16381206557159905849, - 8006079737033218220, + 16381206569317834029, + 14824105829638492947, 18446744073709551615, 18446744073709551615, - 418, - 424, - 418, - 424, - 75, - 76, + 627, + 633, + 627, + 633, + 108, + 109, true, - "worker", - "worker" + "allows", + "allows" ], [ - "term", - "single-term", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 8106398485449787361, - 5675061092301137187, + 6165459236568015364, + 2354429920637518397, 18446744073709551615, 18446744073709551615, - 461, - 468, - 461, - 468, - 82, - 83, + 527, + 536, + 527, + 536, + 93, + 95, true, - "cluster", - "cluster" + "such that", + "such that" ], [ - "term", - "single-term", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 16381206570348587859, - 15808885045293000288, + 8106397727991264470, + 2795094083464015695, 18446744073709551615, 18446744073709551615, - 496, - 502, - 496, - 502, - 88, - 89, + 52, + 59, + 52, + 59, + 10, + 12, true, - "broker", - "broker" + "for the", + "for the" ], [ - "term", - "single-term", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 8106478059506484182, - 10697147794249956396, + 14638857868319795209, + 456352319925648448, 18446744073709551615, 18446744073709551615, - 508, - 515, - 508, - 515, - 91, - 92, + 151, + 159, + 151, + 159, + 27, + 29, true, - "workers", - "workers" + "with the", + "with the" ], [ - "term", - "single-term", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 329104159214088329, - 14097861728688353508, + 16381206560518651853, + 3478068517407956556, 18446744073709551615, 18446744073709551615, - 542, - 547, - 542, - 547, - 97, - 98, + 215, + 221, + 215, + 221, + 37, + 39, true, - "tasks", - "tasks" + "in the", + "in the" ], [ - "verb", - "compound-verb", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 13859584371553084961, - 6953162611440438890, + 15441160910541480354, + 5910392698548049506, 18446744073709551615, 18446744073709551615, - 249, - 266, - 249, - 266, - 49, - 51, + 257, + 259, + 257, + 259, + 44, + 45, true, - "have encapsulated", - "have encapsulated" + "In", + "In" ], [ - "verb", - "compound-verb", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 288538720869017437, - 4206979805055504968, + 389609625618037948, + 4452313474781359012, 18446744073709551615, 18446744073709551615, - 425, - 453, - 425, - 453, - 76, - 80, + 329, + 333, + 329, + 333, + 57, + 58, true, - "can be spawned automatically", - "can be spawned automatically" + "with", + "with" ], [ - "verb", - "compound-verb", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 8106397797831668975, - 4782227961575271919, + 16381206565712212855, + 14812884387065852796, 18446744073709551615, 18446744073709551615, - 516, - 523, - 516, - 523, - 92, - 94, + 383, + 389, + 383, + 389, + 65, + 67, true, - "are not", - "are not" + "of the", + "of the" ], [ - "verb", - "single-verb", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 5584174880054122043, - 15438871383215853010, + 16057368201763467386, + 5837363433449722185, 18446744073709551615, 18446744073709551615, - 25, - 35, - 25, - 35, - 7, - 8, + 479, + 489, + 479, + 489, + 83, + 85, true, - "implements", - "implements" + "from these", + "from these" ], [ - "verb", - "single-verb", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 14652261813489544447, - 11247279661113316629, + 16381206478470086874, + 3195302339494553733, 18446744073709551615, 18446744073709551615, - 54, - 62, - 54, - 62, - 10, - 11, + 572, + 578, + 572, + 578, + 100, + 102, true, - "detailed", - "detailed" + "if the", + "if the" ], [ - "verb", - "single-verb", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 14652255854767583909, - 12382046103724054031, + 329104161711024499, + 8003597956646423596, 18446744073709551615, 18446744073709551615, + 634, + 639, + 634, + 639, + 109, 111, - 119, - 111, - 119, - 22, - 23, true, - "executes", - "executes" + "for a", + "for a" ], [ - "verb", - "single-verb", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 329104161785194305, - 13850093838201494630, + 389609625618037948, + 4452313474781410633, 18446744073709551615, 18446744073709551615, - 214, - 219, - 214, - 219, - 42, - 43, + 676, + 680, + 676, + 680, + 116, + 117, true, - "scale", - "scale" + "with", + "with" ], [ - "verb", - "single-verb", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 1477344672819384985, - 8283526875963376019, + 15441160910541485865, + 5910392672594327793, 18446744073709551615, 18446744073709551615, - 292, - 303, - 292, - 303, - 55, - 56, + 266, + 268, + 266, + 268, + 46, + 47, true, - "distributed", - "distributed" + "to", + "to" ], [ - "verb", - "single-verb", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 329104159157798023, - 14298779374945162593, + 15441160910541485865, + 5910392672594322553, 18446744073709551615, 18446744073709551615, - 315, - 320, - 315, - 320, - 58, - 59, + 412, + 414, + 412, + 414, + 70, + 71, true, - "using", - "using" + "to", + "to" ], [ - "verb", - "single-verb", - 9005324696118733701, + "conn", + "single-conn", + 11194226403360998426, "TEXT", - "#/texts/76", + "#/texts/75", 1.0, - 16381206569317834029, - 15096333879001227968, + 15441160910541485865, + 5910392672594307233, 18446744073709551615, 18446744073709551615, - 353, - 359, - 353, - 359, - 64, - 65, + 458, + 460, + 458, + 460, + 79, + 80, true, - "allows", - "allows" + "to", + "to" ], [ - "verb", - "single-verb", + "numval", + "ival", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 329104161785194305, - 13850093838201414935, + 17767354399704235163, + 6623757277320803060, 18446744073709551615, 18446744073709551615, - 378, - 383, - 378, - 383, - 68, - 69, + 1, + 2, + 1, + 2, + 1, + 2, true, - "scale", - "scale" + "3", + "3" ], [ - "verb", - "single-verb", + "numval", + "ival", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 14634109580260092070, - 816619907238816136, + 17767354399704235163, + 6623757277320810717, 18446744073709551615, 18446744073709551615, - 473, - 481, - 473, - 481, - 84, - 85, + 74, + 75, + 74, + 75, + 13, + 14, true, - "register", - "register" + "3", + "3" ], [ - "verb", - "single-verb", + "parenthesis", + "reference", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 8106476000256008955, - 10662702206647144879, + 12178341415896394992, + 10915561974328134756, 18446744073709551615, 18446744073709551615, - 562, - 569, - 562, - 569, - 102, - 103, + 0, + 3, + 0, + 3, + 0, + 3, true, - "produce", - "produce" + "(3)", + "(3)" ], [ - "conn", - "single-conn", + "parenthesis", + "round brackets", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 15441160910541486538, - 13616139232389586146, + 9121140803212188746, + 5062856742962380004, 18446744073709551615, 18446744073709551615, - 63, - 65, - 63, - 65, - 11, - 12, + 148, + 200, + 148, + 200, + 26, + 38, true, - "in", - "in" + "(e.g. parsing, training, predictions, assembly, etc)", + "(e.g. parsing, training, predictions, assembly, etc)" ], [ - "conn", - "single-conn", + "expression", + "common", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 3612640462697257855, - 15059124849281620447, + 15441160910541487324, + 13616139199714584790, 18446744073709551615, 18446744073709551615, - 77, - 88, - 77, - 88, - 15, - 18, + 149, + 153, + 149, + 153, + 27, + 28, true, - "Each of the", - "Each of the" + "eg", + "e.g." ], [ - "conn", - "single-conn", + "expression", + "wtoken-concatenation", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 8106398107541152403, - 3040495791226350629, + 10737622929664928958, + 8651677615518322335, 18446744073709551615, 18446744073709551615, - 97, - 104, - 97, - 104, - 19, - 21, + 332, + 346, + 332, + 346, + 61, + 62, true, - "in this", - "in this" + "library^{16}", + "library$^{16}$" ], [ - "conn", - "single-conn", + "sentence", + "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 15441160910541480354, - 13616133383081621648, + 2864828402709972468, + 11332229229505390779, 18446744073709551615, 18446744073709551615, - 202, - 204, - 202, - 204, + 4, + 201, + 4, + 201, + 3, 39, - 40, true, - "In", - "In" + "A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc).", + "A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc)." ], [ - "conn", - "single-conn", + "sentence", + "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 389609625618037948, - 638487817302062508, + 15435795280083407866, + 4623434773980135134, 18446744073709551615, 18446744073709551615, - 220, - 224, - 220, - 224, - 43, - 44, + 202, + 347, + 202, + 347, + 39, + 63, true, - "with", - "with" + "In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$.", + "In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$." ], [ - "conn", - "single-conn", + "sentence", + "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 16381206560517276114, - 15829604646240034102, + 11964442506319969125, + 6249550519038030170, 18446744073709551615, 18446744073709551615, - 285, - 291, - 285, - 291, - 53, - 55, + 348, + 503, + 348, + 503, + 63, + 90, true, - "into a", - "into a" + "This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker.", + "This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker." ], [ - "conn", - "single-conn", + "sentence", + "", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 14091433066300748251, - 5574629252352928036, + 17777896679674985198, + 11534624099623021000, 18446744073709551615, 18446744073709551615, - 407, - 417, - 407, - 417, - 73, - 75, + 504, + 579, + 504, + 579, + 90, + 106, true, - "since each", - "since each" + "The workers are not only consumers of tasks, but may also produce new ones.", + "The workers are not only consumers of tasks, but may also produce new ones." ], [ - "conn", - "single-conn", + "term", + "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 16381206566339127348, - 15152243859443904216, + 5470814617574924291, + 724407251113024, 18446744073709551615, 18446744073709551615, - 454, - 460, - 454, - 460, - 80, - 82, + 6, + 19, + 6, + 19, + 4, + 6, true, - "on the", - "on the" + "compute layer", + "compute layer" ], [ - "conn", - "single-conn", + "term", + "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 15441160910541485670, - 13616139037782441930, + 9909278470053653981, + 12829744493301079322, 18446744073709551615, 18446744073709551615, - 539, - 541, - 539, - 541, - 96, - 97, + 124, + 147, + 124, + 147, + 24, + 26, true, - "of", - "of" + "available microservices", + "available microservices" ], [ - "conn", - "single-conn", + "term", + "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 15441160910541485865, - 13616139237691872940, + 14058638345038458245, + 12275078423904715575, 18446744073709551615, 18446744073709551615, - 211, - 213, - 211, - 213, - 41, - 42, + 149, + 161, + 149, + 161, + 27, + 29, true, - "to", - "to" + "eg parsing", + "e.g. parsing" ], [ - "conn", - "single-conn", + "term", + "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 15441160910541485865, - 13616139237691874671, + 4681591099663460584, + 16278920567637920045, 18446744073709551615, 18446744073709551615, - 232, - 234, - 232, - 234, - 45, - 46, + 304, + 314, + 304, + 314, + 56, + 58, true, - "to", - "to" + "task queue", + "task queue" ], [ - "conn", - "single-conn", + "term", + "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 15441160910541485865, - 13616139237691883462, + 11198085877607539434, + 9973077180559472567, 18446744073709551615, 18446744073709551615, - 363, - 365, - 363, - 365, - 66, - 67, + 325, + 346, + 325, + 346, + 60, + 62, true, - "to", - "to" + "Celery library^{16}", + "Celery library$^{16}$" ], [ - "conn", - "single-conn", + "term", + "single-term", 9005324696118733701, "TEXT", "#/texts/76", 1.0, - 16381206519425733256, - 1744518844831028097, + 4421383392096991748, + 16024629256340455225, 18446744073709551615, 18446744073709551615, - 489, - 495, - 489, - 495, - 86, - 88, + 388, + 405, + 388, + 405, + 70, + 72, true, - "to the", - "to the" + "compute resources", + "compute resources" ], [ - "parenthesis", - "round brackets", - 8082547756621048511, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/77", + "#/texts/76", 1.0, - 15451245949012109980, - 860212891132498684, + 8115411903316729668, + 9549090779302063453, 18446744073709551615, 18446744073709551615, - 105, - 118, - 105, - 118, - 16, - 20, + 524, + 538, + 524, + 538, + 94, + 96, true, - "(or document)", - "(or document)" + "only consumers", + "only consumers" ], [ - "expression", - "word-concatenation", - 8082547756621048511, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/77", + "#/texts/76", 1.0, - 5470814635586025487, - 3023904040799855893, + 14814151107139696752, + 1255249427891598545, 18446744073709551615, 18446744073709551615, - 68, - 81, - 68, - 81, - 11, - 12, + 570, + 578, + 570, + 578, + 103, + 105, true, - "compute-heavy", - "compute-heavy" + "new ones", + "new ones" ], [ - "sentence", - "", - 8082547756621048511, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/77", + "#/texts/76", 1.0, - 8943027620035512136, - 10854655135118326590, + 990358581043194791, + 13405780939829855380, 18446744073709551615, 18446744073709551615, - 31, - 125, - 31, - 125, - 6, - 22, + 40, + 53, + 40, + 53, + 9, + 10, true, - "Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", - "Whenever possible we parallelise the compute-heavy operations at the page (or document) level." + "microservices", + "microservices" ], [ "term", "single-term", - 8082547756621048511, + 9005324696118733701, "TEXT", - "#/texts/77", + "#/texts/76", 1.0, - 13988986336887005746, - 1446674937315880970, + 8106478708629288965, + 12667054332205292279, 18446744073709551615, 18446744073709551615, - 68, - 92, - 68, - 92, - 11, + 66, + 73, + 66, + 73, + 12, 13, true, - "compute-heavy operations", - "compute-heavy operations" + "section", + "section" ], [ "term", "single-term", - 8082547756621048511, + 9005324696118733701, "TEXT", - "#/texts/77", + "#/texts/76", 1.0, - 389609625632301461, - 15632188389001375550, + 8106478059506484182, + 10697147794249982519, 18446744073709551615, 18446744073709551615, - 100, - 104, - 100, - 104, - 15, - 16, + 89, + 96, + 89, + 96, + 18, + 19, true, - "page", - "page" + "workers", + "workers" ], [ "term", "single-term", - 8082547756621048511, + 9005324696118733701, "TEXT", - "#/texts/77", + "#/texts/76", 1.0, - 14650401089286948001, - 1809325515137941529, + 329104161624475862, + 13848348646967812138, 18446744073709551615, 18446744073709551615, - 109, - 117, - 109, - 117, - 18, - 19, + 105, + 110, + 105, + 110, + 21, + 22, true, - "document", - "document" + "layer", + "layer" ], [ "term", "single-term", - 8082547756621048511, + 9005324696118733701, "TEXT", - "#/texts/77", + "#/texts/76", 1.0, - 329104161602483077, - 5312276037637913177, + 14634153919632515335, + 7524102672994522753, 18446744073709551615, 18446744073709551615, - 119, - 124, - 119, - 124, - 20, - 21, + 163, + 171, + 163, + 171, + 30, + 31, true, - "level", - "level" + "training", + "training" ], [ - "verb", - "single-verb", - 8082547756621048511, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/77", + "#/texts/76", 1.0, - 18223316012831076072, - 4378757623349607195, + 15175963360124346573, + 14989804171272821450, 18446744073709551615, 18446744073709551615, - 52, - 63, - 52, - 63, - 9, - 10, + 173, + 184, + 173, + 184, + 32, + 33, true, - "parallelise", - "parallelise" + "predictions", + "predictions" ], [ - "conn", - "single-conn", - 8082547756621048511, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/77", + "#/texts/76", 1.0, - 16381206568372064271, - 9744783902447945030, + 14650448171968968290, + 3790776944315438052, 18446744073709551615, 18446744073709551615, - 93, - 99, - 93, - 99, - 13, - 15, + 186, + 194, + 186, + 194, + 34, + 35, true, - "at the", - "at the" + "assembly", + "assembly" ], [ - "numval", - "ival", - 7791113385466815951, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 17767354399704235156, - 7397297711065841756, + 329104161571401725, + 13849737740624165279, 18446744073709551615, 18446744073709551615, - 1, - 2, - 1, - 2, - 1, - 2, + 205, + 210, + 205, + 210, + 40, + 41, true, - "4", - "4" + "order", + "order" ], [ - "parenthesis", - "reference", - 7791113385466815951, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 12178341415896395057, - 17882276138977820280, + 16381206521526353544, + 1782906868391855387, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 3, + 225, + 231, + 225, + 231, + 44, + 45, true, - "(4)", - "(4)" + "regard", + "regard" ], [ - "parenthesis", - "round brackets", - 7791113385466815951, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 7182556421351177654, - 17408013221930808816, + 6168338487309432467, + 5928008866800453885, 18446744073709551615, 18446744073709551615, - 207, - 256, - 207, - 256, - 38, - 50, + 235, + 244, + 235, + 244, + 46, + 47, true, - "(e. g. the parsed PDF pages, trained models, etc)", - "(e. g. the parsed PDF pages, trained models, etc)" + "resources", + "resources" ], [ - "parenthesis", - "round brackets", - 7791113385466815951, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 8366781084765568282, - 12073061348432203105, + 16682817150367627875, + 17485526265701101, 18446744073709551615, 18446744073709551615, - 541, - 576, - 541, - 576, - 103, - 112, + 272, + 284, + 272, + 284, + 52, + 53, true, - "(in our case we use MongoDB$^{17}$)", - "(in our case we use MongoDB$^{17}$)" + "microservice", + "microservice" ], [ - "expression", - "common", - 7791113385466815951, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 15441160910541487324, - 14307783780196245817, + 16381206557159905849, + 8006079737033218220, 18446744073709551615, 18446744073709551615, - 208, - 213, - 208, - 213, - 39, - 40, + 418, + 424, + 418, + 424, + 75, + 76, true, - "eg", - "e. g." + "worker", + "worker" ], [ - "expression", - "word-concatenation", - 7791113385466815951, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 5910674217167684246, - 3596748896249425734, + 8106398485449787361, + 5675061092301137187, 18446744073709551615, 18446744073709551615, - 147, - 159, - 147, - 159, - 30, - 31, + 461, + 468, + 461, + 468, + 82, + 83, true, - "object-store", - "object-store" + "cluster", + "cluster" ], [ - "expression", - "word-concatenation", - 7791113385466815951, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 5910674217167684246, - 3596748896249446260, + 16381206570348587859, + 15808885045293000288, 18446744073709551615, 18446744073709551615, - 333, - 345, - 333, - 345, - 64, - 65, + 496, + 502, + 496, + 502, + 88, + 89, true, - "object-store", - "object-store" + "broker", + "broker" ], [ - "expression", - "word-concatenation", - 7791113385466815951, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 5910674217167684246, - 3596748896249438801, + 8106478059506484182, + 10697147794249956396, 18446744073709551615, 18446744073709551615, - 351, - 363, - 351, - 363, - 67, - 68, + 508, + 515, + 508, + 515, + 91, + 92, true, - "object-store", - "object-store" + "workers", + "workers" ], [ - "expression", - "word-concatenation", - 7791113385466815951, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 7393395602818997382, - 2445570796174034956, + 329104159214088329, + 14097861728688353508, 18446744073709551615, 18446744073709551615, - 620, - 632, - 620, - 632, - 122, - 123, + 542, + 547, + 542, + 547, + 97, + 98, true, - "access-layer", - "access-layer" + "tasks", + "tasks" ], [ - "expression", - "latex", - 7791113385466815951, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 329104159258632281, - 15385845941904838235, + 389609625695123443, + 614461596643170667, 18446744073709551615, 18446744073709551615, - 568, - 575, - 568, - 575, + 592, + 596, + 592, + 596, + 109, 110, - 111, true, - "^{17}", - "$^{17}$" + "case", + "case" ], [ - "sentence", - "", - 7791113385466815951, + "term", + "single-term", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 17454558322938465001, - 17890104706267422072, + 14634109266514673153, + 16089458740036970126, 18446744073709551615, 18446744073709551615, - 4, - 346, - 4, - 346, - 3, - 66, + 605, + 613, + 605, + 613, + 112, + 113, true, - "A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store.", - "A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store." + "requests", + "requests" ], [ - "sentence", - "", - 7791113385466815951, + "verb", + "compound-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 5622932258824479139, - 10516707730337709393, + 13859584371553084961, + 6953162611440438890, 18446744073709551615, 18446744073709551615, - 347, - 451, - 347, - 451, - 66, - 84, + 249, + 266, + 249, + 266, + 49, + 51, true, - "The object-store allows us to easily scale the storage with regard to the number of processed documents.", - "The object-store allows us to easily scale the storage with regard to the number of processed documents." + "have encapsulated", + "have encapsulated" ], [ - "sentence", - "", - 7791113385466815951, + "verb", + "compound-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 8816129983583997199, - 15303863633151315650, + 288538720869017437, + 4206979805055504968, 18446744073709551615, 18446744073709551615, - 452, - 633, - 452, - 633, - 84, - 124, + 425, + 453, + 425, + 453, + 76, + 80, true, - "However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", - "However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer." + "can be spawned automatically", + "can be spawned automatically" ], [ - "term", - "enum-term-mark-2", - 7791113385466815951, + "verb", + "compound-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 9482072146671435678, - 4852192659277363715, + 8106397797831668975, + 4782227961575271919, 18446744073709551615, 18446744073709551615, - 598, - 613, - 598, - 613, - 117, - 120, + 516, + 523, + 516, + 523, + 92, + 94, true, - "storage and act", - "storage and act" + "are not", + "are not" ], [ - "term", - "single-term", - 7791113385466815951, + "verb", + "single-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 13382702060117711634, - 866988340100178668, + 5584174880054122043, + 15438871383215853010, 18446744073709551615, 18446744073709551615, - 6, - 19, - 6, - 19, - 4, - 6, + 25, + 35, + 25, + 35, + 7, + 8, true, - "storage layer", - "storage layer" + "implements", + "implements" ], [ - "term", - "single-term", - 7791113385466815951, + "verb", + "single-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 13382702060117711634, - 866988340100185003, + 14652261813489544447, + 11247279661113316629, 18446744073709551615, 18446744073709551615, - 97, - 110, - 97, - 110, - 20, - 22, + 54, + 62, + 54, + 62, + 10, + 11, true, - "storage layer", - "storage layer" + "detailed", + "detailed" ], [ - "term", - "single-term", - 7791113385466815951, + "verb", + "single-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 2903324788977241891, - 13900858649375924507, + 14652255854767583909, + 12382046103724054031, 18446744073709551615, 18446744073709551615, - 225, - 234, - 225, - 234, - 42, - 44, + 111, + 119, + 111, + 119, + 22, + 23, true, - "PDF pages", - "PDF pages" + "executes", + "executes" ], [ - "term", - "single-term", - 7791113385466815951, + "verb", + "single-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 9804322186740216471, - 16267225850960798578, + 329104161785194305, + 13850093838201494630, 18446744073709551615, 18446744073709551615, - 236, - 250, - 236, - 250, - 45, - 47, + 214, + 219, + 214, + 219, + 42, + 43, true, - "trained models", - "trained models" + "scale", + "scale" ], [ - "term", - "single-term", - 7791113385466815951, + "verb", + "single-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 1944794866286482065, - 4090230587991277542, + 1477344672819384985, + 8283526875963376019, 18446744073709551615, 18446744073709551615, - 263, - 287, - 263, - 287, - 52, + 292, + 303, + 292, + 303, 55, + 56, true, - "queryable NoSQL database", - "queryable NoSQL database" + "distributed", + "distributed" ], [ - "term", - "single-term", - 7791113385466815951, + "verb", + "single-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 16772942504422841315, - 12153285632385192646, + 329104159157798023, + 14298779374945162593, 18446744073709551615, 18446744073709551615, - 526, - 540, - 526, - 540, - 101, - 103, + 315, + 320, + 315, + 320, + 58, + 59, true, - "NoSQL database", - "NoSQL database" + "using", + "using" ], [ - "term", - "single-term", - 7791113385466815951, + "verb", + "single-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 16381206578935372333, - 11959326053889459522, + 16381206569317834029, + 15096333879001227968, 18446744073709551615, 18446744073709551615, - 25, - 31, - 25, - 31, - 7, - 8, + 353, + 359, + 353, + 359, + 64, + 65, true, - "stores", - "stores" + "allows", + "allows" ], [ - "term", - "single-term", - 7791113385466815951, + "verb", + "single-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 6167933651658664291, - 16849655876428761988, + 329104161785194305, + 13850093838201414935, 18446744073709551615, 18446744073709551615, - 36, - 45, - 36, - 45, - 9, - 10, + 378, + 383, + 378, + 383, + 68, + 69, true, - "documents", - "documents" + "scale", + "scale" ], [ - "term", - "single-term", - 7791113385466815951, + "verb", + "single-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 8106478445190161533, - 10724655109163266628, + 14634109580260092070, + 816619907238816136, 18446744073709551615, 18446744073709551615, - 61, - 68, - 61, - 68, - 14, - 15, + 473, + 481, + 473, + 481, + 84, + 85, true, - "results", - "results" + "register", + "register" ], [ - "term", - "single-term", - 7791113385466815951, + "verb", + "single-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 990358581043194791, - 9157342138188045037, + 8106476000256008955, + 10662702206647144879, 18446744073709551615, 18446744073709551615, - 78, - 91, - 78, - 91, - 17, - 18, + 562, + 569, + 562, + 569, + 102, + 103, true, - "microservices", - "microservices" + "produce", + "produce" ], [ - "term", - "single-term", - 7791113385466815951, + "verb", + "single-verb", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 14635102416861801722, - 13921851080814198183, + 15441160910541486535, + 13616139232205762064, 18446744073709551615, 18446744073709551615, - 134, - 142, - 134, - 142, - 27, - 28, + 585, + 587, + 585, + 587, + 107, + 108, true, - "services", - "services" + "is", + "is" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 5910674217167684246, - 3596748896249425734, + 15441160910541486538, + 13616139232389586146, 18446744073709551615, 18446744073709551615, - 147, - 159, - 147, - 159, - 30, - 31, + 63, + 65, + 63, + 65, + 11, + 12, true, - "object-store", - "object-store" + "in", + "in" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 16381206578935372333, - 11959326053892662509, + 3612640462697257855, + 15059124849281620447, 18446744073709551615, 18446744073709551615, - 165, - 171, - 165, - 171, - 32, - 33, + 77, + 88, + 77, + 88, + 15, + 18, true, - "stores", - "stores" + "Each of the", + "Each of the" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 6167933651658664291, - 16849655876428781193, + 8106398107541152403, + 3040495791226350629, 18446744073709551615, 18446744073709551615, - 176, - 185, - 176, - 185, - 34, - 35, + 97, + 104, + 97, + 104, + 19, + 21, true, - "documents", - "documents" + "in this", + "in this" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 16381206578939110576, - 11959779027228109556, + 15441160910541480354, + 13616133383081621648, 18446744073709551615, 18446744073709551615, - 200, - 206, - 200, - 206, - 37, - 38, + 202, + 204, + 202, + 204, + 39, + 40, true, - "stages", - "stages" + "In", + "In" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 14638347573453462708, - 11572179125786979791, + 389609625618037948, + 638487817302062508, 18446744073709551615, 18446744073709551615, - 304, - 312, - 304, - 312, - 58, - 59, + 220, + 224, + 220, + 224, + 43, + 44, true, - "metadata", - "metadata" + "with", + "with" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 389609625697824016, - 10086809110828577778, + 16381206560517276114, + 15829604646240034102, 18446744073709551615, 18446744073709551615, - 321, - 325, - 321, - 325, - 61, - 62, + 285, + 291, + 285, + 291, + 53, + 55, true, - "file", - "file" + "into a", + "into a" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 5910674217167684246, - 3596748896249446260, + 14091433066300748251, + 5574629252352928036, 18446744073709551615, 18446744073709551615, - 333, - 345, - 333, - 345, - 64, - 65, + 407, + 417, + 407, + 417, + 73, + 75, true, - "object-store", - "object-store" + "since each", + "since each" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 5910674217167684246, - 3596748896249438801, + 16381206566339127348, + 15152243859443904216, 18446744073709551615, 18446744073709551615, - 351, - 363, - 351, - 363, - 67, - 68, + 454, + 460, + 454, + 460, + 80, + 82, true, - "object-store", - "object-store" + "on the", + "on the" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 8106478700889254291, - 5556909971365278069, + 15441160910541485670, + 13616139037782441930, 18446744073709551615, 18446744073709551615, - 394, - 401, - 394, - 401, - 74, - 75, + 539, + 541, + 539, + 541, + 96, + 97, true, - "storage", - "storage" + "of", + "of" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 16381206521526353544, - 15388633276980566672, + 8106397727991264470, + 7248749092306664141, 18446744073709551615, 18446744073709551615, - 407, - 413, - 407, - 413, - 76, - 77, + 597, + 604, + 597, + 604, + 110, + 112, true, - "regard", - "regard" + "for the", + "for the" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 16381206574973295053, - 13874751204703215888, + 15441160910541485865, + 13616139237691872940, 18446744073709551615, 18446744073709551615, - 421, - 427, - 421, - 427, - 79, - 80, + 211, + 213, + 211, + 213, + 41, + 42, true, - "number", - "number" + "to", + "to" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 6167933651658664291, - 16849655876428863113, + 15441160910541485865, + 13616139237691874671, 18446744073709551615, 18446744073709551615, - 441, - 450, - 441, - 450, - 82, - 83, + 232, + 234, + 232, + 234, + 45, + 46, true, - "documents", - "documents" + "to", + "to" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 389609625695123443, - 10086936390401945955, + 15441160910541485865, + 13616139237691883462, 18446744073709551615, 18446744073709551615, - 549, - 553, - 549, - 553, - 106, - 107, + 363, + 365, + 363, + 365, + 66, + 67, true, - "case", - "case" + "to", + "to" ], [ - "term", - "single-term", - 7791113385466815951, + "conn", + "single-conn", + 9005324696118733701, "TEXT", - "#/texts/78", + "#/texts/76", 1.0, - 8106471292843117687, - 16633247643408803384, + 16381206519425733256, + 1744518844831028097, 18446744073709551615, 18446744073709551615, - 561, - 568, - 561, - 568, - 109, - 110, + 489, + 495, + 489, + 495, + 86, + 88, true, - "MongoDB", - "MongoDB" + "to the", + "to the" ], [ - "term", - "single-term", - 7791113385466815951, + "parenthesis", + "round brackets", + 8082547756621048511, "TEXT", - "#/texts/78", + "#/texts/77", 1.0, - 12178341415895527965, - 17882005209631256296, + 15451245949012109980, + 860212891132498684, 18446744073709551615, 18446744073709551615, - 580, - 583, - 580, - 583, - 113, - 114, + 105, + 118, + 105, + 118, + 16, + 20, true, - "top", - "top" + "(or document)", + "(or document)" ], [ - "term", - "single-term", - 7791113385466815951, + "expression", + "word-concatenation", + 8082547756621048511, "TEXT", - "#/texts/78", + "#/texts/77", 1.0, - 8106478700889254291, - 5556909971365264072, + 5470814635586025487, + 3023904040799855893, 18446744073709551615, 18446744073709551615, - 598, - 605, - 598, - 605, - 117, - 118, + 68, + 81, + 68, + 81, + 11, + 12, true, - "storage", - "storage" + "compute-heavy", + "compute-heavy" + ], + [ + "sentence", + "", + 8082547756621048511, + "TEXT", + "#/texts/77", + 1.0, + 8943027620035512136, + 10854655135118326590, + 18446744073709551615, + 18446744073709551615, + 31, + 125, + 31, + 125, + 6, + 22, + true, + "Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", + "Whenever possible we parallelise the compute-heavy operations at the page (or document) level." ], [ "term", "single-term", - 7791113385466815951, + 8082547756621048511, "TEXT", - "#/texts/78", + "#/texts/77", 1.0, - 12178341415895571467, - 17882001345722866996, + 7803735128811820247, + 6440046018987122155, 18446744073709551615, 18446744073709551615, - 610, - 613, - 610, - 613, - 119, - 120, + 17, + 29, + 17, + 29, + 3, + 5, true, - "act", - "act" + "whole corpus", + "whole corpus" ], [ "term", "single-term", - 7791113385466815951, + 8082547756621048511, "TEXT", - "#/texts/78", + "#/texts/77", 1.0, - 7393395602818997382, - 2445570796174034956, + 13988986336887005746, + 1446674937315880970, 18446744073709551615, 18446744073709551615, - 620, - 632, - 620, - 632, - 122, - 123, + 68, + 92, + 68, + 92, + 11, + 13, true, - "access-layer", - "access-layer" + "compute-heavy operations", + "compute-heavy operations" ], [ - "verb", - "compound-verb", - 7791113385466815951, + "term", + "single-term", + 8082547756621048511, "TEXT", - "#/texts/78", + "#/texts/77", 1.0, - 9001167546411496730, - 5107946166734090998, + 389609625632301461, + 15632188389001375550, 18446744073709551615, 18446744073709551615, - 111, - 122, - 111, - 122, - 22, - 24, + 100, + 104, + 100, + 104, + 15, + 16, true, - "is composed", - "is composed" + "page", + "page" ], [ - "verb", - "compound-verb", - 7791113385466815951, + "term", + "single-term", + 8082547756621048511, "TEXT", - "#/texts/78", + "#/texts/77", 1.0, - 4557608131655756693, - 10894415445021592676, + 14650401089286948001, + 1809325515137941529, 18446744073709551615, 18446744073709551615, - 464, - 502, - 464, - 502, - 87, - 94, + 109, + 117, + 109, + 117, + 18, + 19, true, - "is not build to be queried efficiently", - "is not build to be queried efficiently" + "document", + "document" ], [ - "verb", - "single-verb", - 7791113385466815951, + "term", + "single-term", + 8082547756621048511, "TEXT", - "#/texts/78", + "#/texts/77", 1.0, - 6171728176299542016, - 16532079825940175611, + 329104161602483077, + 5312276037637913177, 18446744073709551615, 18446744073709551615, - 190, - 199, - 190, - 199, - 36, - 37, + 119, + 124, + 119, + 124, + 20, + 21, true, - "processed", - "processed" + "level", + "level" ], [ "verb", "single-verb", - 7791113385466815951, + 8082547756621048511, "TEXT", - "#/texts/78", + "#/texts/77", 1.0, - 16381206517379850387, - 15480389052081010479, + 6167836358624303500, + 7818132889864191879, 18446744073709551615, 18446744073709551615, - 218, - 224, - 218, - 224, - 41, - 42, + 0, + 9, + 0, + 9, + 0, + 1, true, - "parsed", - "parsed" + "operating", + "operating" ], [ "verb", "single-verb", - 7791113385466815951, + 8082547756621048511, "TEXT", - "#/texts/78", + "#/texts/77", 1.0, - 16381206578935372333, - 11959326053892654989, + 18223316012831076072, + 4378757623349607195, 18446744073709551615, 18446744073709551615, - 293, - 299, - 293, - 299, - 56, - 57, + 52, + 63, + 52, + 63, + 9, + 10, true, - "stores", - "stores" + "parallelise", + "parallelise" ], [ - "verb", - "single-verb", - 7791113385466815951, + "conn", + "single-conn", + 8082547756621048511, "TEXT", - "#/texts/78", + "#/texts/77", 1.0, - 16381206569317834029, - 11936595447040454128, + 16381206566339127348, + 1222781431356980611, 18446744073709551615, 18446744073709551615, - 364, - 370, - 364, - 370, - 68, - 69, + 10, + 16, + 10, + 16, + 1, + 3, true, - "allows", - "allows" + "on the", + "on the" ], [ - "verb", - "single-verb", - 7791113385466815951, + "conn", + "single-conn", + 8082547756621048511, "TEXT", - "#/texts/78", + "#/texts/77", 1.0, - 329104161785194305, - 774090374362380612, + 16381206568372064271, + 9744783902447945030, 18446744073709551615, 18446744073709551615, - 384, - 389, - 384, - 389, - 72, - 73, + 93, + 99, + 93, + 99, + 13, + 15, true, - "scale", - "scale" + "at the", + "at the" ], [ - "verb", - "single-verb", + "numval", + "ival", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 6171728176299542016, - 16532079825940159645, + 17767354399704235156, + 7397297711065841756, 18446744073709551615, 18446744073709551615, - 431, - 440, - 431, - 440, - 81, - 82, + 1, + 2, + 1, + 2, + 1, + 2, true, - "processed", - "processed" + "4", + "4" ], [ - "verb", - "single-verb", + "parenthesis", + "reference", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 15441160910541486535, - 14307783832258935505, + 12178341415896395057, + 17882276138977820280, 18446744073709551615, 18446744073709551615, - 510, - 512, - 510, - 512, - 96, - 97, + 0, + 3, + 0, + 3, + 0, + 3, true, - "is", - "is" + "(4)", + "(4)" ], [ - "verb", - "single-verb", + "parenthesis", + "round brackets", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 12178341415895640485, - 17882295449136083937, + 7182556421351177654, + 17408013221930808816, 18446744073709551615, 18446744073709551615, - 520, - 523, - 520, - 523, - 99, - 100, + 207, + 256, + 207, + 256, + 38, + 50, true, - "put", - "put" + "(e. g. the parsed PDF pages, trained models, etc)", + "(e. g. the parsed PDF pages, trained models, etc)" ], [ - "verb", - "single-verb", + "parenthesis", + "round brackets", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 12178341415895516060, - 17882004701528519561, + 8366781084765568282, + 12073061348432203105, 18446744073709551615, 18446744073709551615, - 557, - 560, - 557, - 560, - 108, - 109, + 541, + 576, + 541, + 576, + 103, + 112, true, - "use", - "use" + "(in our case we use MongoDB$^{17}$)", + "(in our case we use MongoDB$^{17}$)" ], [ - "verb", - "single-verb", + "expression", + "common", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 16381206594265787492, - 12314552731610003625, + 15441160910541487324, + 14307783780196245817, 18446744073709551615, 18446744073709551615, - 587, - 593, - 587, - 593, - 115, - 116, + 208, + 213, + 208, + 213, + 39, + 40, true, - "manage", - "manage" + "eg", + "e. g." ], [ - "conn", - "single-conn", + "expression", + "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 389609625631229034, - 10076688135462708121, + 5910674217167684246, + 3596748896249425734, 18446744073709551615, 18446744073709551615, - 20, - 24, - 20, - 24, - 6, - 7, + 147, + 159, + 147, + 159, + 30, + 31, true, - "that", - "that" + "object-store", + "object-store" ], [ - "conn", - "single-conn", + "expression", + "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 16381206568455155979, - 11882547791157225115, + 5910674217167684246, + 3596748896249446260, 18446744073709551615, 18446744073709551615, - 54, - 60, - 54, - 60, - 12, - 14, + 333, + 345, + 333, + 345, + 64, + 65, true, - "as the", - "as the" + "object-store", + "object-store" ], [ - "conn", - "single-conn", + "expression", + "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 14637917359887717745, - 15358780278905995948, + 5910674217167684246, + 3596748896249438801, 18446744073709551615, 18446744073709551615, - 69, - 77, - 69, - 77, - 15, - 17, + 351, + 363, + 351, + 363, + 67, + 68, true, - "from the", - "from the" + "object-store", + "object-store" ], [ - "conn", - "single-conn", + "expression", + "word-concatenation", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 12178341415895623120, - 17882259379782471845, + 7393395602818997382, + 2445570796174034956, 18446744073709551615, 18446744073709551615, + 620, + 632, + 620, + 632, + 122, 123, - 126, - 123, - 126, - 24, - 25, true, - "out", - "out" + "access-layer", + "access-layer" ], [ - "conn", - "single-conn", + "expression", + "latex", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 15441160910541485670, - 14307783790885956650, + 329104159258632281, + 15385845941904838235, 18446744073709551615, 18446744073709551615, - 127, - 129, - 127, - 129, - 25, - 26, + 568, + 575, + 568, + 575, + 110, + 111, true, - "of", - "of" + "^{17}", + "$^{17}$" ], [ - "conn", - "single-conn", + "sentence", + "", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 389609625631229034, - 10076688135462665568, + 17454558322938465001, + 17890104706267422072, 18446744073709551615, 18446744073709551615, - 160, - 164, - 160, - 164, - 31, - 32, + 4, + 346, + 4, + 346, + 3, + 66, true, - "that", - "that" + "A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store.", + "A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store." ], [ - "conn", - "single-conn", + "sentence", + "", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 16381206564601699726, - 11976296382474591180, + 5622932258824479139, + 10516707730337709393, 18446744073709551615, 18446744073709551615, - 208, - 217, - 208, - 217, - 39, - 41, + 347, + 451, + 347, + 451, + 66, + 84, true, - "eg the", - "e. g. the" + "The object-store allows us to easily scale the storage with regard to the number of processed documents.", + "The object-store allows us to easily scale the storage with regard to the number of processed documents." ], [ - "conn", - "single-conn", + "sentence", + "", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 8106342927225405366, - 3870861981915582763, + 8816129983583997199, + 15303863633151315650, 18446744073709551615, 18446744073709551615, - 313, - 320, - 313, - 320, - 59, - 61, + 452, + 633, + 452, + 633, + 84, + 124, true, - "of each", - "of each" + "However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", + "However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer." ], [ - "conn", - "single-conn", + "term", + "enum-term-mark-2", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 16381206560518651853, - 14827520425559992801, + 9482072146671435678, + 4852192659277363715, 18446744073709551615, 18446744073709551615, - 326, - 332, - 326, - 332, - 62, - 64, + 598, + 613, + 598, + 613, + 117, + 120, true, - "in the", - "in the" + "storage and act", + "storage and act" ], [ - "conn", - "single-conn", + "term", + "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 389609625618037948, - 10078261007606591819, + 13382702060117711634, + 866988340100178668, 18446744073709551615, 18446744073709551615, - 402, - 406, - 402, - 406, - 75, - 76, + 6, + 19, + 6, + 19, + 4, + 6, true, - "with", - "with" + "storage layer", + "storage layer" ], [ - "conn", - "single-conn", + "term", + "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 15441160910541485670, - 14307783790886352415, + 13382702060117711634, + 866988340100185003, 18446744073709551615, 18446744073709551615, - 428, - 430, - 428, - 430, - 80, - 81, + 97, + 110, + 97, + 110, + 20, + 22, true, - "of", - "of" + "storage layer", + "storage layer" ], [ - "conn", - "single-conn", + "term", + "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 15441160910541486538, - 14307783830786016801, + 2903324788977241891, + 13900858649375924507, 18446744073709551615, 18446744073709551615, - 542, - 544, - 542, - 544, - 104, - 105, + 225, + 234, + 225, + 234, + 42, + 44, true, - "in", - "in" + "PDF pages", + "PDF pages" ], [ - "conn", - "single-conn", + "term", + "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 15441160910541485678, - 14307783792085522482, + 9804322186740216471, + 16267225850960798578, 18446744073709551615, 18446744073709551615, - 577, - 579, - 577, - 579, - 112, - 113, + 236, + 250, + 236, + 250, + 45, + 47, true, - "on", - "on" + "trained models", + "trained models" ], [ - "conn", - "single-conn", + "term", + "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 329104159171729452, - 15392320057005504366, + 1944794866286482065, + 4090230587991277542, 18446744073709551615, 18446744073709551615, - 614, - 619, - 614, - 619, - 120, - 122, + 263, + 287, + 263, + 287, + 52, + 55, true, - "as an", - "as an" + "queryable NoSQL database", + "queryable NoSQL database" ], [ - "conn", - "single-conn", + "term", + "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 15441160910541485865, - 14307783789814212187, + 16772942504422841315, + 12153285632385192646, 18446744073709551615, 18446744073709551615, - 374, - 376, - 374, - 376, - 70, - 71, + 526, + 540, + 526, + 540, + 101, + 103, true, - "to", - "to" + "NoSQL database", + "NoSQL database" ], [ - "conn", - "single-conn", + "term", + "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 16381206519425733256, - 15635996210936257264, + 16381206578935372333, + 11959326053889459522, 18446744073709551615, 18446744073709551615, - 414, - 420, - 414, - 420, - 77, - 79, + 25, + 31, + 25, + 31, + 7, + 8, true, - "to the", - "to the" + "stores", + "stores" ], [ - "conn", - "single-conn", + "term", + "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 15441160910541485865, - 14307783789814206195, + 6167933651658664291, + 16849655876428761988, 18446744073709551615, 18446744073709551615, - 477, - 479, - 477, - 479, - 90, - 91, + 36, + 45, + 36, + 45, + 9, + 10, true, - "to", - "to" + "documents", + "documents" ], [ - "conn", - "single-conn", + "term", + "single-term", 7791113385466815951, "TEXT", "#/texts/78", 1.0, - 15441160910541485865, - 14307783789814213462, + 8106478445190161533, + 10724655109163266628, 18446744073709551615, 18446744073709551615, - 584, - 586, - 584, - 586, - 114, - 115, + 61, + 68, + 61, + 68, + 14, + 15, true, - "to", - "to" + "results", + "results" ], [ - "expression", - "common", - 2845012065511066307, + "term", + "single-term", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 15441160910541486545, - 3028421707917465902, + 990358581043194791, + 9157342138188045037, 18446744073709551615, 18446744073709551615, - 69, - 73, - 69, - 73, - 13, - 14, + 78, + 91, + 78, + 91, + 17, + 18, true, - "ie", - "i.e." + "microservices", + "microservices" ], [ - "expression", - "common", - 2845012065511066307, + "term", + "single-term", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 12178341415895450733, - 6765936968254123994, + 14635102416861801722, + 13921851080814198183, 18446744073709551615, 18446744073709551615, - 561, - 565, - 561, - 565, - 98, - 99, + 134, + 142, + 134, + 142, + 27, + 28, true, - "etc", - "etc." + "services", + "services" ], [ - "expression", - "apostrophe", - 2845012065511066307, + "term", + "single-term", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 389609625696231302, - 874520044884738072, + 5910674217167684246, + 3596748896249425734, 18446744073709551615, 18446744073709551615, - 79, - 84, - 79, - 84, - 15, - 16, + 147, + 159, + 147, + 159, + 30, + 31, true, - "dont", - "don't" + "object-store", + "object-store" ], [ - "expression", - "word-concatenation", - 2845012065511066307, + "term", + "single-term", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 5044385734724420019, - 15039915568025682583, + 16381206578935372333, + 11959326053892662509, 18446744073709551615, 18446744073709551615, - 207, - 223, - 207, - 223, - 40, - 41, + 165, + 171, + 165, + 171, + 32, + 33, true, - "state-of-the-art", - "state-of-the-art" + "stores", + "stores" ], [ - "expression", - "word-concatenation", - 2845012065511066307, + "term", + "single-term", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 15169931585135175826, - 5000232017329418031, + 6167933651658664291, + 16849655876428781193, 18446744073709551615, 18446744073709551615, - 296, - 307, - 296, - 307, - 57, - 58, + 176, + 185, + 176, + 185, + 34, + 35, true, - "cloud-based", - "cloud-based" + "documents", + "documents" ], [ - "expression", - "word-concatenation", - 2845012065511066307, + "term", + "single-term", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 17036338369050073511, - 11876619048247685695, + 16381206578939110576, + 11959779027228109556, 18446744073709551615, 18446744073709551615, - 517, - 529, - 517, - 529, - 92, - 93, + 200, + 206, + 200, + 206, + 37, + 38, true, - "data-at-rest", - "data-at-rest" + "stages", + "stages" ], [ - "sentence", - "", - 2845012065511066307, + "term", + "single-term", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 6380046225039059930, - 1331102669406003609, + 14638347573453462708, + 11572179125786979791, 18446744073709551615, 18446744073709551615, - 0, - 125, - 0, - 125, - 0, - 26, + 304, + 312, + 304, + 312, + 58, + 59, true, - "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it.", - "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it." + "metadata", + "metadata" ], [ - "sentence", - "", - 2845012065511066307, + "term", + "single-term", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 17685124856943080749, - 17041292620595508182, + 389609625697824016, + 10086809110828577778, 18446744073709551615, 18446744073709551615, - 126, - 287, - 126, - 287, - 26, - 55, + 321, + 325, + 321, + 325, + 61, + 62, true, - "This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ.", - "This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ." + "file", + "file" ], [ "term", - "enum-term-mark-4", - 2845012065511066307, + "single-term", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 6417746280621449074, - 4697720481231698323, + 5910674217167684246, + 3596748896249446260, 18446744073709551615, 18446744073709551615, - 259, - 286, - 259, - 286, - 49, - 54, + 333, + 345, + 333, + 345, + 64, + 65, true, - "MongoDB, Redis and RabbitMQ", - "MongoDB, Redis and RabbitMQ" + "object-store", + "object-store" ], [ "term", "single-term", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 5470814617574924291, - 6460878720216756235, + 5910674217167684246, + 3596748896249438801, 18446744073709551615, 18446744073709551615, - 40, - 53, - 40, - 53, - 8, - 10, + 351, + 363, + 351, + 363, + 67, + 68, true, - "compute layer", - "compute layer" + "object-store", + "object-store" ], [ "term", "single-term", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 1269674564249719737, - 10143729425890314060, + 8106478700889254291, + 5556909971365278069, 18446744073709551615, 18446744073709551615, - 154, - 174, - 154, - 174, - 32, - 34, + 394, + 401, + 394, + 401, + 74, + 75, true, - "additional stability", - "additional stability" + "storage", + "storage" ], [ "term", "single-term", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 1142162931543826722, - 12325457556378121938, + 16381206521526353544, + 15388633276980566672, 18446744073709551615, 18446744073709551615, - 179, - 199, - 179, - 199, - 35, - 38, + 407, + 413, + 407, + 413, + 76, + 77, true, - "data safety concerns", - "data safety concerns" + "regard", + "regard" ], [ "term", "single-term", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 18398403256162540896, - 17634138153411653985, + 16381206574973295053, + 13874751204703215888, 18446744073709551615, 18446744073709551615, - 207, - 229, - 207, - 229, - 40, - 42, + 421, + 427, + 421, + 427, + 79, + 80, true, - "state-of-the-art tools", - "state-of-the-art tools" + "number", + "number" ], [ "term", "single-term", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 16381206568241679420, - 4060891951880802543, + 6167933651658664291, + 16849655876428863113, 18446744073709551615, 18446744073709551615, - 3, - 9, - 3, - 9, - 1, - 2, + 441, + 450, + 441, + 450, + 82, + 83, true, - "design", - "design" + "documents", + "documents" ], [ "term", "single-term", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 990358581043194791, - 13959011607976637903, + 389609625695123443, + 10086936390401945955, 18446744073709551615, 18446744073709551615, - 19, - 32, - 19, - 32, - 5, - 6, + 549, + 553, + 549, + 553, + 106, + 107, true, - "microservices", - "microservices" + "case", + "case" ], [ "term", "single-term", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 389609625696431489, - 874321965181516179, + 8106471292843117687, + 16633247643408803384, 18446744073709551615, 18446744073709551615, - 96, - 100, - 96, - 100, - 18, - 19, + 561, + 568, + 561, + 568, + 109, + 110, true, - "data", - "data" + "MongoDB", + "MongoDB" ], [ "term", "single-term", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 8106471292843117687, - 12637598480251227160, + 12178341415895527965, + 17882005209631256296, 18446744073709551615, 18446744073709551615, - 259, - 266, - 259, - 266, - 49, - 50, + 580, + 583, + 580, + 583, + 113, + 114, true, - "MongoDB", - "MongoDB" + "top", + "top" ], [ "term", "single-term", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 329104162172852560, - 4837073618500726237, + 8106478700889254291, + 5556909971365264072, 18446744073709551615, 18446744073709551615, - 268, - 273, - 268, - 273, - 51, - 52, + 598, + 605, + 598, + 605, + 117, + 118, true, - "Redis", - "Redis" + "storage", + "storage" ], [ "term", "single-term", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 14650252519075350211, - 3711332974386502064, + 12178341415895571467, + 17882001345722866996, 18446744073709551615, 18446744073709551615, - 278, - 286, - 278, - 286, - 53, - 54, + 610, + 613, + 610, + 613, + 119, + 120, true, - "RabbitMQ", - "RabbitMQ" + "act", + "act" ], [ - "verb", - "compound-verb", - 2845012065511066307, + "term", + "single-term", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 2116256630331469530, - 14901345236443982027, + 7393395602818997382, + 2445570796174034956, 18446744073709551615, 18446744073709551615, - 238, - 249, - 238, - 249, - 44, - 46, + 620, + 632, + 620, + 632, + 122, + 123, true, - "have chosen", - "have chosen" + "access-layer", + "access-layer" ], [ "verb", - "single-verb", - 2845012065511066307, + "compound-verb", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 12178341415895564896, - 6765880715380529817, + 9001167546411496730, + 5107946166734090998, 18446744073709551615, 18446744073709551615, - 54, - 57, - 54, - 57, - 10, - 11, + 111, + 122, + 111, + 122, + 22, + 24, true, - "are", - "are" + "is composed", + "is composed" ], [ "verb", - "single-verb", - 2845012065511066307, + "compound-verb", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 15441160910541486545, - 3028421707917465902, + 4557608131655756693, + 10894415445021592676, 18446744073709551615, 18446744073709551615, - 69, - 73, - 69, - 73, - 13, - 14, + 464, + 502, + 464, + 502, + 87, + 94, true, - "ie", - "i.e." + "is not build to be queried efficiently", + "is not build to be queried efficiently" ], [ "verb", "single-verb", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 16381206594265787492, - 6901918250154643292, + 6171728176299542016, + 16532079825940175611, 18446744073709551615, 18446744073709551615, - 85, - 91, - 85, - 91, - 16, - 17, + 190, + 199, + 190, + 199, + 36, + 37, true, - "manage", - "manage" + "processed", + "processed" ], [ "verb", "single-verb", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 8106342542940968443, - 7576392723763470277, + 16381206517379850387, + 15480389052081010479, 18446744073709551615, 18446744073709551615, - 111, - 118, - 111, - 118, - 22, - 23, + 218, + 224, + 218, + 224, + 41, + 42, true, - "operate", - "operate" + "parsed", + "parsed" ], [ "verb", "single-verb", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", + 1.0, + 16381206578935372333, + 11959326053892654989, + 18446744073709551615, + 18446744073709551615, + 293, + 299, + 293, + 299, + 56, + 57, + true, + "stores", + "stores" + ], + [ + "verb", + "single-verb", + 7791113385466815951, + "TEXT", + "#/texts/78", 1.0, 16381206569317834029, - 4041469951276441022, + 11936595447040454128, 18446744073709551615, 18446744073709551615, - 131, - 137, - 131, - 137, - 27, - 28, + 364, + 370, + 364, + 370, + 68, + 69, true, "allows", "allows" @@ -65810,2603 +67666,2687 @@ [ "verb", "single-verb", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 329104159241711190, - 4780637315619849705, + 329104161785194305, + 774090374362380612, 18446744073709551615, 18446744073709551615, - 144, - 149, - 144, - 149, - 30, - 31, + 384, + 389, + 384, + 389, + 72, + 73, true, - "trust", - "trust" + "scale", + "scale" ], [ - "conn", - "single-conn", - 2845012065511066307, + "verb", + "single-verb", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 8106478685702231057, - 6290141210502214270, + 6171728176299542016, + 16532079825940159645, 18446744073709551615, 18446744073709551615, - 251, - 258, - 251, - 258, - 47, - 49, + 431, + 440, + 431, + 440, + 81, + 82, true, - "such as", - "such as" + "processed", + "processed" ], [ - "conn", - "single-conn", - 2845012065511066307, + "verb", + "single-verb", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 15441160910541480853, - 3028421248297502894, + 15441160910541486535, + 14307783832258935505, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 510, + 512, + 510, + 512, + 96, + 97, true, - "By", - "By" + "is", + "is" ], [ - "conn", - "single-conn", - 2845012065511066307, + "verb", + "single-verb", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 16381206560518651853, - 5658730714977917939, + 12178341415895640485, + 17882295449136083937, 18446744073709551615, 18446744073709551615, - 33, - 39, - 33, - 39, - 6, - 8, + 520, + 523, + 520, + 523, + 99, + 100, true, - "in the", - "in the" + "put", + "put" ], [ - "conn", - "single-conn", - 2845012065511066307, + "verb", + "single-verb", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 15441160910541485678, - 3028421580747422282, + 12178341415895516060, + 17882004701528519561, 18446744073709551615, 18446744073709551615, - 119, - 121, - 119, - 121, - 23, - 24, + 557, + 560, + 557, + 560, + 108, + 109, true, - "on", - "on" + "use", + "use" ], [ - "conn", - "single-conn", - 2845012065511066307, + "verb", + "single-verb", + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 389609625631229034, - 1006213453265744340, + 16381206594265787492, + 12314552731610003625, 18446744073709551615, 18446744073709551615, - 230, - 234, - 230, - 234, - 42, - 43, + 587, + 593, + 587, + 593, + 115, + 116, true, - "that", - "that" + "manage", + "manage" ], [ "conn", "single-conn", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 15441160910541485865, - 3028421580029648804, + 389609625631229034, + 10076688135462708121, 18446744073709551615, 18446744073709551615, - 141, - 143, - 141, - 143, - 29, - 30, + 20, + 24, + 20, + 24, + 6, + 7, true, - "to", - "to" + "that", + "that" ], [ "conn", "single-conn", - 2845012065511066307, + 7791113385466815951, "TEXT", - "#/texts/79", + "#/texts/78", 1.0, - 16381206519425733256, - 14507313859404429169, + 16381206568455155979, + 11882547791157225115, 18446744073709551615, 18446744073709551615, - 200, - 206, - 200, - 206, - 38, - 40, + 54, + 60, + 54, + 60, + 12, + 14, true, - "to the", - "to the" + "as the", + "as the" ], [ - "numval", - "ival", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 17767354399704235158, - 2662324577726766030, + 14637917359887717745, + 15358780278905995948, 18446744073709551615, 18446744073709551615, - 132, - 133, - 132, - 133, - 25, - 26, + 69, + 77, + 69, + 77, + 15, + 17, true, - "6", - "6" + "from the", + "from the" ], [ - "parenthesis", - "round brackets", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 14654063839594813536, - 538351782817809335, + 12178341415895623120, + 17882259379782471845, 18446744073709551615, 18446744073709551615, + 123, 126, - 134, + 123, 126, - 134, - 22, - 27, + 24, + 25, true, - "(Fig. 6)", - "(Fig. 6)" + "out", + "out" ], [ - "expression", - "common", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 15441160910541487324, - 13392634893759554933, + 15441160910541485670, + 14307783790885956650, 18446744073709551615, 18446744073709551615, - 302, - 307, - 302, - 307, - 55, - 56, + 127, + 129, + 127, + 129, + 25, + 26, true, - "eg", - "e. g." + "of", + "of" ], [ - "expression", - "apostrophe", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 329104162099298038, - 4763741573118152283, + 389609625631229034, + 10076688135462665568, 18446744073709551615, 18446744073709551615, - 432, - 438, - 432, - 438, - 74, - 75, + 160, + 164, + 160, + 164, + 31, + 32, true, - "didnt", - "didn't" + "that", + "that" ], [ - "expression", - "word-concatenation", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 12953096966692611490, - 12141023217086338960, + 16381206564601699726, + 11976296382474591180, 18446744073709551615, 18446744073709551615, - 416, - 431, - 416, - 431, - 73, - 74, + 208, + 217, + 208, + 217, + 39, + 41, true, - "result-backends", - "result-backends" + "eg the", + "e. g. the" ], [ - "expression", - "word-concatenation", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 15312996304332666827, - 6035818222047083309, + 8106342927225405366, + 3870861981915582763, 18446744073709551615, 18446744073709551615, - 449, - 462, - 449, - 462, - 77, - 78, + 313, + 320, + 313, + 320, + 59, + 61, true, - "auto-cleaning", - "auto-cleaning" + "of each", + "of each" ], [ - "sentence", - "", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 530368145582943314, - 13295761460699684392, + 16381206560518651853, + 14827520425559992801, 18446744073709551615, 18446744073709551615, - 0, - 109, - 0, - 109, - 0, - 19, + 326, + 332, + 326, + 332, + 62, + 64, true, - "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform.", - "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform." + "in the", + "in the" ], [ - "sentence", - "", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 13690589465324431830, - 729303492509750058, + 389609625618037948, + 10078261007606591819, 18446744073709551615, 18446744073709551615, - 110, - 243, - 110, - 243, - 19, - 46, + 402, + 406, + 402, + 406, + 75, + 76, true, - "From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services.", - "From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services." + "with", + "with" ], [ - "sentence", - "", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 3799550530227447837, - 10235095163850190658, + 15441160910541485670, + 14307783790886352415, 18446744073709551615, 18446744073709551615, - 244, - 397, - 244, - 397, - 46, - 70, + 428, + 430, + 428, + 430, + 80, + 81, true, - "During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks.", - "During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks." + "of", + "of" ], [ - "term", - "enum-term-mark-2", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 17670119798254759554, - 11044523659752658873, + 15441160910541486538, + 14307783830786016801, 18446744073709551615, 18446744073709551615, - 362, - 384, - 362, - 384, - 65, - 68, + 542, + 544, + 542, + 544, + 104, + 105, true, - "performance or scaling", - "performance or scaling" + "in", + "in" ], [ - "term", - "single-term", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 14228775800347505852, - 1270934594966554784, + 15441160910541485678, + 14307783792085522482, 18446744073709551615, 18446744073709551615, - 40, - 52, - 40, - 52, - 8, - 10, + 577, + 579, + 577, + 579, + 112, + 113, true, - "crucial role", - "crucial role" + "on", + "on" ], [ - "term", - "single-term", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 7308464677014704448, - 5082168475013008068, + 329104159171729452, + 15392320057005504366, 18446744073709551615, 18446744073709551615, + 614, + 619, + 614, + 619, + 120, + 122, + true, + "as an", + "as an" + ], + [ + "conn", + "single-conn", + 7791113385466815951, + "TEXT", + "#/texts/78", + 1.0, + 15441160910541485865, + 14307783789814212187, + 18446744073709551615, + 18446744073709551615, + 374, + 376, + 374, + 376, + 70, 71, - 91, - 71, - 91, - 13, - 15, true, - "scaling requirements", - "scaling requirements" + "to", + "to" ], [ - "term", - "single-term", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 5470814617574924291, - 14078314495213552738, + 16381206519425733256, + 15635996210936257264, 18446744073709551615, 18446744073709551615, - 157, - 170, - 157, - 170, - 33, - 35, + 414, + 420, + 414, + 420, + 77, + 79, true, - "compute layer", - "compute layer" + "to the", + "to the" ], [ - "term", - "single-term", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 2732848371272418679, - 13963794921672736533, + 15441160910541485865, + 14307783789814206195, 18446744073709551615, 18446744073709551615, - 177, - 196, - 177, - 196, - 37, - 39, + 477, + 479, + 477, + 479, + 90, + 91, true, - "considerable amount", - "considerable amount" + "to", + "to" ], [ - "term", - "single-term", - 15072914837937068796, + "conn", + "single-conn", + 7791113385466815951, "TEXT", - "#/texts/80", + "#/texts/78", 1.0, - 9137804915913150128, - 4468637458257639239, + 15441160910541485865, + 14307783789814213462, 18446744073709551615, 18446744073709551615, - 225, - 242, - 225, - 242, - 43, - 45, + 584, + 586, + 584, + 586, + 114, + 115, true, - "external services", - "external services" + "to", + "to" ], [ - "term", - "single-term", - 15072914837937068796, + "expression", + "common", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 2183649553633451561, - 2020007629353495081, + 15441160910541486545, + 3028421707917465902, 18446744073709551615, 18446744073709551615, - 280, - 296, - 280, - 296, - 51, - 53, + 69, + 73, + 69, + 73, + 13, + 14, true, - "multiple options", - "multiple options" + "ie", + "i.e." ], [ - "term", - "single-term", - 15072914837937068796, + "expression", + "common", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 8453966769027728994, - 16465619217972769818, + 12178341415895450733, + 6765936968254123994, 18446744073709551615, 18446744073709551615, - 351, - 373, - 351, - 373, - 64, - 66, + 561, + 565, + 561, + 565, + 98, + 99, true, - "inadequate performance", - "inadequate performance" + "etc", + "etc." ], [ - "term", - "single-term", - 15072914837937068796, + "expression", + "apostrophe", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 5368659910297958112, - 6849806055120467904, + 389609625696231302, + 874520044884738072, 18446744073709551615, 18446744073709551615, - 377, - 396, - 377, - 396, - 67, - 69, + 79, + 84, + 79, + 84, + 15, + 16, true, - "scaling bottlenecks", - "scaling bottlenecks" + "dont", + "don't" ], [ - "term", - "single-term", - 15072914837937068796, + "expression", + "word-concatenation", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 16381206532620919857, - 14992785282357168667, + 5044385734724420019, + 15039915568025682583, 18446744073709551615, 18446744073709551615, - 4, - 10, - 4, - 10, - 1, - 2, + 207, + 223, + 207, + 223, + 40, + 41, true, - "choice", - "choice" + "state-of-the-art", + "state-of-the-art" ], [ - "term", - "single-term", - 15072914837937068796, + "expression", + "word-concatenation", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 14635102416861801722, - 6944138304193469372, + 15169931585135175826, + 5000232017329418031, 18446744073709551615, 18446744073709551615, - 18, - 26, - 18, - 26, - 4, - 5, + 296, + 307, + 296, + 307, + 57, + 58, true, - "services", - "services" + "cloud-based", + "cloud-based" ], [ - "term", - "single-term", - 15072914837937068796, + "expression", + "word-concatenation", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 14814125365076808131, - 259608735306547128, + 17036338369050073511, + 11876619048247685695, 18446744073709551615, 18446744073709551615, - 100, - 108, - 100, - 108, - 17, - 18, + 517, + 529, + 517, + 529, + 92, + 93, true, - "platform", - "platform" + "data-at-rest", + "data-at-rest" ], [ - "term", - "single-term", - 15072914837937068796, + "sentence", + "", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 16381206578503830159, - 5707938631766375304, + 6380046225039059930, + 1331102669406003609, 18446744073709551615, 18446744073709551615, - 119, + 0, 125, - 119, + 0, 125, - 21, - 22, + 0, + 26, true, - "sketch", - "sketch" + "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it.", + "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it." ], [ - "term", - "single-term", - 15072914837937068796, + "sentence", + "", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 12178341415896108354, - 15797295600457378135, + 17685124856943080749, + 17041292620595508182, 18446744073709551615, 18446744073709551615, - 127, - 130, - 127, - 130, - 23, - 24, + 126, + 287, + 126, + 287, + 26, + 55, true, - "Fig", - "Fig" + "This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ.", + "This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ." ], [ "term", - "single-term", - 15072914837937068796, + "enum-term-mark-4", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 10844940863803374990, - 9655000055558243874, + 6417746280621449074, + 4697720481231698323, 18446744073709551615, 18446744073709551615, - 200, - 213, - 200, - 213, - 40, - 41, + 259, + 286, + 259, + 286, + 49, + 54, true, - "communication", - "communication" + "MongoDB, Redis and RabbitMQ", + "MongoDB, Redis and RabbitMQ" ], [ "term", "single-term", - 15072914837937068796, + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 1525875096007260836, - 12799154604316470877, + 5470814617574924291, + 6460878720216756235, 18446744073709551615, 18446744073709551615, - 255, - 266, - 255, - 266, - 48, - 49, + 40, + 53, + 40, + 53, + 8, + 10, true, - "development", - "development" + "compute layer", + "compute layer" ], [ "term", "single-term", - 15072914837937068796, + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 14635102416861801722, - 6944138304193064763, + 1269674564249719737, + 10143729425890314060, 18446744073709551615, 18446744073709551615, - 331, - 339, - 331, - 339, - 61, - 62, + 154, + 174, + 154, + 174, + 32, + 34, true, - "services", - "services" + "additional stability", + "additional stability" ], [ - "verb", - "compound-verb", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 13963960872604267983, - 9741187444941953278, + 1142162931543826722, + 12325457556378121938, 18446744073709551615, 18446744073709551615, - 27, - 37, - 27, - 37, - 5, - 7, + 179, + 199, + 179, + 199, + 35, + 38, true, - "plays also", - "plays also" + "data safety concerns", + "data safety concerns" ], [ - "verb", - "compound-verb", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 3304740881499173399, - 16233133484570305536, + 18398403256162540896, + 17634138153411653985, 18446744073709551615, 18446744073709551615, - 311, - 325, - 311, - 325, - 57, - 60, + 207, + 229, + 207, + 229, + 40, + 42, true, - "had to replace", - "had to replace" + "state-of-the-art tools", + "state-of-the-art tools" ], [ - "verb", - "single-verb", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 15360283586477443351, - 2607233536085319416, + 12206009578906402256, + 16259073236375936761, 18446744073709551615, 18446744073709551615, - 56, - 66, - 56, - 66, - 11, - 12, + 296, + 316, + 296, + 316, + 57, + 59, true, - "addressing", - "addressing" + "cloud-based platform", + "cloud-based platform" ], [ - "verb", - "single-verb", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 15441160910541486535, - 13392635867599855620, + 5741362909955913015, + 11730433319097946883, 18446744073709551615, 18446744073709551615, - 139, - 141, - 139, - 141, - 29, - 30, + 348, + 363, + 348, + 363, + 65, + 67, true, - "is", - "is" + "software assets", + "software assets" ], [ - "verb", - "single-verb", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 12178341415895601584, - 15797287691917345479, + 3042489792150624438, + 10666821026275440814, 18446744073709551615, 18446744073709551615, - 171, - 174, - 171, - 174, - 35, - 36, + 388, + 403, + 388, + 403, + 72, + 74, true, - "has", - "has" + "main deployment", + "main deployment" ], [ - "verb", - "single-verb", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 6172092587891830137, - 6944235926622048042, + 12042865047034155865, + 13701796757400581729, 18446744073709551615, 18446744073709551615, - 270, - 279, - 270, - 279, - 50, - 51, + 424, + 452, + 424, + 452, + 79, + 82, true, - "evaluated", - "evaluated" + "specialised vendors services", + "specialised vendors services" ], [ - "verb", - "single-verb", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 15441160910541487324, - 13392634893759554933, + 3253150599721255436, + 3115708939969640776, 18446744073709551615, 18446744073709551615, - 302, - 307, - 302, - 307, - 55, - 56, + 480, + 508, + 480, + 508, + 87, + 90, true, - "eg", - "e. g." + "latest industry requirements", + "latest industry requirements" ], [ - "conn", - "single-conn", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 2617690495147367356, - 3995802380905838725, + 12361947180622931307, + 6107159509510404051, 18446744073709551615, 18446744073709551615, - 142, - 152, - 142, - 152, - 30, - 32, + 517, + 540, + 517, + 540, + 92, + 94, true, - "clear that", - "clear that" + "data-at-rest encryption", + "data-at-rest encryption" ], [ - "conn", - "single-conn", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 16381206565712212855, - 5357051908763334798, + 13310381990336316505, + 1214145885617522065, 18446744073709551615, 18446744073709551615, - 11, - 17, - 11, - 17, - 2, - 4, + 542, + 559, + 542, + 559, + 95, + 97, true, - "of the", - "of the" + "high availability", + "high availability" ], [ - "conn", - "single-conn", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 15441160910541486538, - 13392635867609642731, + 16381206568241679420, + 4060891951880802543, 18446744073709551615, 18446744073709551615, - 53, - 55, - 53, - 55, - 10, - 11, + 3, + 9, + 3, + 9, + 1, + 2, true, - "in", - "in" + "design", + "design" ], [ - "conn", - "single-conn", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 8106397727991264470, - 7534731816831827800, + 990358581043194791, + 13959011607976637903, 18446744073709551615, 18446744073709551615, - 92, - 99, - 92, - 99, - 15, - 17, + 19, + 32, + 19, + 32, + 5, + 6, true, - "for the", - "for the" + "microservices", + "microservices" ], [ - "conn", - "single-conn", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 14652309564084901216, - 17139314077627797878, + 389609625696431489, + 874321965181516179, 18446744073709551615, 18446744073709551615, - 110, - 118, - 110, - 118, + 96, + 100, + 96, + 100, + 18, 19, - 21, true, - "From the", - "From the" + "data", + "data" ], [ - "conn", - "single-conn", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 15441160910541485670, - 13392635753038274381, + 8106471292843117687, + 12637598480251227160, 18446744073709551615, 18446744073709551615, - 197, - 199, - 197, - 199, - 39, - 40, + 259, + 266, + 259, + 266, + 49, + 50, true, - "of", - "of" + "MongoDB", + "MongoDB" ], [ - "conn", - "single-conn", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 8601401817206609046, - 13002288130139499420, + 329104162172852560, + 4837073618500726237, 18446744073709551615, 18446744073709551615, - 214, - 224, - 214, - 224, - 41, - 43, + 268, + 273, + 268, + 273, + 51, + 52, true, - "with these", - "with these" + "Redis", + "Redis" ], [ - "conn", - "single-conn", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 1703385011780833119, - 14455781933540325166, + 14650252519075350211, + 3711332974386502064, 18446744073709551615, 18446744073709551615, - 244, - 254, - 244, - 254, - 46, - 48, + 278, + 286, + 278, + 286, + 53, + 54, true, - "During the", - "During the" + "RabbitMQ", + "RabbitMQ" ], [ - "conn", - "single-conn", - 15072914837937068796, + "term", + "single-term", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 8106397858129277841, - 5242979235823403228, + 14635106751859230946, + 3279977075117953545, 18446744073709551615, 18446744073709551615, - 340, - 347, - 340, - 347, + 322, + 330, + 322, + 330, + 61, 62, - 63, true, - "because", - "because" + "solution", + "solution" ], [ - "conn", - "single-conn", - 15072914837937068796, + "verb", + "compound-verb", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 15441160910541485670, - 13392635753038231319, + 2116256630331469530, + 14901345236443982027, 18446744073709551615, 18446744073709551615, - 348, - 350, - 348, - 350, - 63, - 64, + 238, + 249, + 238, + 249, + 44, + 46, true, - "of", - "of" + "have chosen", + "have chosen" ], [ - "conn", - "single-conn", - 15072914837937068796, + "verb", + "compound-verb", + 2845012065511066307, "TEXT", - "#/texts/80", + "#/texts/79", 1.0, - 15441160910541485865, - 13392635755399877181, + 17302021957935714782, + 6387910212371789810, 18446744073709551615, 18446744073709551615, - 315, - 317, - 315, - 317, - 58, - 59, + 367, + 378, + 367, + 378, + 68, + 70, true, - "to", - "to" + "be detached", + "be detached" ], [ - "expression", - "apostrophe", - 15263283599394646155, + "verb", + "compound-verb", + 2845012065511066307, "TEXT", - "#/texts/81", + "#/texts/79", 1.0, - 329104162099298038, - 2422170512955612338, + 5947520785570047620, + 9780743873997466930, 18446744073709551615, 18446744073709551615, - 27, - 33, - 27, - 33, - 6, - 7, + 411, + 420, + 411, + 420, + 76, + 78, true, - "didnt", - "didn't" + "be served", + "be served" ], [ - "sentence", - "", - 15263283599394646155, + "verb", + "compound-verb", + 2845012065511066307, "TEXT", - "#/texts/81", + "#/texts/79", 1.0, - 9738445166753142519, - 7077409306408156246, + 13934224220978156333, + 2821183657219865328, 18446744073709551615, 18446744073709551615, - 4, - 87, - 4, - 87, - 1, + 459, + 475, + 459, + 475, + 83, + 86, + true, + "are certified to", + "are certified to" + ], + [ + "verb", + "single-verb", + 2845012065511066307, + "TEXT", + "#/texts/79", + 1.0, + 12178341415895564896, + 6765880715380529817, + 18446744073709551615, + 18446744073709551615, + 54, + 57, + 54, + 57, + 10, + 11, + true, + "are", + "are" + ], + [ + "verb", + "single-verb", + 2845012065511066307, + "TEXT", + "#/texts/79", + 1.0, + 15441160910541486545, + 3028421707917465902, + 18446744073709551615, + 18446744073709551615, + 69, + 73, + 69, + 73, + 13, + 14, + true, + "ie", + "i.e." + ], + [ + "verb", + "single-verb", + 2845012065511066307, + "TEXT", + "#/texts/79", + 1.0, + 16381206594265787492, + 6901918250154643292, + 18446744073709551615, + 18446744073709551615, + 85, + 91, + 85, + 91, 16, + 17, true, - "GridFS storage, but it didn't fit to the constraints of typical cloud environments.", - "GridFS storage, but it didn't fit to the constraints of typical cloud environments." + "manage", + "manage" ], [ - "term", - "single-term", - 15263283599394646155, + "verb", + "single-verb", + 2845012065511066307, "TEXT", - "#/texts/81", + "#/texts/79", 1.0, - 3553616603590296979, - 16097117960287067168, + 8106342542940968443, + 7576392723763470277, 18446744073709551615, 18446744073709551615, - 4, - 18, - 4, - 18, - 1, - 3, + 111, + 118, + 111, + 118, + 22, + 23, true, - "GridFS storage", - "GridFS storage" + "operate", + "operate" ], [ - "term", - "single-term", - 15263283599394646155, + "verb", + "single-verb", + 2845012065511066307, "TEXT", - "#/texts/81", + "#/texts/79", 1.0, - 3164946639114553222, - 7659937814652463492, + 16381206569317834029, + 4041469951276441022, 18446744073709551615, 18446744073709551615, - 60, - 86, - 60, - 86, - 12, - 15, + 131, + 137, + 131, + 137, + 27, + 28, true, - "typical cloud environments", - "typical cloud environments" + "allows", + "allows" ], [ - "term", - "single-term", - 15263283599394646155, + "verb", + "single-verb", + 2845012065511066307, "TEXT", - "#/texts/81", + "#/texts/79", 1.0, - 12178341415895625823, - 10663577172675311427, + 329104159241711190, + 4780637315619849705, 18446744073709551615, 18446744073709551615, - 34, - 37, - 34, - 37, - 7, - 8, + 144, + 149, + 144, + 149, + 30, + 31, true, - "fit", - "fit" + "trust", + "trust" ], [ - "term", - "single-term", - 15263283599394646155, + "verb", + "single-verb", + 2845012065511066307, "TEXT", - "#/texts/81", + "#/texts/79", 1.0, - 2343820404875251124, - 4748486300187076231, + 329104162060230051, + 16723674555525513210, 18446744073709551615, 18446744073709551615, - 45, - 56, - 45, + 288, + 293, + 288, + 293, + 55, 56, - 10, - 11, true, - "constraints", - "constraints" + "Being", + "Being" ], [ "verb", "single-verb", - 15263283599394646155, + 2845012065511066307, "TEXT", - "#/texts/81", + "#/texts/79", 1.0, - 329104162099298038, - 2422170512955612338, + 16381206569317834029, + 4041469951276436768, 18446744073709551615, 18446744073709551615, - 27, - 33, - 27, - 33, - 6, - 7, + 331, + 337, + 331, + 337, + 62, + 63, true, - "didnt", - "didn't" + "allows", + "allows" ], [ "conn", "single-conn", - 15263283599394646155, + 2845012065511066307, "TEXT", - "#/texts/81", + "#/texts/79", 1.0, - 15441160910541485670, - 15469104452822855430, + 8106478685702231057, + 6290141210502214270, 18446744073709551615, 18446744073709551615, - 57, - 59, - 57, - 59, - 11, - 12, + 251, + 258, + 251, + 258, + 47, + 49, true, - "of", - "of" + "such as", + "such as" ], [ "conn", "single-conn", - 15263283599394646155, + 2845012065511066307, "TEXT", - "#/texts/81", + "#/texts/79", 1.0, - 16381206519425733256, - 10289373630862252080, + 8106478685702231057, + 6290141210502197413, 18446744073709551615, 18446744073709551615, - 38, - 44, - 38, - 44, - 8, - 10, + 509, + 516, + 509, + 516, + 90, + 92, true, - "to the", - "to the" + "such as", + "such as" ], [ - "numval", - "fval", - 11417717357379295278, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/82", + "#/texts/79", 1.0, - 12178341415896306586, - 2376192024093454144, + 15441160910541480853, + 3028421248297502894, 18446744073709551615, 18446744073709551615, 0, - 3, + 2, 0, - 3, + 2, 0, 1, true, - "4.2", - "4.2" + "By", + "By" ], [ - "numval", - "ival", - 9031137420247852045, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/83", + "#/texts/79", 1.0, - 15441160910541481862, - 10500741044532715512, + 16381206560518651853, + 5658730714977917939, 18446744073709551615, 18446744073709551615, - 50, - 52, - 50, - 52, - 7, + 33, + 39, + 33, + 39, + 6, 8, true, - "18", - "18" + "in the", + "in the" ], [ - "numval", - "ival", - 9031137420247852045, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/83", + "#/texts/79", 1.0, - 15441160910541481863, - 10500741044517231196, + 15441160910541485678, + 3028421580747422282, 18446744073709551615, 18446744073709551615, - 155, - 157, - 155, - 157, + 119, + 121, + 119, + 121, + 23, 24, - 25, true, - "19", - "19" + "on", + "on" ], [ - "expression", - "common", - 9031137420247852045, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/83", + "#/texts/79", 1.0, - 15441160910541487324, - 10500757812195718645, + 389609625631229034, + 1006213453265744340, 18446744073709551615, 18446744073709551615, - 121, - 126, - 121, - 126, - 18, - 19, + 230, + 234, + 230, + 234, + 42, + 43, true, - "eg", - "e. g." + "that", + "that" ], [ - "expression", - "word-concatenation", - 9031137420247852045, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/83", + "#/texts/79", 1.0, - 14042857724397157868, - 17436499209420645038, + 6187534615926030665, + 2333941156720616523, 18446744073709551615, 18446744073709551615, - 95, - 105, - 95, - 105, - 15, - 16, + 338, + 347, + 338, + 347, + 63, + 65, true, - "on-premise", - "on-premise" + "for these", + "for these" ], [ - "sentence", - "", - 9031137420247852045, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/83", + "#/texts/79", 1.0, - 16473487772931696221, - 1361496787505182232, + 14637917359887717745, + 7863405527781876570, 18446744073709551615, 18446744073709551615, - 0, - 171, - 0, - 171, - 0, - 27, + 379, + 387, + 379, + 387, + 70, + 72, true, - "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution.", - "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution." + "from the", + "from the" ], [ - "sentence", - "", - 9031137420247852045, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/83", + "#/texts/79", 1.0, - 13604474430867440219, - 15920079442920273776, + 15441160910541486989, + 3028421693438408488, 18446744073709551615, 18446744073709551615, - 172, - 302, - 172, - 302, - 27, - 48, + 421, + 423, + 421, + 423, + 78, + 79, true, - "Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", - "Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints." + "by", + "by" ], [ - "term", - "single-term", - 9031137420247852045, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/83", + "#/texts/79", 1.0, - 4315218641775224883, - 3783623336096074444, + 15441160910541485865, + 3028421580029648804, 18446744073709551615, 18446744073709551615, + 141, + 143, + 141, + 143, + 29, 30, - 49, - 30, - 49, - 5, - 7, true, - "Kubernetes clusters", - "Kubernetes clusters" + "to", + "to" ], [ - "term", - "single-term", - 9031137420247852045, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/83", + "#/texts/79", 1.0, - 7578678502347528407, - 16606690075113593003, + 16381206519425733256, + 14507313859404429169, 18446744073709551615, 18446744073709551615, - 66, - 86, - 66, - 86, - 10, - 13, + 200, + 206, + 200, + 206, + 38, + 40, true, - "many cloud providers", - "many cloud providers" + "to the", + "to the" ], [ - "term", - "single-term", - 9031137420247852045, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/83", + "#/texts/79", 1.0, - 17157390005033639285, - 14551521890127263578, + 15441160910541485865, + 3028421580029650856, 18446744073709551615, 18446744073709551615, - 95, - 119, - 95, - 119, - 15, - 17, - true, - "on-premise installations", - "on-premise installations" + 364, + 366, + 364, + 366, + 67, + 68, + true, + "to", + "to" ], [ - "term", - "single-term", - 9031137420247852045, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/83", + "#/texts/79", 1.0, - 15250872047548077430, - 7534455339628786157, + 15441160910541485865, + 3028421580029649693, 18446744073709551615, 18446744073709551615, - 137, - 154, - 137, - 154, - 21, - 24, + 408, + 410, + 408, + 410, + 75, + 76, true, - "IBM Cloud Private", - "IBM Cloud Private" + "to", + "to" ], [ - "term", - "single-term", - 9031137420247852045, + "conn", + "single-conn", + 2845012065511066307, "TEXT", - "#/texts/83", + "#/texts/79", 1.0, - 17140401278227586491, - 11321802952630178709, + 16381206519425733256, + 14507313859404314997, 18446744073709551615, 18446744073709551615, - 207, - 223, - 207, - 223, - 33, - 35, + 473, + 479, + 473, + 479, + 85, + 87, true, - "storage services", - "storage services" + "to the", + "to the" ], [ - "term", - "single-term", - 9031137420247852045, + "numval", + "ival", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 4047423525975715659, - 7947778581648084546, + 17767354399704235158, + 2662324577726766030, 18446744073709551615, 18446744073709551615, - 248, - 260, - 248, - 260, - 39, - 41, + 132, + 133, + 132, + 133, + 25, + 26, true, - "same cluster", - "same cluster" + "6", + "6" ], [ - "term", - "single-term", - 9031137420247852045, + "parenthesis", + "round brackets", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 14814125365076808131, - 2443639570324462603, + 14654063839594813536, + 538351782817809335, 18446744073709551615, 18446744073709551615, - 4, - 12, - 4, - 12, - 1, - 2, + 126, + 134, + 126, + 134, + 22, + 27, true, - "platform", - "platform" + "(Fig. 6)", + "(Fig. 6)" ], [ - "term", - "single-term", - 9031137420247852045, + "expression", + "common", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, 15441160910541487324, - 10500757812195718645, + 13392634893759554933, 18446744073709551615, 18446744073709551615, - 121, - 126, - 121, - 126, - 18, - 19, + 302, + 307, + 302, + 307, + 55, + 56, true, "eg", "e. g." ], [ - "term", - "single-term", - 9031137420247852045, + "expression", + "apostrophe", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 16659280385198228594, - 13641186927945667101, + 329104162099298038, + 4763741573118152283, 18446744073709551615, 18446744073709551615, - 158, - 170, - 158, - 170, - 25, - 26, + 432, + 438, + 432, + 438, + 74, + 75, true, - "distribution", - "distribution" + "didnt", + "didn't" ], [ - "term", - "single-term", - 9031137420247852045, + "expression", + "word-concatenation", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 13240311013633905449, - 2445508371176550978, + 12953096966692611490, + 12141023217086338960, 18446744073709551615, 18446744073709551615, - 189, - 201, - 189, - 201, - 30, - 31, + 416, + 431, + 416, + 431, + 73, + 74, true, - "requirements", - "requirements" + "result-backends", + "result-backends" ], [ - "term", - "single-term", - 9031137420247852045, + "expression", + "word-concatenation", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 6165987386346442673, - 17011861032528540321, + 15312996304332666827, + 6035818222047083309, 18446744073709551615, 18446744073709551615, - 292, - 301, - 292, - 301, - 46, - 47, + 449, + 462, + 449, + 462, + 77, + 78, true, - "endpoints", - "endpoints" + "auto-cleaning", + "auto-cleaning" ], [ - "verb", - "compound-verb", - 9031137420247852045, + "sentence", + "", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 12677136892665844646, - 2032089139232006155, + 530368145582943314, + 13295761460699684392, 18446744073709551615, 18446744073709551615, - 224, - 236, - 224, - 236, - 35, - 37, + 0, + 109, + 0, + 109, + 0, + 19, true, - "are launched", - "are launched" + "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform.", + "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform." ], [ - "verb", - "compound-verb", - 9031137420247852045, + "sentence", + "", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 12855573301475655422, - 1573892996858218554, + 13690589465324431830, + 729303492509750058, 18446744073709551615, 18446744073709551615, - 264, - 291, - 264, - 291, - 42, + 110, + 243, + 110, + 243, + 19, 46, true, - "linked to externally hosted", - "linked to externally hosted" + "From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services.", + "From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services." ], [ - "verb", - "single-verb", - 9031137420247852045, + "sentence", + "", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 15441160910541486535, - 10500757786703375297, + 3799550530227447837, + 10235095163850190658, 18446744073709551615, 18446744073709551615, - 13, - 15, - 13, - 15, - 2, - 3, + 244, + 397, + 244, + 397, + 46, + 70, true, - "is", - "is" + "During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks.", + "During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks." ], [ - "verb", - "single-verb", - 9031137420247852045, + "term", + "enum-term-mark-2", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 329104159157798023, - 6671752901319384085, + 17670119798254759554, + 11044523659752658873, 18446744073709551615, 18446744073709551615, - 127, - 132, - 127, - 132, - 19, - 20, + 362, + 384, + 362, + 384, + 65, + 68, true, - "using", - "using" + "performance or scaling", + "performance or scaling" ], [ - "verb", - "single-verb", - 9031137420247852045, + "term", + "single-term", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 2906423210345501303, - 16309307450075852923, + 14228775800347505852, + 1270934594966554784, 18446744073709551615, 18446744073709551615, - 172, - 181, - 172, - 181, - 27, - 28, + 40, + 52, + 40, + 52, + 8, + 10, true, - "Depending", - "Depending" + "crucial role", + "crucial role" ], [ - "conn", - "single-conn", - 9031137420247852045, + "term", + "single-term", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 3013597407861734098, - 13645835485872550225, + 7308464677014704448, + 5082168475013008068, 18446744073709551615, 18446744073709551615, - 16, - 29, - 16, - 29, - 3, - 5, + 71, + 91, + 71, + 91, + 13, + 15, true, - "deployable on", - "deployable on" + "scaling requirements", + "scaling requirements" ], [ - "conn", - "single-conn", - 9031137420247852045, + "term", + "single-term", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 15601168207941439665, - 15242156125190384917, + 5470814617574924291, + 14078314495213552738, 18446744073709551615, 18446744073709551615, - 53, - 65, - 53, - 65, - 8, - 10, + 157, + 170, + 157, + 170, + 33, + 35, true, - "available on", - "available on" + "compute layer", + "compute layer" ], [ - "conn", - "single-conn", - 9031137420247852045, + "term", + "single-term", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 16381206566339127348, - 12939125612892018463, + 2732848371272418679, + 13963794921672736533, 18446744073709551615, 18446744073709551615, - 182, - 188, - 182, - 188, - 28, - 30, + 177, + 196, + 177, + 196, + 37, + 39, true, - "on the", - "on the" + "considerable amount", + "considerable amount" ], [ - "conn", - "single-conn", - 9031137420247852045, + "term", + "single-term", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 5386255170026914598, - 10161453367619815898, + 9137804915913150128, + 4468637458257639239, 18446744073709551615, 18446744073709551615, - 237, - 247, - 237, - 247, - 37, - 39, + 225, + 242, + 225, + 242, + 43, + 45, true, - "inside the", - "inside the" + "external services", + "external services" ], [ - "conn", - "single-conn", - 9031137420247852045, + "term", + "single-term", + 15072914837937068796, "TEXT", - "#/texts/83", + "#/texts/80", 1.0, - 15441160910541485865, - 10500757793681888901, + 2183649553633451561, + 2020007629353495081, 18446744073709551615, 18446744073709551615, - 271, - 273, - 271, - 273, - 43, - 44, + 280, + 296, + 280, + 296, + 51, + 53, true, - "to", - "to" + "multiple options", + "multiple options" ], [ - "expression", - "word-concatenation", - 18436578077535696718, + "term", + "single-term", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 14352418754681794071, - 3129213562618289639, + 8453966769027728994, + 16465619217972769818, 18446744073709551615, 18446744073709551615, - 192, - 212, - 192, - 212, - 35, - 36, + 351, + 373, + 351, + 373, + 64, + 66, true, - "parsing-microservice", - "parsing-microservice" + "inadequate performance", + "inadequate performance" ], [ - "sentence", - "", - 18436578077535696718, + "term", + "single-term", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 13622028562599160608, - 11838629416984110843, + 5368659910297958112, + 6849806055120467904, 18446744073709551615, 18446744073709551615, - 0, - 76, - 0, - 76, - 0, - 14, + 377, + 396, + 377, + 396, + 67, + 69, true, - "The common parts of all deployments are the interface and the compute layer.", - "The common parts of all deployments are the interface and the compute layer." + "scaling bottlenecks", + "scaling bottlenecks" ], [ - "sentence", - "", - 18436578077535696718, + "term", + "single-term", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 8982322851077994049, - 14574053650340581887, + 15755629669707778659, + 16473859664425870651, 18446744073709551615, 18446744073709551615, - 77, - 173, - 77, - 173, - 14, - 31, + 402, + 438, + 402, + 438, + 71, + 75, true, - "The compute layer is designed for dynamically adapt the number of resources on the current load.", - "The compute layer is designed for dynamically adapt the number of resources on the current load." + "example other result-backends didnt", + "example other result-backends didn't" ], [ - "sentence", - "", - 18436578077535696718, + "term", + "single-term", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 8231516465306254293, - 5376868082789191066, + 17990474265978324021, + 4558094210649300135, 18446744073709551615, 18446744073709551615, - 174, - 445, - 174, - 445, - 31, + 449, + 476, + 449, + 476, 77, + 79, true, - "For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", - "For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents." + "auto-cleaning functionality", + "auto-cleaning functionality" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 4575700335406167488, - 1041889512884127769, + 17678246672778617788, + 10086356137613940726, 18446744073709551615, 18446744073709551615, - 4, - 16, - 4, - 16, - 1, - 3, + 519, + 534, + 519, + 534, + 88, + 90, true, - "common parts", - "common parts" + "custom solution", + "custom solution" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 5470814617574924291, - 1119950227308354530, + 9431890275510275757, + 3557840365711048098, 18446744073709551615, 18446744073709551615, - 62, - 75, - 62, - 75, - 11, - 13, + 558, + 572, + 558, + 572, + 94, + 96, true, - "compute layer", - "compute layer" + "object storage", + "object storage" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 5470814617574924291, - 1119950227308355535, + 3103749419018501051, + 16238812461129121885, 18446744073709551615, 18446744073709551615, - 81, - 94, - 81, - 94, - 15, - 17, + 587, + 602, + 587, + 602, + 99, + 101, true, - "compute layer", - "compute layer" + "other solutions", + "other solutions" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 5679217233562387039, - 10306293013634943918, + 16381206532620919857, + 14992785282357168667, 18446744073709551615, 18446744073709551615, - 160, - 172, - 160, - 172, - 28, - 30, + 4, + 10, + 4, + 10, + 1, + 2, true, - "current load", - "current load" + "choice", + "choice" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 7165121732645597150, - 17919093041593160462, + 14635102416861801722, + 6944138304193469372, 18446744073709551615, 18446744073709551615, - 192, - 222, - 192, - 222, - 35, - 37, + 18, + 26, + 18, + 26, + 4, + 5, true, - "parsing-microservice instances", - "parsing-microservice instances" + "services", + "services" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 11579811611053762862, - 5792740568999225626, + 14814125365076808131, + 259608735306547128, 18446744073709551615, 18446744073709551615, - 247, - 261, - 247, - 261, - 42, - 44, + 100, + 108, + 100, + 108, + 17, + 18, true, - "large document", - "large document" + "platform", + "platform" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 5574297910769420540, - 7415408366124113138, + 16381206578503830159, + 5707938631766375304, 18446744073709551615, 18446744073709551615, - 374, - 390, - 374, - 390, - 66, - 68, + 119, + 125, + 119, + 125, + 21, + 22, true, - "other components", - "other components" + "sketch", + "sketch" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 1526165531385019099, - 3702094867294947886, + 12178341415896108354, + 15797295600457378135, 18446744073709551615, 18446744073709551615, + 127, + 130, + 127, + 130, + 23, 24, - 35, - 24, - 35, - 5, - 6, true, - "deployments", - "deployments" + "Fig", + "Fig" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 6182600923960960908, - 8662044929949820827, + 10844940863803374990, + 9655000055558243874, 18446744073709551615, 18446744073709551615, - 44, - 53, - 44, - 53, - 8, - 9, + 200, + 213, + 200, + 213, + 40, + 41, true, - "interface", - "interface" + "communication", + "communication" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 16381206574973295053, - 6957832894321474609, + 1525875096007260836, + 12799154604316470877, 18446744073709551615, 18446744073709551615, - 133, - 139, - 133, - 139, - 23, - 24, + 255, + 266, + 255, + 266, + 48, + 49, true, - "number", - "number" + "development", + "development" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 6168338487309432467, - 14015407302848006245, + 14635102416861801722, + 6944138304193064763, 18446744073709551615, 18446744073709551615, - 143, - 152, - 143, - 152, - 25, - 26, + 331, + 339, + 331, + 339, + 61, + 62, true, - "resources", - "resources" + "services", + "services" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 8106397496085150773, - 1241946393555377686, + 329104162172852560, + 4801873179480423203, 18446744073709551615, 18446744073709551615, - 178, - 185, - 178, - 185, - 32, - 33, + 488, + 493, + 488, + 493, + 81, + 82, true, - "example", - "example" + "Redis", + "Redis" ], [ "term", "single-term", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 12178341415895456504, - 15511050211190565407, + 8106471292843117687, + 4264742177449418379, 18446744073709551615, 18446744073709551615, - 320, - 323, - 320, - 323, - 54, - 55, + 542, + 549, + 542, + 549, + 91, + 92, true, - "end", - "end" + "MongoDB", + "MongoDB" ], [ - "term", - "single-term", - 18436578077535696718, + "verb", + "compound-verb", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 389609625631210899, - 6431357524637287554, + 13963960872604267983, + 9741187444941953278, 18446744073709551615, 18446744073709551615, - 331, - 335, - 331, - 335, - 57, - 58, + 27, + 37, + 27, + 37, + 5, + 7, true, - "task", - "task" + "plays also", + "plays also" ], [ - "term", - "single-term", - 18436578077535696718, + "verb", + "compound-verb", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 6168338487309432467, - 14015407302847964601, + 3304740881499173399, + 16233133484570305536, 18446744073709551615, 18446744073709551615, - 351, - 360, - 351, - 360, - 62, - 63, + 311, + 325, + 311, + 325, + 57, + 60, true, - "resources", - "resources" + "had to replace", + "had to replace" ], [ - "term", - "single-term", - 18436578077535696718, + "verb", + "single-verb", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 14634153919632515335, - 2667013412527336630, + 15360283586477443351, + 2607233536085319416, 18446744073709551615, 18446744073709551615, - 397, - 405, - 397, - 405, - 70, - 71, + 56, + 66, + 56, + 66, + 11, + 12, true, - "training", - "training" + "addressing", + "addressing" ], [ - "term", - "single-term", - 18436578077535696718, + "verb", + "single-verb", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 6167933651658664291, - 16134555370198793815, + 15441160910541486535, + 13392635867599855620, 18446744073709551615, 18446744073709551615, - 435, - 444, - 435, - 444, - 75, - 76, + 139, + 141, + 139, + 141, + 29, + 30, true, - "documents", - "documents" + "is", + "is" ], [ "verb", - "compound-verb", - 18436578077535696718, + "single-verb", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 9165036765200707500, - 14015747365861821834, + 12178341415895601584, + 15797287691917345479, 18446744073709551615, 18446744073709551615, - 95, - 106, - 95, - 106, - 17, - 19, + 171, + 174, + 171, + 174, + 35, + 36, true, - "is designed", - "is designed" + "has", + "has" ], [ "verb", - "compound-verb", - 18436578077535696718, + "single-verb", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 14891459320562646805, - 9156075874686645300, + 6172092587891830137, + 6944235926622048042, 18446744073709551615, 18446744073709551615, - 223, - 239, - 223, - 239, - 37, - 40, + 270, + 279, + 270, + 279, + 50, + 51, true, - "could be spawned", - "could be spawned" + "evaluated", + "evaluated" ], [ "verb", - "compound-verb", - 18436578077535696718, + "single-verb", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 9165313840036679968, - 18156907374285878295, + 15441160910541487324, + 13392634893759554933, 18446744073709551615, 18446744073709551615, - 262, - 273, - 262, - 273, - 44, - 46, + 302, + 307, + 302, + 307, + 55, + 56, true, - "is uploaded", - "is uploaded" + "eg", + "e. g." ], [ "verb", "single-verb", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 12178341415895564896, - 15510992411047215180, + 329104161714029917, + 4812815912515288423, 18446744073709551615, 18446744073709551615, - 36, - 39, - 36, - 39, - 6, - 7, + 439, + 444, + 439, + 444, + 75, + 76, true, - "are", - "are" + "offer", + "offer" ], [ "verb", "single-verb", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 329104159173548808, - 1957182172439438990, + 8106342926111180787, + 3313437457918926336, 18446744073709551615, 18446744073709551615, - 123, - 128, - 123, - 128, - 21, - 22, + 477, + 484, + 477, + 484, + 79, + 80, true, - "adapt", - "adapt" + "offered", + "offered" ], [ "verb", "single-verb", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 16381206579178669319, - 6938028188806274364, + 16381206512275563974, + 17971932688596796644, 18446744073709551615, 18446744073709551615, - 301, - 307, - 301, - 307, - 50, - 51, + 506, + 512, + 506, + 512, + 85, + 86, true, - "scaled", - "scaled" + "opting", + "opting" ], [ "verb", "single-verb", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 12178341415895564896, - 15510992411043340629, + 16381206594558792283, + 4831452492039043525, 18446744073709551615, 18446744073709551615, - 361, - 364, - 361, - 364, - 63, - 64, + 535, + 541, + 535, + 541, + 90, + 91, true, - "are", - "are" + "mixing", + "mixing" ], [ "verb", "single-verb", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 5615554093848987331, - 14336271604284028764, + 6172092587891830137, + 6944235926622064431, 18446744073709551615, 18446744073709551615, - 410, - 420, - 410, - 420, - 72, - 73, + 577, + 586, + 577, + 586, + 98, + 99, true, - "assembling", - "assembling" + "evaluated", + "evaluated" ], [ - "verb", - "single-verb", - 18436578077535696718, + "conn", + "single-conn", + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 6171728176299542016, - 3957840262356218692, + 2617690495147367356, + 3995802380905838725, 18446744073709551615, 18446744073709551615, - 425, - 434, - 425, - 434, - 74, - 75, + 142, + 152, + 142, + 152, + 30, + 32, true, - "processed", - "processed" + "clear that", + "clear that" ], [ "conn", "single-conn", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 6165459236568015364, - 6061326913510427821, + 16381206565712212855, + 5357051908763334798, 18446744073709551615, 18446744073709551615, - 337, - 346, - 337, - 346, - 59, - 61, + 11, + 17, + 11, + 17, + 2, + 4, true, - "such that", - "such that" + "of the", + "of the" ], [ "conn", "single-conn", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 14637917385401805410, - 5428309875437807875, + 15441160910541486538, + 13392635867609642731, 18446744073709551615, 18446744073709551615, - 365, - 373, - 365, - 373, - 64, - 66, + 53, + 55, + 53, + 55, + 10, + 11, true, - "free for", - "free for" + "in", + "in" ], [ "conn", "single-conn", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 16381206565712007226, - 7724692166486276181, + 8106397727991264470, + 7534731816831827800, 18446744073709551615, 18446744073709551615, + 92, + 99, + 92, + 99, + 15, 17, - 23, - 17, - 23, - 3, - 5, true, - "of all", - "of all" + "for the", + "for the" ], [ "conn", "single-conn", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 12178341415895625940, - 15510989710886502910, + 14652309564084901216, + 17139314077627797878, 18446744073709551615, 18446744073709551615, - 107, 110, - 107, + 118, 110, + 118, 19, - 20, + 21, true, - "for", - "for" + "From the", + "From the" ], [ "conn", "single-conn", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, 15441160910541485670, - 18358916429929728660, + 13392635753038274381, 18446744073709551615, 18446744073709551615, - 140, - 142, - 140, - 142, - 24, - 25, + 197, + 199, + 197, + 199, + 39, + 40, true, "of", "of" @@ -68414,3086 +70354,3191 @@ [ "conn", "single-conn", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 16381206566339127348, - 7820316500598827267, + 8601401817206609046, + 13002288130139499420, 18446744073709551615, 18446744073709551615, - 153, - 159, - 153, - 159, - 26, - 28, + 214, + 224, + 214, + 224, + 41, + 43, true, - "on the", - "on the" + "with these", + "with these" ], [ "conn", "single-conn", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 12178341415896108722, - 15510983418444691685, + 1703385011780833119, + 14455781933540325166, 18446744073709551615, 18446744073709551615, - 174, - 177, - 174, - 177, - 31, - 32, + 244, + 254, + 244, + 254, + 46, + 48, true, - "For", - "For" + "During the", + "During the" ], [ "conn", "single-conn", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 16381206568372064271, - 10868552521626828999, + 8106397858129277841, + 5242979235823403228, 18446744073709551615, 18446744073709551615, - 313, - 319, - 313, - 319, - 52, - 54, + 340, + 347, + 340, + 347, + 62, + 63, true, - "at the", - "at the" + "because", + "because" ], [ "conn", "single-conn", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 16381206565712212855, - 7724660172286794609, + 15441160910541485670, + 13392635753038231319, 18446744073709551615, 18446744073709551615, - 324, - 330, - 324, - 330, - 55, - 57, + 348, + 350, + 348, + 350, + 63, + 64, true, - "of the", - "of the" + "of", + "of" ], [ "conn", "single-conn", - 18436578077535696718, + 15072914837937068796, "TEXT", - "#/texts/84", + "#/texts/80", 1.0, - 389609625633313393, - 6432600486678620238, + 12178341415896108722, + 15797298594107170267, 18446744073709551615, 18446744073709551615, - 392, - 396, - 392, - 396, - 69, + 398, + 401, + 398, + 401, 70, + 71, true, - "like", - "like" + "For", + "For" ], [ - "sentence", - "", - 11734907767490759865, + "conn", + "single-conn", + 15072914837937068796, "TEXT", - "#/texts/85", + "#/texts/80", 1.0, - 9089347946185436978, - 12322779939098932937, + 15441160910541486989, + 13392635875319467060, 18446744073709551615, 18446744073709551615, - 0, - 223, - 0, - 223, - 0, - 34, + 485, + 487, + 485, + 487, + 80, + 81, true, - "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements.", - "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements." + "by", + "by" ], [ - "sentence", - "", - 11734907767490759865, + "conn", + "single-conn", + 15072914837937068796, "TEXT", - "#/texts/85", + "#/texts/80", 1.0, - 5619374035914941215, - 13388056321170730470, + 16381206569837301772, + 4752428903634883173, 18446744073709551615, 18446744073709551615, - 224, - 307, - 224, - 307, - 34, - 47, + 499, + 505, + 499, + 505, + 84, + 85, true, - "The parse component is indeed more demanding than the simple annotation components.", - "The parse component is indeed more demanding than the simple annotation components." + "before", + "before" ], [ - "term", - "single-term", - 11734907767490759865, + "conn", + "single-conn", + 15072914837937068796, "TEXT", - "#/texts/85", + "#/texts/80", 1.0, - 5470814617574924291, - 8565468289586536983, + 329104161711024499, + 4813260785890145002, 18446744073709551615, 18446744073709551615, - 30, - 43, - 30, - 43, - 5, - 7, + 513, + 518, + 513, + 518, + 86, + 88, true, - "compute layer", - "compute layer" + "for a", + "for a" ], [ - "term", - "single-term", - 11734907767490759865, + "conn", + "single-conn", + 15072914837937068796, "TEXT", - "#/texts/85", + "#/texts/80", 1.0, - 220880112941331342, - 10757099033331540513, + 8106477988572616406, + 8588614024698076211, 18446744073709551615, 18446744073709551615, - 69, - 85, - 69, - 85, - 11, - 13, + 550, + 557, + 550, + 557, + 92, + 94, true, - "different queues", - "different queues" + "with an", + "with an" ], [ - "term", - "single-term", - 11734907767490759865, + "conn", + "single-conn", + 15072914837937068796, "TEXT", - "#/texts/85", + "#/texts/80", 1.0, - 5487575286069153569, - 13394910817957544454, + 15441160910541487053, + 13392635876351472074, 18446744073709551615, 18446744073709551615, - 157, - 176, - 157, - 176, - 26, - 28, + 603, + 605, + 603, + 605, + 101, + 102, true, - "different component", - "different component" + "as", + "as" ], [ - "term", - "single-term", - 11734907767490759865, + "conn", + "single-conn", + 15072914837937068796, "TEXT", - "#/texts/85", + "#/texts/80", 1.0, - 8988400645948795194, - 16485622837841306210, + 15441160910541485865, + 13392635755399877181, 18446744073709551615, 18446744073709551615, - 196, - 222, - 196, - 222, - 31, - 33, + 315, + 317, + 315, + 317, + 58, + 59, true, - "computational requirements", - "computational requirements" + "to", + "to" ], [ - "term", - "single-term", - 11734907767490759865, + "expression", + "apostrophe", + 15263283599394646155, "TEXT", - "#/texts/85", + "#/texts/81", 1.0, - 5037855592482871690, - 16562149494420733590, + 329104162099298038, + 2422170512955612338, 18446744073709551615, 18446744073709551615, - 228, - 243, - 228, - 243, - 35, - 37, + 27, + 33, + 27, + 33, + 6, + 7, true, - "parse component", - "parse component" + "didnt", + "didn't" ], [ - "term", - "single-term", - 11734907767490759865, + "sentence", + "", + 15263283599394646155, "TEXT", - "#/texts/85", + "#/texts/81", 1.0, - 1786684417185012154, - 17458883524654231766, + 9738445166753142519, + 7077409306408156246, 18446744073709551615, 18446744073709551615, - 278, - 306, - 278, - 306, - 43, - 46, + 4, + 87, + 4, + 87, + 1, + 16, true, - "simple annotation components", - "simple annotation components" + "GridFS storage, but it didn't fit to the constraints of typical cloud environments.", + "GridFS storage, but it didn't fit to the constraints of typical cloud environments." ], [ "term", "single-term", - 11734907767490759865, + 15263283599394646155, "TEXT", - "#/texts/85", + "#/texts/81", 1.0, - 2703018952916355661, - 2103701956309679472, + 3553616603590296979, + 16097117960287067168, 18446744073709551615, 18446744073709551615, 4, - 14, + 18, 4, - 14, + 18, 1, - 2, + 3, true, - "components", - "components" + "GridFS storage", + "GridFS storage" ], [ "term", "single-term", - 11734907767490759865, + 15263283599394646155, "TEXT", - "#/texts/85", + "#/texts/81", 1.0, - 14637917407223052431, - 233568237166781340, + 3164946639114553222, + 7659937814652463492, 18446744073709551615, 18446744073709551615, - 116, - 124, - 116, - 124, - 20, - 21, + 60, + 86, + 60, + 86, + 12, + 15, true, - "fraction", - "fraction" + "typical cloud environments", + "typical cloud environments" ], [ "term", "single-term", - 11734907767490759865, + 15263283599394646155, "TEXT", - "#/texts/85", + "#/texts/81", 1.0, - 6168338487309432467, - 7308226084005327662, + 12178341415895625823, + 10663577172675311427, 18446744073709551615, 18446744073709551615, - 128, - 137, - 128, - 137, - 22, - 23, + 34, + 37, + 34, + 37, + 7, + 8, true, - "resources", - "resources" + "fit", + "fit" ], [ - "verb", - "compound-verb", - 11734907767490759865, + "term", + "single-term", + 15263283599394646155, "TEXT", - "#/texts/85", + "#/texts/81", 1.0, - 6181919818947346503, - 7425024011998385797, + 2343820404875251124, + 4748486300187076231, 18446744073709551615, 18446744073709551615, - 244, - 253, - 244, - 253, - 37, - 39, + 45, + 56, + 45, + 56, + 10, + 11, true, - "is indeed", - "is indeed" + "constraints", + "constraints" ], [ "verb", "single-verb", - 11734907767490759865, + 15263283599394646155, "TEXT", - "#/texts/85", + "#/texts/81", 1.0, - 8106478500389476193, - 7333002514628894973, + 329104162099298038, + 2422170512955612338, 18446744073709551615, 18446744073709551615, - 15, - 22, - 15, - 22, - 2, - 3, + 27, + 33, + 27, + 33, + 6, + 7, true, - "running", - "running" + "didnt", + "didn't" ], [ - "verb", - "single-verb", - 11734907767490759865, + "conn", + "single-conn", + 15263283599394646155, "TEXT", - "#/texts/85", + "#/texts/81", 1.0, - 12178341415895564896, - 2069865895983944783, + 15441160910541485670, + 15469104452822855430, 18446744073709551615, 18446744073709551615, - 44, - 47, - 44, - 47, - 7, - 8, + 57, + 59, + 57, + 59, + 11, + 12, true, - "are", - "are" + "of", + "of" ], [ - "verb", - "single-verb", - 11734907767490759865, + "conn", + "single-conn", + 15263283599394646155, "TEXT", - "#/texts/85", + "#/texts/81", 1.0, - 6167774653473311671, - 6290589207758732495, + 16381206519425733256, + 10289373630862252080, 18446744073709551615, 18446744073709551615, - 56, - 65, - 56, - 65, - 9, + 38, + 44, + 38, + 44, + 8, 10, true, - "organized", - "organized" + "to the", + "to the" ], [ - "verb", - "single-verb", - 11734907767490759865, + "numval", + "fval", + 11417717357379295278, "TEXT", - "#/texts/85", + "#/texts/82", 1.0, - 14892592691709012982, - 6239765786557836013, + 12178341415896306586, + 2376192024093454144, 18446744073709551615, 18446744073709551615, - 100, - 111, - 100, - 111, - 17, - 19, + 0, + 3, + 0, + 3, + 0, + 1, true, - "can control", - "can control" + "4.2", + "4.2" ], [ - "verb", - "single-verb", - 11734907767490759865, + "numval", + "ival", + 9031137420247852045, "TEXT", - "#/texts/85", + "#/texts/83", 1.0, - 5946734708345938643, - 10385794168888707641, + 15441160910541481862, + 10500741044532715512, 18446744073709551615, 18446744073709551615, - 138, - 147, - 138, - 147, - 23, - 24, + 50, + 52, + 50, + 52, + 7, + 8, true, - "allocated", - "allocated" + "18", + "18" ], [ - "verb", - "single-verb", - 11734907767490759865, + "numval", + "ival", + 9031137420247852045, "TEXT", - "#/texts/85", + "#/texts/83", 1.0, - 6180152660545840784, - 10572035524213656485, + 15441160910541481863, + 10500741044517231196, 18446744073709551615, 18446744073709551615, - 177, - 186, - 177, - 186, - 28, - 29, + 155, + 157, + 155, + 157, + 24, + 25, true, - "depending", - "depending" + "19", + "19" ], [ - "verb", - "single-verb", - 11734907767490759865, + "expression", + "common", + 9031137420247852045, "TEXT", - "#/texts/85", + "#/texts/83", 1.0, - 6180164155127649426, - 12351505444317336995, + 15441160910541487324, + 10500757812195718645, 18446744073709551615, 18446744073709551615, - 259, - 268, - 259, - 268, - 40, - 41, + 121, + 126, + 121, + 126, + 18, + 19, true, - "demanding", - "demanding" + "eg", + "e. g." ], [ - "conn", - "single-conn", - 11734907767490759865, + "expression", + "word-concatenation", + 9031137420247852045, "TEXT", - "#/texts/85", + "#/texts/83", 1.0, - 6165459236568015364, - 15229237459031979782, + 14042857724397157868, + 17436499209420645038, 18446744073709551615, 18446744073709551615, - 87, - 96, - 87, - 96, - 14, + 95, + 105, + 95, + 105, + 15, 16, true, - "such that", - "such that" + "on-premise", + "on-premise" ], [ - "conn", - "single-conn", - 11734907767490759865, + "sentence", + "", + 9031137420247852045, "TEXT", - "#/texts/85", + "#/texts/83", 1.0, - 16381206560518651853, - 15245488941580331570, + 16473487772931696221, + 1361496787505182232, 18446744073709551615, 18446744073709551615, - 23, - 29, - 23, - 29, - 3, - 5, + 0, + 171, + 0, + 171, + 0, + 27, true, - "in the", - "in the" + "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution.", + "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution." ], [ - "conn", - "single-conn", - 11734907767490759865, + "sentence", + "", + 9031137420247852045, "TEXT", - "#/texts/85", + "#/texts/83", 1.0, - 15441160910541486538, - 16667658716295011841, + 13604474430867440219, + 15920079442920273776, 18446744073709551615, 18446744073709551615, - 66, - 68, - 66, - 68, - 10, - 11, + 172, + 302, + 172, + 302, + 27, + 48, true, - "in", - "in" + "Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", + "Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints." ], [ - "conn", - "single-conn", - 11734907767490759865, + "term", + "single-term", + 9031137420247852045, "TEXT", - "#/texts/85", + "#/texts/83", 1.0, - 15441160910541485670, - 16667656100672477854, + 4315218641775224883, + 3783623336096074444, 18446744073709551615, 18446744073709551615, - 125, - 127, - 125, - 127, - 21, - 22, + 30, + 49, + 30, + 49, + 5, + 7, true, - "of", - "of" + "Kubernetes clusters", + "Kubernetes clusters" ], [ - "conn", - "single-conn", - 11734907767490759865, + "term", + "single-term", + 9031137420247852045, "TEXT", - "#/texts/85", + "#/texts/83", 1.0, - 14637917333167503367, - 16189411727226984898, + 7578678502347528407, + 16606690075113593003, 18446744073709551615, 18446744073709551615, - 148, - 156, - 148, - 156, - 24, - 26, + 66, + 86, + 66, + 86, + 10, + 13, true, - "for each", - "for each" + "many cloud providers", + "many cloud providers" ], [ - "conn", - "single-conn", - 11734907767490759865, + "term", + "single-term", + 9031137420247852045, "TEXT", - "#/texts/85", + "#/texts/83", 1.0, - 15441160910541485678, - 16667656110763672808, + 17157390005033639285, + 14551521890127263578, 18446744073709551615, 18446744073709551615, - 187, - 189, - 187, - 189, - 29, - 30, + 95, + 119, + 95, + 119, + 15, + 17, true, - "on", - "on" + "on-premise installations", + "on-premise installations" ], [ - "conn", - "single-conn", - 11734907767490759865, + "term", + "single-term", + 9031137420247852045, "TEXT", - "#/texts/85", + "#/texts/83", 1.0, - 14634130760851708851, - 14161918089512738998, + 15250872047548077430, + 7534455339628786157, 18446744073709551615, 18446744073709551615, - 269, - 277, - 269, - 277, - 41, - 43, + 137, + 154, + 137, + 154, + 21, + 24, true, - "than the", - "than the" + "IBM Cloud Private", + "IBM Cloud Private" ], [ - "numval", - "ival", - 7845460979782401889, + "term", + "single-term", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 17767354399704235157, - 4252787363852102188, + 17140401278227586491, + 11321802952630178709, 18446744073709551615, 18446744073709551615, + 207, + 223, + 207, + 223, + 33, + 35, + true, + "storage services", + "storage services" + ], + [ + "term", + "single-term", + 9031137420247852045, + "TEXT", + "#/texts/83", + 1.0, + 4047423525975715659, + 7947778581648084546, + 18446744073709551615, + 18446744073709551615, + 248, + 260, + 248, + 260, 39, - 40, - 39, - 40, - 7, - 8, + 41, true, - "5", - "5" + "same cluster", + "same cluster" ], [ - "numval", - "ival", - 7845460979782401889, + "term", + "single-term", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 17767354399704235156, - 4252787353929282716, + 14814125365076808131, + 2443639570324462603, 18446744073709551615, 18446744073709551615, - 63, - 64, - 63, - 64, - 11, + 4, 12, + 4, + 12, + 1, + 2, true, - "4", - "4" + "platform", + "platform" ], [ - "numval", - "ival", - 7845460979782401889, + "term", + "single-term", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 17767354399704235152, - 4252787363047610230, + 15441160910541487324, + 10500757812195718645, 18446744073709551615, 18446744073709551615, - 79, - 80, - 79, - 80, - 15, - 16, + 121, + 126, + 121, + 126, + 18, + 19, true, - "8", - "8" + "eg", + "e. g." ], [ - "numval", - "ival", - 7845460979782401889, + "term", + "single-term", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 17767354399704235152, - 4252787363047621876, + 16659280385198228594, + 13641186927945667101, 18446744073709551615, 18446744073709551615, - 132, - 133, - 132, - 133, + 158, + 170, + 158, + 170, + 25, 26, - 27, true, - "8", - "8" + "distribution", + "distribution" ], [ - "numval", - "ival", - 7845460979782401889, + "term", + "single-term", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 17767354399704235157, - 4252787363852130202, + 13240311013633905449, + 2445508371176550978, 18446744073709551615, 18446744073709551615, - 342, - 343, - 342, - 343, - 63, - 64, + 189, + 201, + 189, + 201, + 30, + 31, true, - "5", - "5" + "requirements", + "requirements" ], [ - "sentence", - "", - 7845460979782401889, + "term", + "single-term", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 6583879206628208074, - 18118287548868180601, + 6165987386346442673, + 17011861032528540321, 18446744073709551615, 18446744073709551615, - 0, - 218, - 0, - 218, - 0, - 42, + 292, + 301, + 292, + 301, + 46, + 47, true, - "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks.", - "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks." + "endpoints", + "endpoints" ], [ - "sentence", - "", - 7845460979782401889, + "verb", + "compound-verb", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 4563023981242652318, - 13772109345187795922, + 12677136892665844646, + 2032089139232006155, 18446744073709551615, 18446744073709551615, - 219, - 331, - 219, - 331, - 42, - 61, + 224, + 236, + 224, + 236, + 35, + 37, true, - "Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment.", - "Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment." + "are launched", + "are launched" ], [ - "sentence", - "", - 7845460979782401889, + "verb", + "compound-verb", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 14978678634121360006, - 2618788565151427954, + 12855573301475655422, + 1573892996858218554, 18446744073709551615, 18446744073709551615, - 332, - 438, - 332, - 438, - 61, - 80, + 264, + 291, + 264, + 291, + 42, + 46, true, - "Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", - "Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer." + "linked to externally hosted", + "linked to externally hosted" ], [ - "term", - "enum-term-mark-2", - 7845460979782401889, + "verb", + "single-verb", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 2528135788265244608, - 5344652883238296137, + 15441160910541486535, + 10500757786703375297, 18446744073709551615, 18446744073709551615, - 179, - 211, - 179, - 211, - 36, - 40, + 13, + 15, + 13, + 15, + 2, + 3, true, - "learning training and prediction", - "learning training and prediction" + "is", + "is" ], [ - "term", - "enum-term-mark-2", - 7845460979782401889, + "verb", + "single-verb", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 14743433718696772273, - 15154440347503938098, + 329104159157798023, + 6671752901319384085, 18446744073709551615, 18446744073709551615, - 408, - 437, - 408, - 437, - 75, - 79, + 127, + 132, + 127, + 132, + 19, + 20, true, - "orchestration and store layer", - "orchestration and store layer" + "using", + "using" ], [ - "term", - "single-term", - 7845460979782401889, + "verb", + "single-verb", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 16269569412982647766, - 2024757386881102351, + 2906423210345501303, + 16309307450075852923, 18446744073709551615, 18446744073709551615, - 15, - 26, - 15, - 26, - 3, - 5, + 172, + 181, + 172, + 181, + 27, + 28, true, - "main system", - "main system" + "Depending", + "Depending" ], [ - "term", - "single-term", - 7845460979782401889, + "conn", + "single-conn", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 13444630328481412471, - 2581523576540413652, + 3013597407861734098, + 13645835485872550225, 18446744073709551615, 18446744073709551615, - 41, - 57, - 41, - 57, - 8, - 10, + 16, + 29, + 16, + 29, + 3, + 5, true, - "Kubernetes nodes", - "Kubernetes nodes" + "deployable on", + "deployable on" ], [ - "term", - "single-term", - 7845460979782401889, + "conn", + "single-conn", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 6563416156472488864, - 2213523264527249618, + 15601168207941439665, + 15242156125190384917, 18446744073709551615, 18446744073709551615, + 53, 65, - 74, + 53, 65, - 74, - 12, - 14, + 8, + 10, true, - "CPU cores", - "CPU cores" + "available on", + "available on" ], [ - "term", - "single-term", - 7845460979782401889, + "conn", + "single-conn", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 16269569728729474655, - 9476002452497745608, + 16381206566339127348, + 12939125612892018463, 18446744073709551615, 18446744073709551615, - 87, - 98, - 87, - 98, - 18, - 20, + 182, + 188, + 182, + 188, + 28, + 30, true, - "main memory", - "main memory" + "on the", + "on the" ], [ - "term", - "single-term", - 7845460979782401889, + "conn", + "single-conn", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 3613081198034507866, - 9231793791806678387, + 5386255170026914598, + 10161453367619815898, 18446744073709551615, 18446744073709551615, - 174, - 196, - 174, - 196, - 35, - 38, + 237, + 247, + 237, + 247, + 37, + 39, true, - "deep learning training", - "deep learning training" + "inside the", + "inside the" ], [ - "term", - "single-term", - 7845460979782401889, + "conn", + "single-conn", + 9031137420247852045, "TEXT", - "#/texts/86", + "#/texts/83", 1.0, - 11816234786078857760, - 13747687809735994367, + 15441160910541485865, + 10500757793681888901, 18446744073709551615, 18446744073709551615, - 201, - 217, - 201, - 217, - 39, - 41, + 271, + 273, + 271, + 273, + 43, + 44, true, - "prediction tasks", - "prediction tasks" + "to", + "to" ], [ - "term", - "single-term", - 7845460979782401889, + "expression", + "word-concatenation", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 1277611218502696979, - 1791114475320884340, + 14352418754681794071, + 3129213562618289639, 18446744073709551615, 18446744073709551615, - 229, - 245, - 229, - 245, - 45, - 47, + 192, + 212, + 192, + 212, + 35, + 36, true, - "flexible binding", - "flexible binding" + "parsing-microservice", + "parsing-microservice" ], [ - "term", - "single-term", - 7845460979782401889, + "sentence", + "", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 15130402117130351741, - 1356464078922122453, + 13622028562599160608, + 11838629416984110843, 18446744073709551615, 18446744073709551615, - 266, - 280, - 266, - 280, - 50, - 52, + 0, + 76, + 0, + 76, + 0, + 14, true, - "specific nodes", - "specific nodes" + "The common parts of all deployments are the interface and the compute layer.", + "The common parts of all deployments are the interface and the compute layer." ], [ - "term", - "single-term", - 7845460979782401889, + "sentence", + "", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 5422119649868232113, - 9343245046888260619, + 8982322851077994049, + 14574053650340581887, 18446744073709551615, 18446744073709551615, - 286, - 301, - 286, - 301, - 54, - 56, + 77, + 173, + 77, + 173, + 14, + 31, true, - "great advantage", - "great advantage" + "The compute layer is designed for dynamically adapt the number of resources on the current load.", + "The compute layer is designed for dynamically adapt the number of resources on the current load." ], [ - "term", - "single-term", - 7845460979782401889, + "sentence", + "", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 3499436126781074633, - 11463332423358967580, + 8231516465306254293, + 5376868082789191066, 18446744073709551615, 18446744073709551615, - 309, - 330, - 309, - 330, - 58, - 60, + 174, + 445, + 174, + 445, + 31, + 77, true, - "Kubernetes deployment", - "Kubernetes deployment" + "For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", + "For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents." ], [ "term", "single-term", - 7845460979782401889, + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 6641605220774829847, - 5925222761573375364, + 4575700335406167488, + 1041889512884127769, 18446744073709551615, 18446744073709551615, - 344, - 366, - 344, - 366, - 64, - 67, + 4, + 16, + 4, + 16, + 1, + 3, true, - "other virtual machines", - "other virtual machines" + "common parts", + "common parts" ], [ "term", "single-term", - 7845460979782401889, + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 3081516039280483029, - 12956841069716270529, + 5470814617574924291, + 1119950227308354530, 18446744073709551615, 18446744073709551615, - 426, - 437, - 426, - 437, - 77, - 79, + 62, + 75, + 62, + 75, + 11, + 13, true, - "store layer", - "store layer" + "compute layer", + "compute layer" ], [ "term", "single-term", - 7845460979782401889, + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 15441160910541479948, - 9352056469740640944, + 5470814617574924291, + 1119950227308355535, 18446744073709551615, 18446744073709551615, 81, - 83, + 94, 81, - 83, - 16, + 94, + 15, 17, true, - "GB", - "GB" + "compute layer", + "compute layer" ], [ "term", "single-term", - 7845460979782401889, + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 329104162118942300, - 7223786837846062912, + 5679217233562387039, + 10306293013634943918, 18446744073709551615, 18446744073709551615, - 126, - 131, - 126, - 131, - 25, - 26, + 160, + 172, + 160, + 172, + 28, + 30, true, - "POWER", - "POWER" + "current load", + "current load" ], [ "term", "single-term", - 7845460979782401889, + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 389609625621164460, - 9971368100247265020, + 7165121732645597150, + 17919093041593160462, 18446744073709551615, 18446744073709551615, - 134, - 138, - 134, - 138, - 27, - 28, + 192, + 222, + 192, + 222, + 35, + 37, true, - "node", - "node" + "parsing-microservice instances", + "parsing-microservice instances" ], [ "term", "single-term", - 7845460979782401889, + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 389609625538377862, - 10024562876350301328, + 11579811611053762862, + 5792740568999225626, 18446744073709551615, 18446744073709551615, - 149, - 153, - 149, - 153, - 30, - 31, + 247, + 261, + 247, + 261, + 42, + 44, true, - "GPUs", - "GPUs" + "large document", + "large document" ], [ "term", "single-term", - 7845460979782401889, + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 990358581043194791, - 3870495549619153573, + 5574297910769420540, + 7415408366124113138, 18446744073709551615, 18446744073709551615, - 249, - 262, - 249, - 262, - 48, - 49, + 374, + 390, + 374, + 390, + 66, + 68, true, - "microservices", - "microservices" + "other components", + "other components" ], [ "term", "single-term", - 7845460979782401889, + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 14635102416861801722, - 13295514760162013366, + 1526165531385019099, + 3702094867294947886, 18446744073709551615, 18446744073709551615, - 392, - 400, - 392, - 400, - 72, - 73, + 24, + 35, + 24, + 35, + 5, + 6, true, - "services", - "services" + "deployments", + "deployments" ], [ "term", "single-term", - 7845460979782401889, + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 4327709553742697698, - 1943965779924640606, + 6182600923960960908, + 8662044929949820827, 18446744073709551615, 18446744073709551615, - 408, - 421, - 408, - 421, - 75, - 76, + 44, + 53, + 44, + 53, + 8, + 9, true, - "orchestration", - "orchestration" + "interface", + "interface" ], [ - "verb", - "compound-verb", - 7845460979782401889, + "term", + "single-term", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 16914551794403134749, - 14289767005593609982, + 16381206574973295053, + 6957832894321474609, 18446744073709551615, 18446744073709551615, - 154, - 169, - 154, - 169, - 31, - 34, + 133, + 139, + 133, + 139, + 23, + 24, true, - "is dedicated to", - "is dedicated to" + "number", + "number" ], [ - "verb", - "compound-verb", - 7845460979782401889, + "term", + "single-term", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 556334503717216086, - 14184460239514695626, + 6168338487309432467, + 14015407302848006245, 18446744073709551615, 18446744073709551615, - 367, - 387, - 367, - 387, - 67, - 71, + 143, + 152, + 143, + 152, + 25, + 26, true, - "are employed to host", - "are employed to host" + "resources", + "resources" ], [ - "verb", - "single-verb", - 7845460979782401889, + "term", + "single-term", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 14814150617868433693, - 2828843033811804789, + 8106397496085150773, + 1241946393555377686, 18446744073709551615, 18446744073709551615, - 27, - 35, - 27, - 35, - 5, - 6, + 178, + 185, + 178, + 185, + 32, + 33, true, - "operates", - "operates" + "example", + "example" ], [ - "verb", - "single-verb", - 7845460979782401889, + "term", + "single-term", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 15441160910541486535, - 9351998670672415825, + 12178341415895456504, + 15511050211190565407, 18446744073709551615, 18446744073709551615, - 281, - 283, - 281, - 283, - 52, - 53, + 320, + 323, + 320, + 323, + 54, + 55, true, - "is", - "is" + "end", + "end" ], [ - "conn", - "single-conn", - 7845460979782401889, + "term", + "single-term", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 15441160910541485678, - 9351998466637895866, + 389609625631210899, + 6431357524637287554, 18446744073709551615, 18446744073709551615, - 36, - 38, - 36, - 38, - 6, - 7, + 331, + 335, + 331, + 335, + 57, + 58, true, - "on", - "on" + "task", + "task" ], [ - "conn", - "single-conn", - 7845460979782401889, + "term", + "single-term", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 389609625618037948, - 9971074391070552914, + 6168338487309432467, + 14015407302847964601, 18446744073709551615, 18446744073709551615, - 58, - 62, - 58, + 351, + 360, + 351, + 360, 62, - 10, - 11, + 63, true, - "with", - "with" + "resources", + "resources" ], [ - "conn", - "single-conn", - 7845460979782401889, + "term", + "single-term", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 15441160910541485670, - 9351998465977112201, + 14634153919632515335, + 2667013412527336630, 18446744073709551615, 18446744073709551615, - 84, - 86, - 84, - 86, - 17, - 18, + 397, + 405, + 397, + 405, + 70, + 71, true, - "of", - "of" + "training", + "training" ], [ - "conn", - "single-conn", - 7845460979782401889, + "term", + "single-term", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 389609625618037948, - 9971074391070570820, + 6167933651658664291, + 16134555370198793815, 18446744073709551615, 18446744073709551615, - 139, - 143, - 139, - 143, - 28, - 29, + 435, + 444, + 435, + 444, + 75, + 76, true, - "with", - "with" + "documents", + "documents" ], [ - "conn", - "single-conn", - 7845460979782401889, + "verb", + "compound-verb", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 15441160910541485670, - 9351998465978041953, + 9165036765200707500, + 14015747365861821834, 18446744073709551615, 18446744073709551615, - 246, - 248, - 246, - 248, - 47, - 48, + 95, + 106, + 95, + 106, + 17, + 19, true, - "of", - "of" + "is designed", + "is designed" ], [ - "conn", - "single-conn", - 7845460979782401889, + "verb", + "compound-verb", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 16381206565712212855, - 15798433650653459254, + 14891459320562646805, + 9156075874686645300, 18446744073709551615, 18446744073709551615, - 302, - 308, - 302, - 308, - 56, - 58, + 223, + 239, + 223, + 239, + 37, + 40, true, - "of the", - "of the" + "could be spawned", + "could be spawned" ], [ - "conn", - "single-conn", - 7845460979782401889, + "verb", + "compound-verb", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 16381206560518651853, - 15995808466227689457, + 9165313840036679968, + 18156907374285878295, 18446744073709551615, 18446744073709551615, - 401, - 407, - 401, - 407, - 73, - 75, + 262, + 273, + 262, + 273, + 44, + 46, true, - "in the", - "in the" + "is uploaded", + "is uploaded" ], [ - "conn", - "single-conn", - 7845460979782401889, + "verb", + "single-verb", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 16381206519425733256, - 10817242341263362701, + 12178341415895564896, + 15510992411047215180, 18446744073709551615, 18446744073709551615, - 167, - 173, - 167, - 173, - 33, - 35, + 36, + 39, + 36, + 39, + 6, + 7, true, - "to the", - "to the" + "are", + "are" ], [ - "conn", - "single-conn", - 7845460979782401889, + "verb", + "single-verb", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 15441160910541485865, - 9351998575526350776, + 329104159173548808, + 1957182172439438990, 18446744073709551615, 18446744073709551615, - 263, - 265, - 263, - 265, - 49, - 50, + 123, + 128, + 123, + 128, + 21, + 22, true, - "to", - "to" + "adapt", + "adapt" ], [ - "conn", - "single-conn", - 7845460979782401889, + "verb", + "single-verb", + 18436578077535696718, "TEXT", - "#/texts/86", + "#/texts/84", 1.0, - 15441160910541485865, - 9351998575526358427, + 16381206579178669319, + 6938028188806274364, 18446744073709551615, 18446744073709551615, - 380, - 382, - 380, - 382, - 69, - 70, + 301, + 307, + 301, + 307, + 50, + 51, true, - "to", - "to" + "scaled", + "scaled" ], [ - "numval", - "fval", - 17769988780693768120, + "verb", + "single-verb", + 18436578077535696718, "TEXT", - "#/texts/87", + "#/texts/84", 1.0, - 12178341415896306587, - 11831950895164487341, + 12178341415895564896, + 15510992411043340629, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 361, + 364, + 361, + 364, + 63, + 64, true, - "4.3", - "4.3" + "are", + "are" ], [ - "numval", - "ival", - 12387489643011067991, + "verb", + "single-verb", + 18436578077535696718, "TEXT", - "#/texts/88", + "#/texts/84", 1.0, - 17767354399704235159, - 6323623135901186785, + 5615554093848987331, + 14336271604284028764, 18446744073709551615, 18446744073709551615, - 258, - 259, - 258, - 259, - 48, - 49, + 410, + 420, + 410, + 420, + 72, + 73, true, - "7", - "7" + "assembling", + "assembling" ], [ - "sentence", - "", - 12387489643011067991, + "verb", + "single-verb", + 18436578077535696718, "TEXT", - "#/texts/88", + "#/texts/84", 1.0, - 11703520391970010536, - 357834892882144608, + 6171728176299542016, + 3957840262356218692, 18446744073709551615, 18446744073709551615, - 0, - 56, - 0, - 56, - 0, - 11, + 425, + 434, + 425, + 434, + 74, + 75, true, - "Let us now discuss some scaling results on our platform.", - "Let us now discuss some scaling results on our platform." + "processed", + "processed" ], [ - "sentence", - "", - 12387489643011067991, + "conn", + "single-conn", + 18436578077535696718, "TEXT", - "#/texts/88", + "#/texts/84", 1.0, - 8157900891627315247, - 15072204804485786404, + 6165459236568015364, + 6061326913510427821, 18446744073709551615, 18446744073709551615, - 57, - 247, - 57, - 247, - 11, - 46, + 337, + 346, + 337, + 346, + 59, + 61, true, - "As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources.", - "As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources." + "such that", + "such that" ], [ - "term", - "single-term", - 12387489643011067991, + "conn", + "single-conn", + 18436578077535696718, "TEXT", - "#/texts/88", + "#/texts/84", 1.0, - 4421383392096991748, - 9783447214836928971, + 14637917385401805410, + 5428309875437807875, 18446744073709551615, 18446744073709551615, - 229, - 246, - 229, - 246, - 43, - 45, + 365, + 373, + 365, + 373, + 64, + 66, true, - "compute resources", - "compute resources" + "free for", + "free for" ], [ - "term", - "single-term", - 12387489643011067991, + "conn", + "single-conn", + 18436578077535696718, "TEXT", - "#/texts/88", + "#/texts/84", 1.0, - 8106478445190161533, - 16203321325185840639, + 16381206565712007226, + 7724692166486276181, 18446744073709551615, 18446744073709551615, - 32, - 39, - 32, - 39, - 6, - 7, + 17, + 23, + 17, + 23, + 3, + 5, true, - "results", - "results" + "of all", + "of all" ], [ - "term", - "single-term", - 12387489643011067991, + "conn", + "single-conn", + 18436578077535696718, "TEXT", - "#/texts/88", + "#/texts/84", 1.0, - 14814125365076808131, - 8660743237002823027, + 12178341415895625940, + 15510989710886502910, 18446744073709551615, 18446744073709551615, - 47, - 55, - 47, - 55, - 9, - 10, + 107, + 110, + 107, + 110, + 19, + 20, true, - "platform", - "platform" + "for", + "for" ], [ - "term", - "single-term", - 12387489643011067991, + "conn", + "single-conn", + 18436578077535696718, "TEXT", - "#/texts/88", + "#/texts/84", 1.0, - 5948159060234732715, - 1856499645219012237, + 15441160910541485670, + 18358916429929728660, 18446744073709551615, 18446744073709551615, - 82, - 91, - 82, - 91, - 17, - 18, + 140, + 142, + 140, + 142, + 24, + 25, true, - "beginning", - "beginning" + "of", + "of" ], [ - "term", - "single-term", - 12387489643011067991, + "conn", + "single-conn", + 18436578077535696718, "TEXT", - "#/texts/88", + "#/texts/84", 1.0, - 8106478708629288965, - 9823809706360263062, + 16381206566339127348, + 7820316500598827267, 18446744073709551615, 18446744073709551615, - 99, - 106, - 99, - 106, - 20, - 21, + 153, + 159, + 153, + 159, + 26, + 28, true, - "section", - "section" + "on the", + "on the" ], [ - "term", - "single-term", - 12387489643011067991, + "conn", + "single-conn", + 18436578077535696718, "TEXT", - "#/texts/88", + "#/texts/84", 1.0, - 13240311013633905449, - 13295854927356281099, + 12178341415896108722, + 15510983418444691685, 18446744073709551615, 18446744073709551615, - 112, - 124, - 112, - 124, - 23, - 24, + 174, + 177, + 174, + 177, + 31, + 32, true, - "requirements", - "requirements" + "For", + "For" ], [ - "term", - "single-term", - 12387489643011067991, + "conn", + "single-conn", + 18436578077535696718, "TEXT", - "#/texts/88", + "#/texts/84", 1.0, - 14814125365076808131, - 8660743237002811080, + 16381206568372064271, + 10868552521626828999, 18446744073709551615, 18446744073709551615, - 133, - 141, - 133, - 141, - 26, - 27, + 313, + 319, + 313, + 319, + 52, + 54, true, - "platform", - "platform" + "at the", + "at the" + ], + [ + "conn", + "single-conn", + 18436578077535696718, + "TEXT", + "#/texts/84", + 1.0, + 16381206565712212855, + 7724660172286794609, + 18446744073709551615, + 18446744073709551615, + 324, + 330, + 324, + 330, + 55, + 57, + true, + "of the", + "of the" + ], + [ + "conn", + "single-conn", + 18436578077535696718, + "TEXT", + "#/texts/84", + 1.0, + 389609625633313393, + 6432600486678620238, + 18446744073709551615, + 18446744073709551615, + 392, + 396, + 392, + 396, + 69, + 70, + true, + "like", + "like" + ], + [ + "sentence", + "", + 11734907767490759865, + "TEXT", + "#/texts/85", + 1.0, + 9089347946185436978, + 12322779939098932937, + 18446744073709551615, + 18446744073709551615, + 0, + 223, + 0, + 223, + 0, + 34, + true, + "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements.", + "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements." + ], + [ + "sentence", + "", + 11734907767490759865, + "TEXT", + "#/texts/85", + 1.0, + 5619374035914941215, + 13388056321170730470, + 18446744073709551615, + 18446744073709551615, + 224, + 307, + 224, + 307, + 34, + 47, + true, + "The parse component is indeed more demanding than the simple annotation components.", + "The parse component is indeed more demanding than the simple annotation components." ], [ "term", "single-term", - 12387489643011067991, + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 16381206521526353544, - 17652704280141269029, + 5470814617574924291, + 8565468289586536983, 18446744073709551615, 18446744073709551615, - 160, - 166, - 160, - 166, 30, - 31, + 43, + 30, + 43, + 5, + 7, true, - "regard", - "regard" + "compute layer", + "compute layer" ], [ "term", "single-term", - 12387489643011067991, + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 16381206574973295053, - 10204329654469875979, + 220880112941331342, + 10757099033331540513, 18446744073709551615, 18446744073709551615, - 174, - 180, - 174, - 180, - 33, - 34, + 69, + 85, + 69, + 85, + 11, + 13, true, - "number", - "number" + "different queues", + "different queues" ], [ "term", "single-term", - 12387489643011067991, + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 329104159157820437, - 7766615889727787026, + 5487575286069153569, + 13394910817957544454, 18446744073709551615, 18446744073709551615, - 184, - 189, - 184, - 189, - 35, - 36, + 157, + 176, + 157, + 176, + 26, + 28, true, - "users", - "users" + "different component", + "different component" ], [ "term", "single-term", - 12387489643011067991, + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 16381206574973295053, - 10204329654469873734, + 8988400645948795194, + 16485622837841306210, 18446744073709551615, 18446744073709551615, - 195, - 201, - 195, - 201, - 38, - 39, + 196, + 222, + 196, + 222, + 31, + 33, true, - "number", - "number" + "computational requirements", + "computational requirements" ], [ "term", "single-term", - 12387489643011067991, + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 6167933651658664291, - 11552308682759832261, + 5037855592482871690, + 16562149494420733590, 18446744073709551615, 18446744073709551615, - 215, - 224, - 215, - 224, - 41, - 42, + 228, + 243, + 228, + 243, + 35, + 37, true, - "documents", - "documents" + "parse component", + "parse component" ], [ - "verb", - "compound-verb", - 12387489643011067991, + "term", + "single-term", + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 17858839733535377008, - 1920907252286073734, + 1786684417185012154, + 17458883524654231766, 18446744073709551615, 18446744073709551615, - 142, - 154, - 142, - 154, - 27, - 29, + 278, + 306, + 278, + 306, + 43, + 46, true, - "were scaling", - "were scaling" + "simple annotation components", + "simple annotation components" ], [ - "verb", - "single-verb", - 12387489643011067991, + "term", + "single-term", + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 12178341415896275389, - 7943964340963228966, + 2703018952916355661, + 2103701956309679472, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, + 4, + 14, + 4, + 14, 1, + 2, true, - "Let", - "Let" + "components", + "components" ], [ - "verb", - "single-verb", - 12387489643011067991, + "term", + "single-term", + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 8106397868479560363, - 17872032239065412285, + 14637917407223052431, + 233568237166781340, 18446744073709551615, 18446744073709551615, - 11, - 18, - 11, - 18, - 3, - 4, + 116, + 124, + 116, + 124, + 20, + 21, true, - "discuss", - "discuss" + "fraction", + "fraction" ], [ - "verb", - "single-verb", - 12387489643011067991, + "term", + "single-term", + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 8106478648771436891, - 8278854775081679845, + 6168338487309432467, + 7308226084005327662, 18446744073709551615, 18446744073709551615, - 24, - 31, - 24, - 31, - 5, - 6, + 128, + 137, + 128, + 137, + 22, + 23, true, - "scaling", - "scaling" + "resources", + "resources" ], [ "verb", - "single-verb", - 12387489643011067991, + "compound-verb", + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 8106476015433464060, - 11919317671569973370, + 6181919818947346503, + 7425024011998385797, 18446744073709551615, 18446744073709551615, - 63, - 70, - 63, - 70, - 13, - 14, + 244, + 253, + 244, + 253, + 37, + 39, true, - "pointed", - "pointed" + "is indeed", + "is indeed" ], [ "verb", "single-verb", - 12387489643011067991, + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 6171728176299542016, - 13908821060191020107, + 8106478500389476193, + 7333002514628894973, 18446744073709551615, 18446744073709551615, - 205, - 214, - 205, - 214, - 40, - 41, + 15, + 22, + 15, + 22, + 2, + 3, true, - "processed", - "processed" + "running", + "running" ], [ - "conn", - "single-conn", - 12387489643011067991, + "verb", + "single-verb", + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 15441160910541485678, - 1498640040591994871, + 12178341415895564896, + 2069865895983944783, 18446744073709551615, 18446744073709551615, - 40, - 42, - 40, - 42, + 44, + 47, + 44, + 47, 7, 8, true, - "on", - "on" + "are", + "are" ], [ - "conn", - "single-conn", - 12387489643011067991, + "verb", + "single-verb", + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 15441160910541480533, - 1498641809232943552, + 6167774653473311671, + 6290589207758732495, 18446744073709551615, 18446744073709551615, - 57, - 59, - 57, - 59, - 11, - 12, + 56, + 65, + 56, + 65, + 9, + 10, true, - "As", - "As" + "organized", + "organized" ], [ - "conn", - "single-conn", - 12387489643011067991, + "verb", + "single-verb", + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 16381206560518651853, - 18249880271754047870, + 14892592691709012982, + 6239765786557836013, 18446744073709551615, 18446744073709551615, - 75, - 81, - 75, - 81, - 15, + 100, + 111, + 100, + 111, 17, + 19, true, - "in the", - "in the" + "can control", + "can control" ], [ - "conn", - "single-conn", - 12387489643011067991, + "verb", + "single-verb", + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 16381206565712212855, - 14962824842694991931, + 5946734708345938643, + 10385794168888707641, 18446744073709551615, 18446744073709551615, - 92, - 98, - 92, - 98, - 18, - 20, + 138, + 147, + 138, + 147, + 23, + 24, true, - "of the", - "of the" + "allocated", + "allocated" ], [ - "conn", - "single-conn", - 12387489643011067991, + "verb", + "single-verb", + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 8106397727991264470, - 14674040507008400644, + 6180152660545840784, + 10572035524213656485, 18446744073709551615, 18446744073709551615, - 125, - 132, - 125, - 132, - 24, - 26, + 177, + 186, + 177, + 186, + 28, + 29, true, - "for the", - "for the" + "depending", + "depending" ], [ - "conn", - "single-conn", - 12387489643011067991, + "verb", + "single-verb", + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 389609625618037948, - 16164147193980002015, + 6180164155127649426, + 12351505444317336995, 18446744073709551615, 18446744073709551615, - 155, - 159, - 155, - 159, - 29, - 30, + 259, + 268, + 259, + 268, + 40, + 41, true, - "with", - "with" + "demanding", + "demanding" ], [ "conn", "single-conn", - 12387489643011067991, + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 15441160910541485670, - 1498640040717785247, + 6165459236568015364, + 15229237459031979782, 18446744073709551615, 18446744073709551615, - 181, - 183, - 181, - 183, - 34, - 35, + 87, + 96, + 87, + 96, + 14, + 16, true, - "of", - "of" + "such that", + "such that" ], [ "conn", "single-conn", - 12387489643011067991, + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 15441160910541485670, - 1498640040717786992, + 16381206560518651853, + 15245488941580331570, 18446744073709551615, 18446744073709551615, - 202, - 204, - 202, - 204, - 39, - 40, + 23, + 29, + 23, + 29, + 3, + 5, true, - "of", - "of" + "in the", + "in the" ], [ "conn", "single-conn", - 12387489643011067991, + 11734907767490759865, "TEXT", - "#/texts/88", + "#/texts/85", 1.0, - 16381206519425733256, - 17614093764484085203, + 15441160910541486538, + 16667658716295011841, 18446744073709551615, 18446744073709551615, - 167, - 173, - 167, - 173, - 31, - 33, + 66, + 68, + 66, + 68, + 10, + 11, true, - "to the", - "to the" + "in", + "in" ], [ - "numval", - "year", - 10375772475809458895, + "conn", + "single-conn", + 11734907767490759865, "TEXT", - "#/texts/89", + "#/texts/85", 1.0, - 389609625548777057, - 52173736134253972, + 15441160910541485670, + 16667656100672477854, 18446744073709551615, 18446744073709551615, - 172, - 176, - 172, - 176, - 35, - 36, + 125, + 127, + 125, + 127, + 21, + 22, true, - "2017", - "2017" + "of", + "of" ], [ - "numval", - "ival", - 10375772475809458895, + "conn", + "single-conn", + 11734907767490759865, "TEXT", - "#/texts/89", + "#/texts/85", 1.0, - 15441160910541481786, - 6866904732321432818, + 14637917333167503367, + 16189411727226984898, 18446744073709551615, 18446744073709551615, - 6, - 8, - 6, - 8, - 1, - 2, + 148, + 156, + 148, + 156, + 24, + 26, true, - "20", - "20" + "for each", + "for each" ], [ - "sentence", - "", - 10375772475809458895, + "conn", + "single-conn", + 11734907767490759865, "TEXT", - "#/texts/89", + "#/texts/85", 1.0, - 7518545641936550538, - 13327477621696107517, + 15441160910541485678, + 16667656110763672808, 18446744073709551615, 18446744073709551615, - 32, - 177, - 32, - 177, - 8, - 37, + 187, + 189, + 187, + 189, + 29, + 30, true, - "As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017.", - "As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017." + "on", + "on" ], [ - "sentence", - "", - 10375772475809458895, + "conn", + "single-conn", + 11734907767490759865, "TEXT", - "#/texts/89", + "#/texts/85", 1.0, - 10434596786350098942, - 4246700910361462765, + 14634130760851708851, + 14161918089512738998, 18446744073709551615, 18446744073709551615, - 178, - 363, - 178, - 363, - 37, - 71, + 269, + 277, + 269, + 277, + 41, + 43, true, - "It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time.", - "It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time." + "than the", + "than the" ], [ - "sentence", - "", - 10375772475809458895, + "numval", + "ival", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 15858606414680046310, - 9355067359629881245, + 17767354399704235157, + 4252787363852102188, 18446744073709551615, 18446744073709551615, - 364, - 504, - 364, - 504, - 71, - 99, + 39, + 40, + 39, + 40, + 7, + 8, true, - "Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", - "Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity." + "5", + "5" ], [ - "term", - "single-term", - 10375772475809458895, + "numval", + "ival", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 2903324788977241891, - 10047065559827135054, + 17767354399704235156, + 4252787353929282716, 18446744073709551615, 18446744073709551615, - 82, - 91, - 82, - 91, - 19, - 21, + 63, + 64, + 63, + 64, + 11, + 12, true, - "PDF pages", - "PDF pages" + "4", + "4" ], [ - "term", - "single-term", - 10375772475809458895, + "numval", + "ival", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 2245603532715892325, - 16478559053695087323, + 17767354399704235152, + 4252787363047610230, 18446744073709551615, 18446744073709551615, - 226, - 237, - 226, - 237, - 46, - 48, + 79, + 80, + 79, + 80, + 15, + 16, true, - "sharp steps", - "sharp steps" + "8", + "8" ], [ - "term", - "single-term", - 10375772475809458895, + "numval", + "ival", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 11942859038914222878, - 15085431028673446657, + 17767354399704235152, + 4252787363047621876, 18446744073709551615, 18446744073709551615, - 286, - 301, - 286, - 301, - 56, - 58, + 132, + 133, + 132, + 133, + 26, + 27, true, - "massive amounts", - "massive amounts" + "8", + "8" ], [ - "term", - "single-term", - 10375772475809458895, + "numval", + "ival", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 7252014402665196659, - 15261261573536593307, + 17767354399704235157, + 4252787363852130202, 18446744073709551615, 18446744073709551615, 342, - 354, + 343, 342, - 354, - 66, - 68, + 343, + 63, + 64, true, - "small amount", - "small amount" + "5", + "5" ], [ - "term", - "single-term", - 10375772475809458895, + "sentence", + "", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 2245697320636800498, - 85672314451497322, + 6583879206628208074, + 18118287548868180601, 18446744073709551615, 18446744073709551615, - 472, - 483, - 472, - 483, - 93, - 95, + 0, + 218, + 0, + 218, + 0, + 42, true, - "short burst", - "short burst" + "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks.", + "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks." ], [ - "term", - "single-term", - 10375772475809458895, + "sentence", + "", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 16558536334265483368, - 11847226154466128513, + 4563023981242652318, + 13772109345187795922, 18446744073709551615, 18446744073709551615, - 487, - 503, - 487, - 503, - 96, - 98, + 219, + 331, + 219, + 331, + 42, + 61, true, - "extreme activity", - "extreme activity" + "Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment.", + "Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment." ], [ - "term", - "single-term", - 10375772475809458895, + "sentence", + "", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 16381206574973295053, - 8846230013490521873, + 14978678634121360006, + 2618788565151427954, 18446744073709551615, 18446744073709551615, - 52, - 58, - 52, - 58, - 14, - 15, + 332, + 438, + 332, + 438, + 61, + 80, true, - "number", - "number" + "Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", + "Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer." ], [ "term", - "single-term", - 10375772475809458895, + "enum-term-mark-2", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 329104159157820437, - 8249018447781337774, + 2528135788265244608, + 5344652883238296137, 18446744073709551615, 18446744073709551615, - 62, - 67, - 62, - 67, - 16, - 17, + 179, + 211, + 179, + 211, + 36, + 40, true, - "users", - "users" + "learning training and prediction", + "learning training and prediction" ], [ "term", - "single-term", - 10375772475809458895, + "enum-term-mark-2", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 389609625631241985, - 100194020203438184, + 14743433718696772273, + 15154440347503938098, 18446744073709551615, 18446744073709551615, - 126, - 130, - 126, - 130, - 26, - 27, + 408, + 437, + 408, + 437, + 75, + 79, true, - "time", - "time" + "orchestration and store layer", + "orchestration and store layer" ], [ "term", "single-term", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 16381206590630165717, - 11719327657280904751, + 16269569412982647766, + 2024757386881102351, 18446744073709551615, 18446744073709551615, - 141, - 147, - 141, - 147, - 29, - 30, + 15, + 26, + 15, + 26, + 3, + 5, true, - "launch", - "launch" + "main system", + "main system" ], [ "term", "single-term", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 8106478708506632112, - 18231497537744338632, + 13444630328481412471, + 2581523576540413652, 18446744073709551615, 18446744073709551615, - 155, - 162, - 155, - 162, - 32, - 33, + 41, + 57, + 41, + 57, + 8, + 10, true, - "service", - "service" + "Kubernetes nodes", + "Kubernetes nodes" ], [ "term", "single-term", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 329104161963544245, - 9857237958615296698, + 6563416156472488864, + 2213523264527249618, 18446744073709551615, 18446744073709551615, - 166, - 171, - 166, - 171, - 34, - 35, + 65, + 74, + 65, + 74, + 12, + 14, true, - "April", - "April" + "CPU cores", + "CPU cores" ], [ "term", "single-term", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 329104159157820437, - 8249018447781333663, + 16269569728729474655, + 9476002452497745608, 18446744073709551615, 18446744073709551615, - 260, - 265, - 260, - 265, - 52, - 53, + 87, + 98, + 87, + 98, + 18, + 20, true, - "users", - "users" + "main memory", + "main memory" ], [ "term", "single-term", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 6167933651658664291, - 12240764636372283946, + 3613081198034507866, + 9231793791806678387, 18446744073709551615, 18446744073709551615, - 305, - 314, - 305, - 314, - 59, - 60, + 174, + 196, + 174, + 196, + 35, + 38, true, - "documents", - "documents" + "deep learning training", + "deep learning training" ], [ "term", "single-term", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 8106478708506632112, - 18231497537744787108, + 11816234786078857760, + 13747687809735994367, 18446744073709551615, 18446744073709551615, - 324, - 331, - 324, - 331, - 62, - 63, + 201, + 217, + 201, + 217, + 39, + 41, true, - "service", - "service" + "prediction tasks", + "prediction tasks" ], [ "term", "single-term", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 389609625631241985, - 100194020203425198, + 1277611218502696979, + 1791114475320884340, 18446744073709551615, 18446744073709551615, - 358, - 362, - 358, - 362, - 69, - 70, + 229, + 245, + 229, + 245, + 45, + 47, true, - "time", - "time" + "flexible binding", + "flexible binding" ], [ "term", "single-term", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 16381206568241679420, - 9368345895491961575, + 15130402117130351741, + 1356464078922122453, 18446744073709551615, 18446744073709551615, - 375, - 381, - 375, - 381, - 74, - 75, + 266, + 280, + 266, + 280, + 50, + 52, true, - "design", - "design" + "specific nodes", + "specific nodes" ], [ "term", "single-term", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 8106476000253296785, - 450489361038114021, + 5422119649868232113, + 9343245046888260619, 18446744073709551615, 18446744073709551615, - 396, - 403, - 396, - 403, - 80, - 81, + 286, + 301, + 286, + 301, + 54, + 56, true, - "problem", - "problem" + "great advantage", + "great advantage" ], [ "term", "single-term", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 329104161666914718, - 9871052204414646047, + 3499436126781074633, + 11463332423358967580, 18446744073709551615, 18446744073709551615, - 425, - 430, - 425, - 430, - 84, - 85, + 309, + 330, + 309, + 330, + 58, + 60, true, - "peaks", - "peaks" + "Kubernetes deployment", + "Kubernetes deployment" ], [ "term", "single-term", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 8106478708506632112, - 18231497537744780505, + 6641605220774829847, + 5925222761573375364, 18446744073709551615, 18446744073709551615, - 439, - 446, - 439, - 446, - 87, - 88, + 344, + 366, + 344, + 366, + 64, + 67, true, - "service", - "service" + "other virtual machines", + "other virtual machines" ], [ - "verb", - "compound-verb", - 10375772475809458895, + "term", + "single-term", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 11953671505157202285, - 11937015333801488817, + 3081516039280483029, + 12956841069716270529, 18446744073709551615, 18446744073709551615, - 92, - 120, - 92, - 120, - 21, - 25, + 426, + 437, + 426, + 437, + 77, + 79, true, - "has been increasing steadily", - "has been increasing steadily" + "store layer", + "store layer" ], [ - "verb", - "compound-verb", - 10375772475809458895, + "term", + "single-term", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 15603889104119874938, - 7803556645500016268, + 15441160910541479948, + 9352056469740640944, 18446744073709551615, 18446744073709551615, - 181, - 191, - 181, - 191, - 38, - 40, + 81, + 83, + 81, + 83, + 16, + 17, true, - "is however", - "is however" + "GB", + "GB" ], [ - "verb", - "compound-verb", - 10375772475809458895, + "term", + "single-term", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 7806959182595507225, - 2570245507595885020, + 329104162118942300, + 7223786837846062912, 18446744073709551615, 18446744073709551615, - 266, - 285, - 266, - 285, - 53, - 56, + 126, + 131, + 126, + 131, + 25, + 26, true, - "have been uploading", - "have been uploading" + "POWER", + "POWER" ], [ - "verb", - "compound-verb", - 10375772475809458895, + "term", + "single-term", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 8106477873809970266, - 16668743459306840426, + 389609625621164460, + 9971368100247265020, 18446744073709551615, 18446744073709551615, - 386, - 393, - 386, - 393, - 77, - 79, + 134, + 138, + 134, + 138, + 27, + 28, true, - "was not", - "was not" + "node", + "node" ], [ - "verb", - "single-verb", - 10375772475809458895, + "term", + "single-term", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 8106397812083771063, - 12986611233641368860, + 389609625538377862, + 10024562876350301328, 18446744073709551615, 18446744073709551615, - 39, - 46, - 39, - 46, - 10, - 12, + 149, + 153, + 149, + 153, + 30, + 31, true, - "can see", - "can see" + "GPUs", + "GPUs" ], [ - "verb", - "single-verb", - 10375772475809458895, + "term", + "single-term", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 6171728176299542016, - 11463438981088562167, + 990358581043194791, + 3870495549619153573, 18446744073709551615, 18446744073709551615, - 72, - 81, - 72, - 81, - 18, - 19, + 249, + 262, + 249, + 262, + 48, + 49, true, - "processed", - "processed" + "microservices", + "microservices" ], [ - "verb", - "single-verb", - 10375772475809458895, + "term", + "single-term", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 12178341415895638617, - 17393490097429873872, + 14635102416861801722, + 13295514760162013366, 18446744073709551615, 18446744073709551615, - 207, - 210, - 207, - 210, - 42, - 43, + 392, + 400, + 392, + 400, + 72, + 73, true, - "see", - "see" + "services", + "services" ], [ - "verb", - "single-verb", - 10375772475809458895, + "term", + "single-term", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 12178341415895564896, - 17393508726112862574, + 4327709553742697698, + 1943965779924640606, 18446744073709551615, 18446744073709551615, - 222, - 225, - 222, - 225, - 45, - 46, + 408, + 421, + 408, + 421, + 75, + 76, true, - "are", - "are" + "orchestration", + "orchestration" ], [ "verb", - "single-verb", - 10375772475809458895, + "compound-verb", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 5581574448026047221, - 15445029894002382055, + 16914551794403134749, + 14289767005593609982, 18446744073709551615, 18446744073709551615, - 239, - 249, - 239, - 249, - 49, - 50, + 154, + 169, + 154, + 169, + 31, + 34, true, - "indicating", - "indicating" + "is dedicated to", + "is dedicated to" ], [ "verb", - "single-verb", - 10375772475809458895, + "compound-verb", + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 6807190128157759045, - 13053632147510739476, + 556334503717216086, + 14184460239514695626, 18446744073709551615, 18446744073709551615, - 407, - 418, - 407, - 418, - 82, - 83, + 367, + 387, + 367, + 387, + 67, + 71, true, - "accommodate", - "accommodate" + "are employed to host", + "are employed to host" ], [ "verb", "single-verb", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 12178341415895525606, - 17393661643866953573, + 14814150617868433693, + 2828843033811804789, 18446744073709551615, 18446744073709551615, - 447, - 450, - 447, - 450, - 88, - 89, + 27, + 35, + 27, + 35, + 5, + 6, true, - "was", - "was" + "operates", + "operates" ], [ "verb", "single-verb", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 16381206485955868973, - 14033094471219798649, + 15441160910541486535, + 9351998670672415825, 18446744073709551615, 18446744073709551615, - 459, - 465, - 459, - 465, - 91, - 92, + 281, + 283, + 281, + 283, + 52, + 53, true, - "handle", - "handle" + "is", + "is" ], [ "conn", "single-conn", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 15441160910541480533, - 6866903594362655679, + 15441160910541485678, + 9351998466637895866, 18446744073709551615, 18446744073709551615, - 32, - 34, - 32, - 34, - 8, - 9, + 36, + 38, + 36, + 38, + 6, + 7, true, - "As", - "As" + "on", + "on" ], [ "conn", "single-conn", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 15441160910541485670, - 6866903700093396448, + 389609625618037948, + 9971074391070552914, 18446744073709551615, 18446744073709551615, - 59, - 61, - 59, - 61, - 15, - 16, + 58, + 62, + 58, + 62, + 10, + 11, true, - "of", - "of" + "with", + "with" ], [ "conn", "single-conn", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 389609625618865305, - 100185561388315538, + 15441160910541485670, + 9351998465977112201, 18446744073709551615, 18446744073709551615, - 121, - 125, - 121, - 125, - 25, - 26, + 84, + 86, + 84, + 86, + 17, + 18, true, - "over", - "over" + "of", + "of" ], [ "conn", "single-conn", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 6168057894310307081, - 494748048694411645, + 389609625618037948, + 9971074391070570820, 18446744073709551615, 18446744073709551615, - 131, - 140, - 131, - 140, - 27, + 139, + 143, + 139, + 143, + 28, 29, true, - "since the", - "since the" + "with", + "with" ], [ "conn", "single-conn", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, 15441160910541485670, - 6866903700093386593, + 9351998465978041953, 18446744073709551615, 18446744073709551615, - 148, - 150, - 148, - 150, - 30, - 31, + 246, + 248, + 246, + 248, + 47, + 48, true, "of", "of" @@ -71501,2120 +73546,2099 @@ [ "conn", "single-conn", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 15441160910541486538, - 6866903364400136605, + 16381206565712212855, + 15798433650653459254, 18446744073709551615, 18446744073709551615, - 163, - 165, - 163, - 165, - 33, - 34, + 302, + 308, + 302, + 308, + 56, + 58, true, - "in", - "in" + "of the", + "of the" ], [ "conn", "single-conn", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 389609625631229034, - 100122430387311494, + 16381206560518651853, + 15995808466227689457, 18446744073709551615, 18446744073709551615, - 211, - 215, - 211, - 215, - 43, - 44, + 401, + 407, + 401, + 407, + 73, + 75, true, - "that", - "that" + "in the", + "in the" ], [ "conn", "single-conn", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 3504047303126433547, - 11549299435570708613, + 16381206519425733256, + 10817242341263362701, 18446744073709551615, 18446744073709551615, - 250, - 259, - 250, - 259, - 50, - 52, + 167, + 173, + 167, + 173, + 33, + 35, true, - "that some", - "that some" + "to the", + "to the" ], [ "conn", "single-conn", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 15441160910541485670, - 6866903700093379447, + 15441160910541485865, + 9351998575526350776, 18446744073709551615, 18446744073709551615, - 302, - 304, - 302, - 304, - 58, - 59, + 263, + 265, + 263, + 265, + 49, + 50, true, - "of", - "of" + "to", + "to" ], [ "conn", "single-conn", - 10375772475809458895, + 7845460979782401889, "TEXT", - "#/texts/89", + "#/texts/86", 1.0, - 14637953883063114384, - 1261358077151630278, + 15441160910541485865, + 9351998575526358427, 18446744073709551615, 18446744073709551615, - 315, - 323, - 315, - 323, - 60, - 62, + 380, + 382, + 380, + 382, + 69, + 70, true, - "into the", - "into the" + "to", + "to" ], [ - "conn", - "single-conn", - 10375772475809458895, + "numval", + "fval", + 17769988780693768120, "TEXT", - "#/texts/89", + "#/texts/87", 1.0, - 389609625698530964, - 94041907290639477, + 12178341415896306587, + 11831950895164487341, 18446744073709551615, 18446744073709551615, - 332, - 336, - 332, - 336, - 63, - 65, + 0, + 3, + 0, + 3, + 0, + 1, true, - "in a", - "in a" + "4.3", + "4.3" ], [ - "conn", - "single-conn", - 10375772475809458895, + "numval", + "ival", + 12387489643011067991, "TEXT", - "#/texts/89", + "#/texts/88", 1.0, - 15441160910541485670, - 6866903700093367010, + 17767354399704235159, + 6323623135901186785, 18446744073709551615, 18446744073709551615, - 355, - 357, - 355, - 357, - 68, - 69, + 258, + 259, + 258, + 259, + 48, + 49, true, - "of", - "of" + "7", + "7" ], [ - "conn", - "single-conn", - 10375772475809458895, + "sentence", + "", + 12387489643011067991, "TEXT", - "#/texts/89", + "#/texts/88", 1.0, - 15441160910541485670, - 6866903700093361485, + 11703520391970010536, + 357834892882144608, 18446744073709551615, 18446744073709551615, - 484, - 486, - 484, - 486, - 95, - 96, + 0, + 56, + 0, + 56, + 0, + 11, true, - "of", - "of" + "Let us now discuss some scaling results on our platform.", + "Let us now discuss some scaling results on our platform." ], [ - "conn", - "single-conn", - 10375772475809458895, + "sentence", + "", + 12387489643011067991, "TEXT", - "#/texts/89", + "#/texts/88", 1.0, - 15441160910541485865, - 6866903731987646441, + 8157900891627315247, + 15072204804485786404, 18446744073709551615, 18446744073709551615, - 204, - 206, - 204, - 206, - 41, - 42, + 57, + 247, + 57, + 247, + 11, + 46, true, - "to", - "to" + "As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources.", + "As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources." ], [ - "conn", - "single-conn", - 10375772475809458895, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/89", + "#/texts/88", 1.0, - 15441160910541485865, - 6866903731988159776, + 4421383392096991748, + 9783447214836928971, 18446744073709551615, 18446744073709551615, - 368, - 370, - 368, - 370, - 72, - 73, + 229, + 246, + 229, + 246, + 43, + 45, true, - "to", - "to" + "compute resources", + "compute resources" ], [ - "conn", - "single-conn", - 10375772475809458895, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/89", + "#/texts/88", 1.0, - 15441160910541485865, - 6866903731988157479, + 8106478445190161533, + 16203321325185840639, 18446744073709551615, 18446744073709551615, - 404, - 406, - 404, - 406, - 81, - 82, + 32, + 39, + 32, + 39, + 6, + 7, true, - "to", - "to" + "results", + "results" ], [ - "conn", - "single-conn", - 10375772475809458895, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/89", + "#/texts/88", 1.0, - 15441160910541485865, - 6866903731988157354, + 14814125365076808131, + 8660743237002823027, 18446744073709551615, 18446744073709551615, - 456, - 458, - 456, - 458, - 90, - 91, + 47, + 55, + 47, + 55, + 9, + 10, true, - "to", - "to" + "platform", + "platform" ], [ - "numval", - "ival", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 17767354399704235152, - 10891777227864623310, + 5948159060234732715, + 1856499645219012237, 18446744073709551615, 18446744073709551615, - 10, - 11, - 10, - 11, - 2, - 3, + 82, + 91, + 82, + 91, + 17, + 18, true, - "8", - "8" + "beginning", + "beginning" ], [ - "parenthesis", - "round brackets", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 4022242346074010063, - 12541000686584287248, + 8106478708629288965, + 9823809706360263062, 18446744073709551615, 18446744073709551615, - 74, - 178, - 74, - 178, - 14, - 33, + 99, + 106, + 99, + 106, + 20, + 21, true, - "(i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON)", - "(i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON)" + "section", + "section" ], [ - "expression", - "common", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 15441160910541486545, - 15841608933708088140, + 13240311013633905449, + 13295854927356281099, 18446744073709551615, 18446744073709551615, - 75, - 79, - 75, - 79, - 15, - 16, + 112, + 124, + 112, + 124, + 23, + 24, true, - "ie", - "i.e." + "requirements", + "requirements" ], [ - "expression", - "word-concatenation", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 2217258678859216685, - 3493505507787421146, + 14814125365076808131, + 8660743237002811080, 18446744073709551615, 18446744073709551615, - 621, - 639, - 621, - 639, - 119, - 120, + 133, + 141, + 133, + 141, + 26, + 27, true, - "better-than-linear", - "better-than-linear" + "platform", + "platform" ], [ - "expression", - "word-concatenation", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 6285955549867796622, - 17538568638231419383, + 16381206521526353544, + 17652704280141269029, 18446744073709551615, 18446744073709551615, - 1121, - 1137, - 1121, - 1137, - 209, - 210, + 160, + 166, + 160, + 166, + 30, + 31, true, - "time-to-solution", - "time-to-solution" + "regard", + "regard" ], [ - "expression", - "word-concatenation", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 14639522327238241124, - 8193922819820873277, + 16381206574973295053, + 10204329654469875979, 18446744073709551615, 18446744073709551615, - 1155, - 1163, - 1155, - 1163, - 213, - 214, + 174, + 180, + 174, + 180, + 33, + 34, true, - "job-size", - "job-size" + "number", + "number" ], [ - "sentence", - "", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 17950606815080185664, - 182687704084943809, + 329104159157820437, + 7766615889727787026, 18446744073709551615, 18446744073709551615, - 0, - 228, - 0, - 228, - 0, - 42, + 184, + 189, + 184, + 189, + 35, + 36, true, - "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources.", - "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources." + "users", + "users" ], [ - "sentence", - "", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 9291869836472436551, - 7073966199782583842, + 16381206574973295053, + 10204329654469873734, 18446744073709551615, 18446744073709551615, - 229, - 320, - 229, - 320, - 42, - 58, + 195, + 201, + 195, + 201, + 38, + 39, true, - "We show this scaling by displaying the speedup versus the number of worker nodes available.", - "We show this scaling by displaying the speedup versus the number of worker nodes available." + "number", + "number" ], [ - "sentence", - "", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 11942797776008272897, - 18354315767267706544, + 6167933651658664291, + 11552308682759832261, 18446744073709551615, 18446744073709551615, - 321, - 448, - 321, - 448, - 58, - 83, + 215, + 224, + 215, + 224, + 41, + 42, true, - "Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores.", - "Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores." + "documents", + "documents" ], [ - "sentence", - "", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 1337110641996971981, - 12497994289374110365, + 16381206514091025767, + 14670700706856427543, 18446744073709551615, 18446744073709551615, - 449, - 580, - 449, - 580, - 83, - 111, + 251, + 257, + 251, + 257, + 47, + 48, true, - "As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes.", - "As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes." + "Figure", + "Figure" ], [ - "sentence", - "", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 7140787443244237501, - 14548003650603154277, + 16381206574973295053, + 10204329654469868686, 18446744073709551615, 18446744073709551615, - 581, - 724, - 581, - 724, - 111, - 135, + 273, + 279, + 273, + 279, + 53, + 54, true, - "Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker.", - "Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker." + "number", + "number" ], [ - "sentence", - "", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 7774794569544328631, - 12157835320367080769, + 329104159157820437, + 7766615889727784459, 18446744073709551615, 18446744073709551615, - 725, - 876, - 725, - 876, - 135, - 166, + 283, + 288, + 283, + 288, + 55, + 56, true, - "The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level.", - "The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level." + "users", + "users" ], [ - "sentence", - "", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 11827058603931473819, - 1210577326445407272, + 16381206574973295053, + 10204329654469868200, 18446744073709551615, 18446744073709551615, - 877, - 1042, - 877, - 1042, - 166, - 194, + 297, + 303, + 297, + 303, + 58, + 59, true, - "The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes.", - "The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes." + "number", + "number" ], [ - "sentence", - "", - 7054726458191881751, + "term", + "single-term", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 8702943219455942098, - 9096969319153565329, + 12178341415896289890, + 7943963832689990500, 18446744073709551615, 18446744073709551615, - 1043, - 1164, - 1043, - 1164, - 194, - 215, + 317, + 320, + 317, + 320, + 61, + 62, true, - "Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", - "Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size." + "PDF", + "PDF" ], [ - "term", - "single-term", - 7054726458191881751, + "verb", + "compound-verb", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 8172710598775048780, - 6087292431822475163, + 17858839733535377008, + 1920907252286073734, 18446744073709551615, 18446744073709551615, - 46, - 73, - 46, - 73, - 11, - 14, + 142, + 154, + 142, + 154, + 27, + 29, true, - "main pipeline microservices", - "main pipeline microservices" + "were scaling", + "were scaling" ], [ - "term", - "single-term", - 7054726458191881751, + "verb", + "single-verb", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 12653831733608918357, - 5399694712153694222, + 12178341415896275389, + 7943964340963228966, 18446744073709551615, 18446744073709551615, - 95, - 108, - 95, - 108, - 19, - 21, + 0, + 3, + 0, + 3, + 0, + 1, true, - "PDF documents", - "PDF documents" + "Let", + "Let" ], [ - "term", - "single-term", - 7054726458191881751, + "verb", + "single-verb", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 12400507963759742880, - 6135519514760056473, + 8106397868479560363, + 17872032239065412285, 18446744073709551615, 18446744073709551615, - 297, - 309, - 297, - 309, - 54, - 56, + 11, + 18, + 11, + 18, + 3, + 4, true, - "worker nodes", - "worker nodes" + "discuss", + "discuss" ], [ - "term", - "single-term", - 7054726458191881751, + "verb", + "single-verb", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 4940765489471971613, - 347303216115352656, + 8106478648771436891, + 8278854775081679845, 18446744073709551615, 18446744073709551615, - 370, - 391, - 370, - 391, - 68, - 70, + 24, + 31, + 24, + 31, + 5, + 6, true, - "pipeline microservice", - "pipeline microservice" + "scaling", + "scaling" ], [ - "term", - "single-term", - 7054726458191881751, + "verb", + "single-verb", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 10318072901532559633, - 7254172824621403054, + 8106476015433464060, + 11919317671569973370, 18446744073709551615, 18446744073709551615, - 507, - 519, - 507, - 519, - 96, - 98, + 63, + 70, + 63, + 70, + 13, + 14, true, - "tasks scales", - "tasks scales" - ], - [ - "term", - "single-term", - 7054726458191881751, - "TEXT", - "#/texts/90", - 1.0, - 18001738063114990140, - 7718080442102537061, - 18446744073709551615, - 18446744073709551615, - 621, - 647, - 621, - 647, - 119, - 121, - true, - "better-than-linear speedup", - "better-than-linear speedup" + "pointed", + "pointed" ], [ - "term", - "single-term", - 7054726458191881751, + "verb", + "single-verb", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 3088520230983972493, - 6524782884039209835, + 6171728176299542016, + 13908821060191020107, 18446744073709551615, 18446744073709551615, - 670, - 691, - 670, - 691, - 126, - 128, + 205, + 214, + 205, + 214, + 40, + 41, true, - "bandwidth constraints", - "bandwidth constraints" + "processed", + "processed" ], [ - "term", - "single-term", - 7054726458191881751, + "verb", + "single-verb", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 14290393742330326868, - 1869283060159003292, + 389609625741152123, + 16671401016536211852, 18446744073709551615, 18446744073709551615, - 744, - 758, - 744, - 758, - 139, - 141, + 264, + 268, + 264, + 268, + 51, + 52, true, - "assemble tasks", - "assemble tasks" + "show", + "show" ], [ - "term", - "single-term", - 7054726458191881751, + "verb", + "single-verb", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 13968810274884964698, - 7333175022141755015, + 6171728176299542016, + 13908821060190998226, 18446744073709551615, 18446744073709551615, - 865, - 875, - 865, - 875, - 163, - 165, + 307, + 316, + 307, + 316, + 60, + 61, true, - "page level", - "page level" + "processed", + "processed" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 18404777356709557822, - 5867368598465364348, + 15441160910541485678, + 1498640040591994871, 18446744073709551615, 18446744073709551615, - 938, - 952, - 938, - 952, - 177, - 179, + 40, + 42, + 40, + 42, + 7, + 8, true, - "load imbalance", - "load imbalance" + "on", + "on" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 12400507963759742880, - 6135519514760170296, + 15441160910541480533, + 1498641809232943552, 18446744073709551615, 18446744073709551615, - 965, - 977, - 965, - 977, - 181, - 183, + 57, + 59, + 57, + 59, + 11, + 12, true, - "worker nodes", - "worker nodes" + "As", + "As" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 12569603855738370264, - 1147410557148444790, + 16381206560518651853, + 18249880271754047870, 18446744073709551615, 18446744073709551615, - 1023, - 1041, - 1023, - 1041, - 190, - 193, + 75, + 81, + 75, + 81, + 15, + 17, true, - "large corpus sizes", - "large corpus sizes" + "in the", + "in the" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 4421383392096991748, - 17586453718413772848, + 16381206565712212855, + 14962824842694991931, 18446744073709551615, 18446744073709551615, - 1082, - 1099, - 1082, - 1099, - 202, - 204, + 92, + 98, + 92, + 98, + 18, + 20, true, - "compute resources", - "compute resources" + "of the", + "of the" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 16381206514091025767, - 11298218412956847237, + 8106397727991264470, + 14674040507008400644, 18446744073709551615, 18446744073709551615, - 3, - 9, - 3, - 9, - 1, - 2, + 125, + 132, + 125, + 132, + 24, + 26, true, - "Figure", - "Figure" + "for the", + "for the" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 8106478648771436891, - 15412781195243883400, + 389609625618037948, + 16164147193980002015, 18446744073709551615, 18446744073709551615, - 25, - 32, - 25, - 32, - 7, - 8, + 155, + 159, + 155, + 159, + 29, + 30, true, - "scaling", - "scaling" + "with", + "with" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 8106479143794098783, - 12796848297776230218, + 15441160910541485670, + 1498640040717785247, 18446744073709551615, 18446744073709551615, - 84, - 91, - 84, - 91, - 17, - 18, + 181, + 183, + 181, + 183, + 34, + 35, true, - "parsing", - "parsing" + "of", + "of" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 8106464587473865376, - 14658563877589949653, + 15441160910541485670, + 1498640040717786992, 18446744073709551615, 18446744073709551615, - 119, - 126, - 119, - 126, - 23, - 24, + 202, + 204, + 202, + 204, + 39, + 40, true, - "machine", - "machine" + "of", + "of" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 16381206567230470443, - 6941201434190273501, + 15441160910541480354, + 1498641903762977573, 18446744073709551615, 18446744073709551615, - 135, - 141, - 135, - 141, - 25, - 26, + 248, + 250, + 248, + 250, + 46, + 47, true, - "models", - "models" + "In", + "In" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 2703018679320364082, - 5037546725350435645, + 15441160910541485670, + 1498640040717800068, 18446744073709551615, 18446744073709551615, - 146, - 156, - 146, - 156, - 27, - 28, + 280, + 282, + 280, + 282, + 54, + 55, true, - "conversion", - "conversion" + "of", + "of" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 6167933651658664291, - 11495196011120521523, + 15441160910541485670, + 1498640040717801334, 18446744073709551615, 18446744073709551615, - 160, - 169, - 160, - 169, - 29, - 30, + 304, + 306, + 304, + 306, + 59, + 60, true, - "documents", - "documents" + "of", + "of" ], [ - "term", - "single-term", - 7054726458191881751, + "conn", + "single-conn", + 12387489643011067991, "TEXT", - "#/texts/90", + "#/texts/88", 1.0, - 389609625541450799, - 11852205465525539051, + 16381206519425733256, + 17614093764484085203, 18446744073709551615, 18446744073709551615, + 167, 173, - 177, + 167, 173, - 177, 31, - 32, + 33, true, - "JSON", - "JSON" + "to the", + "to the" ], [ - "term", - "single-term", - 7054726458191881751, + "numval", + "year", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 14814125365076808131, - 1145249750150199435, + 389609625548777057, + 52173736134253972, 18446744073709551615, 18446744073709551615, - 186, - 194, - 186, - 194, + 172, + 176, + 172, + 176, 35, 36, true, - "platform", - "platform" + "2017", + "2017" ], [ - "term", - "single-term", - 7054726458191881751, + "numval", + "ival", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 16381206521526353544, - 6483551066150257775, + 15441160910541481786, + 6866904732321432818, 18446744073709551615, 18446744073709551615, - 200, - 206, - 200, - 206, - 37, - 38, + 6, + 8, + 6, + 8, + 1, + 2, true, - "regard", - "regard" + "20", + "20" ], [ - "term", - "single-term", - 7054726458191881751, + "sentence", + "", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 6168338487309432467, - 8730004420584075578, + 7518545641936550538, + 13327477621696107517, 18446744073709551615, 18446744073709551615, - 218, - 227, - 218, - 227, - 40, - 41, + 32, + 177, + 32, + 177, + 8, + 37, true, - "resources", - "resources" + "As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017.", + "As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017." ], [ - "term", - "single-term", - 7054726458191881751, + "sentence", + "", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 8106478648771436891, - 15412781195243911626, + 10434596786350098942, + 4246700910361462765, 18446744073709551615, 18446744073709551615, - 242, - 249, - 242, - 249, - 45, - 46, + 178, + 363, + 178, + 363, + 37, + 71, true, - "scaling", - "scaling" + "It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time.", + "It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time." ], [ - "term", - "single-term", - 7054726458191881751, + "sentence", + "", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 8106478695960615463, - 15062829371033729664, + 15858606414680046310, + 9355067359629881245, 18446744073709551615, 18446744073709551615, - 268, - 275, - 268, - 275, - 49, - 50, + 364, + 504, + 364, + 504, + 71, + 99, true, - "speedup", - "speedup" + "Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", + "Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity." ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 16381206574973295053, - 7759102089231672623, + 2903324788977241891, + 10047065559827135054, 18446744073709551615, 18446744073709551615, - 287, - 293, - 287, - 293, - 52, - 53, + 82, + 91, + 82, + 91, + 19, + 21, true, - "number", - "number" + "PDF pages", + "PDF pages" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 8106478059506484182, - 992201111003243269, + 2245603532715892325, + 16478559053695087323, 18446744073709551615, 18446744073709551615, - 349, - 356, - 349, - 356, - 65, - 66, + 226, + 237, + 226, + 237, + 46, + 48, true, - "workers", - "workers" + "sharp steps", + "sharp steps" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 16381206557159905849, - 4500784834839577245, + 11942859038914222878, + 15085431028673446657, 18446744073709551615, 18446744073709551615, - 404, - 410, - 404, - 410, - 73, - 74, + 286, + 301, + 286, + 301, + 56, + 58, true, - "worker", - "worker" + "massive amounts", + "massive amounts" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 389609625621164460, - 11851913345703152408, + 7252014402665196659, + 15261261573536593307, 18446744073709551615, 18446744073709551615, - 427, - 431, - 427, - 431, - 78, - 79, + 342, + 354, + 342, + 354, + 66, + 68, true, - "node", - "node" + "small amount", + "small amount" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 329104161555640697, - 12658575526930668613, + 2245697320636800498, + 85672314451497322, 18446744073709551615, 18446744073709551615, - 442, - 447, - 442, - 447, - 81, - 82, + 472, + 483, + 472, + 483, + 93, + 95, true, - "cores", - "cores" + "short burst", + "short burst" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 8106478695960615463, - 15062829371036718218, + 16558536334265483368, + 11847226154466128513, 18446744073709551615, 18446744073709551615, - 473, - 480, - 473, - 480, - 89, - 90, + 487, + 503, + 487, + 503, + 96, + 98, true, - "speedup", - "speedup" + "extreme activity", + "extreme activity" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 329104161667983915, - 11953058923899695299, + 329104161667992688, + 9871171554517434066, 18446744073709551615, 18446744073709551615, - 488, - 493, - 488, - 493, - 92, - 93, + 0, + 5, + 0, + 5, + 0, + 1, true, - "parse", - "parse" + "pages", + "pages" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 15441160910541480579, - 15841608372110957817, + 14637915316557309079, + 12708841971050605893, 18446744073709551615, 18446744073709551615, - 498, - 500, - 498, - 500, - 94, - 95, + 14, + 22, + 14, + 22, + 4, + 5, true, - "ML", - "ML" + "function", + "function" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 16381206574973295053, - 7759102089231787311, + 389609625631241985, + 100194020203453107, 18446744073709551615, 18446744073709551615, - 542, - 548, - 542, - 548, - 102, - 103, + 26, + 30, + 26, + 30, + 6, + 7, true, - "number", - "number" + "time", + "time" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 8106478059506484182, - 992201111003203952, + 16381206574973295053, + 8846230013490521873, 18446744073709551615, 18446744073709551615, - 552, - 559, - 552, - 559, - 104, - 105, + 52, + 58, + 52, + 58, + 14, + 15, true, - "workers", - "workers" + "number", + "number" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 329104161758737773, - 12673240055158662957, + 329104159157820437, + 8249018447781337774, 18446744073709551615, 18446744073709551615, - 574, - 579, - 574, - 579, - 109, - 110, + 62, + 67, + 62, + 67, + 16, + 17, true, - "nodes", - "nodes" + "users", + "users" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 14652257141644167489, - 8090587240840002892, + 389609625631241985, + 100194020203438184, 18446744073709551615, 18446744073709551615, - 699, - 707, - 699, - 707, + 126, 130, - 131, + 126, + 130, + 26, + 27, true, - "baseline", - "baseline" + "time", + "time" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 16381206557159905849, - 4500784834839606045, + 16381206590630165717, + 11719327657280904751, 18446744073709551615, 18446744073709551615, - 717, - 723, - 717, - 723, - 133, - 134, + 141, + 147, + 141, + 147, + 29, + 30, true, - "worker", - "worker" + "launch", + "launch" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 8106478695960615463, - 15062829371033753418, + 8106478708506632112, + 18231497537744338632, 18446744073709551615, 18446744073709551615, - 729, - 736, - 729, - 736, - 136, - 137, + 155, + 162, + 155, + 162, + 32, + 33, true, - "speedup", - "speedup" + "service", + "service" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 2703018939289543887, - 15004756027981769615, + 329104161963544245, + 9857237958615296698, 18446744073709551615, 18446744073709551615, - 763, - 773, - 763, - 773, - 143, - 144, + 166, + 171, + 166, + 171, + 34, + 35, true, - "comparison", - "comparison" + "April", + "April" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 389609625631210899, - 11837631279683138832, + 329104159157820437, + 8249018447781333663, 18446744073709551615, 18446744073709551615, - 804, - 808, - 804, - 808, - 151, - 152, + 260, + 265, + 260, + 265, + 52, + 53, true, - "task", - "task" + "users", + "users" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 14650401089286948001, - 16245137145237128880, + 6167933651658664291, + 12240764636372283946, 18446744073709551615, 18446744073709551615, - 841, - 849, - 841, - 849, - 158, - 159, + 305, + 314, + 305, + 314, + 59, + 60, true, - "document", - "document" + "documents", + "documents" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 11600564911974996302, - 14480046541473745390, + 8106478708506632112, + 18231497537744787108, 18446744073709551615, 18446744073709551615, - 881, - 892, - 881, - 892, - 167, - 168, + 324, + 331, + 324, + 331, + 62, + 63, true, - "variability", - "variability" + "service", + "service" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 16381206590668214829, - 418453764520998193, + 389609625631241985, + 100194020203425198, 18446744073709551615, 18446744073709551615, - 900, - 906, - 900, - 906, - 170, - 171, + 358, + 362, + 358, + 362, + 69, + 70, true, - "length", - "length" + "time", + "time" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 6167933651658664291, - 11495196011120532495, + 16381206568241679420, + 9368345895491961575, 18446744073709551615, 18446744073709551615, - 910, - 919, - 910, - 919, - 172, - 173, + 375, + 381, + 375, + 381, + 74, + 75, true, - "documents", - "documents" + "design", + "design" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 329104161571401725, - 12673278341361969037, + 8106476000253296785, + 450489361038114021, 18446744073709551615, 18446744073709551615, - 1103, - 1108, - 1103, - 1108, - 205, - 206, + 396, + 403, + 396, + 403, + 80, + 81, true, - "order", - "order" + "problem", + "problem" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 6285955549867796622, - 17538568638231419383, + 329104161666914718, + 9871052204414646047, 18446744073709551615, 18446744073709551615, - 1121, - 1137, - 1121, - 1137, - 209, - 210, + 425, + 430, + 425, + 430, + 84, + 85, true, - "time-to-solution", - "time-to-solution" + "peaks", + "peaks" ], [ "term", "single-term", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 14639522327238241124, - 8193922819820873277, + 8106478708506632112, + 18231497537744780505, 18446744073709551615, 18446744073709551615, - 1155, - 1163, - 1155, - 1163, - 213, - 214, + 439, + 446, + 439, + 446, + 87, + 88, true, - "job-size", - "job-size" + "service", + "service" ], [ "verb", "compound-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 16437991637119672281, - 10698691307559986577, + 11953671505157202285, + 11937015333801488817, 18446744073709551615, 18446744073709551615, - 330, - 343, - 330, - 343, - 61, - 64, + 92, + 120, + 92, + 120, + 21, + 25, true, - "chose to have", - "chose to have" + "has been increasing steadily", + "has been increasing steadily" ], [ "verb", "compound-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 15603860935510693939, - 14727933870906010759, + 15603889104119874938, + 7803556645500016268, 18446744073709551615, 18446744073709551615, - 411, - 421, - 411, - 421, - 74, - 76, + 181, + 191, + 181, + 191, + 38, + 40, true, - "is running", - "is running" + "is however", + "is however" ], [ "verb", "compound-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 4859670139939149227, - 6280919696978205559, + 7806959182595507225, + 2570245507595885020, 18446744073709551615, 18446744073709551615, - 818, - 833, - 818, - 833, - 154, - 156, + 266, + 285, + 266, + 285, + 53, + 56, true, - "be parallelised", - "be parallelised" + "have been uploading", + "have been uploading" ], [ "verb", "compound-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 2871347585595950403, - 17619376144327821929, + 8106477873809970266, + 16668743459306840426, 18446744073709551615, 18446744073709551615, - 920, - 932, - 920, - 932, - 173, - 175, + 386, + 393, + 386, + 393, + 77, + 79, true, - "is reflected", - "is reflected" + "was not", + "was not" ], [ "verb", "single-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 389609625741152123, - 11952417481958675502, + 8106397812083771063, + 12986611233641368860, 18446744073709551615, 18446744073709551615, - 16, - 20, - 16, - 20, - 5, - 6, + 39, + 46, + 39, + 46, + 10, + 12, true, - "show", - "show" + "can see", + "can see" ], [ "verb", "single-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 15441160910541486545, - 15841608933708088140, + 6171728176299542016, + 11463438981088562167, 18446744073709551615, 18446744073709551615, - 75, - 79, - 75, - 79, - 15, - 16, + 72, + 81, + 72, + 81, + 18, + 19, true, - "ie", - "i.e." + "processed", + "processed" ], [ "verb", "single-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 14650448030444381648, - 6220015674503799158, + 12178341415895638617, + 17393490097429873872, 18446744073709551615, 18446744073709551615, - 110, - 118, - 110, - 118, - 22, - 23, + 207, + 210, + 207, + 210, + 42, + 43, true, - "applying", - "applying" + "see", + "see" ], [ "verb", "single-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 8106342444693204894, - 3141726382109473264, + 12178341415895564896, + 17393508726112862574, 18446744073709551615, 18446744073709551615, - 127, - 134, - 127, - 134, - 24, - 25, + 222, + 225, + 222, + 225, + 45, + 46, true, - "learned", - "learned" + "are", + "are" ], [ "verb", "single-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 8106398484825895017, - 9843385479250406664, + 5581574448026047221, + 15445029894002382055, 18446744073709551615, 18446744073709551615, - 210, - 217, - 210, - 217, - 39, - 40, + 239, + 249, + 239, + 249, + 49, + 50, true, - "compute", - "compute" + "indicating", + "indicating" ], [ "verb", "single-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 389609625741152123, - 11952417481958832640, + 6807190128157759045, + 13053632147510739476, 18446744073709551615, 18446744073709551615, - 232, - 236, - 232, - 236, - 43, - 44, + 407, + 418, + 407, + 418, + 82, + 83, true, - "show", - "show" + "accommodate", + "accommodate" ], [ "verb", "single-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 5314879136556773391, - 10872235211663904849, + 12178341415895525606, + 17393661643866953573, 18446744073709551615, 18446744073709551615, - 253, - 263, - 253, - 263, - 47, - 48, + 447, + 450, + 447, + 450, + 88, + 89, true, - "displaying", - "displaying" + "was", + "was" ], [ "verb", "single-verb", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 16381206519567123880, - 11458061957152441695, + 16381206485955868973, + 14033094471219798649, 18446744073709551615, 18446744073709551615, - 276, - 282, - 276, - 282, - 50, - 51, + 459, + 465, + 459, + 465, + 91, + 92, true, - "versus", - "versus" + "handle", + "handle" ], [ - "verb", - "single-verb", - 7054726458191881751, + "conn", + "single-conn", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 8106478708506631920, - 15215214992224138903, + 389609625700764258, + 94329536986859678, 18446744073709551615, 18446744073709551615, - 357, - 364, - 357, - 364, - 66, - 67, + 9, + 13, + 9, + 13, + 2, + 4, true, - "serving", - "serving" + "as a", + "as a" ], [ - "verb", - "single-verb", - 7054726458191881751, + "conn", + "single-conn", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 14892726175400695403, - 11929108714311456052, + 15441160910541485670, + 6866903700093394497, 18446744073709551615, 18446744073709551615, - 456, - 467, - 456, - 467, - 85, - 87, + 23, + 25, + 23, + 25, + 5, + 6, true, - "can observe", - "can observe" + "of", + "of" ], [ - "verb", - "single-verb", - 7054726458191881751, + "conn", + "single-conn", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 329104159174415764, - 15201692130147370947, + 15441160910541480533, + 6866903594362655679, 18446744073709551615, 18446744073709551615, - 501, - 506, - 501, - 506, - 95, - 96, + 32, + 34, + 32, + 34, + 8, + 9, true, - "apply", - "apply" + "As", + "As" ], [ - "verb", - "single-verb", - 7054726458191881751, + "conn", + "single-conn", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 8106342033696543838, - 12894177882275964000, + 15441160910541485670, + 6866903700093396448, 18446744073709551615, 18446744073709551615, - 602, - 609, - 602, - 609, - 116, - 117, + 59, + 61, + 59, + 61, + 15, + 16, true, - "observe", - "observe" + "of", + "of" ], [ - "verb", - "single-verb", - 7054726458191881751, + "conn", + "single-conn", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 8106397800846024988, - 10875439300264671294, + 389609625618865305, + 100185561388315538, 18446744073709551615, 18446744073709551615, - 655, - 662, - 655, - 662, - 123, - 124, + 121, + 125, + 121, + 125, + 25, + 26, true, - "appears", - "appears" - ], + "over", + "over" + ], [ - "verb", - "single-verb", - 7054726458191881751, + "conn", + "single-conn", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 14637929372960545624, - 5914110341326941133, + 6168057894310307081, + 494748048694411645, 18446744073709551615, 18446744073709551615, - 775, - 783, - 775, - 783, - 145, - 146, + 131, + 140, + 131, + 140, + 27, + 29, true, - "flattens", - "flattens" + "since the", + "since the" ], [ - "verb", - "single-verb", - 7054726458191881751, + "conn", + "single-conn", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 14650440089690345452, - 13442210495055254219, + 15441160910541485670, + 6866903700093386593, 18446744073709551615, 18446744073709551615, - 992, - 1000, - 992, - 1000, - 186, - 187, + 148, + 150, + 148, + 150, + 30, + 31, true, - "averages", - "averages" + "of", + "of" ], [ - "verb", - "single-verb", - 7054726458191881751, + "conn", + "single-conn", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 12178341415895564896, - 12750707122609445157, + 15441160910541486538, + 6866903364400136605, 18446744073709551615, 18446744073709551615, - 1060, - 1063, - 1060, - 1063, - 197, - 198, + 163, + 165, + 163, + 165, + 33, + 34, true, - "are", - "are" + "in", + "in" ], [ - "verb", - "single-verb", - 7054726458191881751, + "conn", + "single-conn", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 329104161785194305, - 12659210101863210938, + 389609625631229034, + 100122430387311494, 18446744073709551615, 18446744073709551615, - 1072, - 1077, - 1072, - 1077, - 200, - 201, + 211, + 215, + 211, + 215, + 43, + 44, true, - "scale", - "scale" + "that", + "that" ], [ - "verb", - "single-verb", - 7054726458191881751, + "conn", + "single-conn", + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 389609625632420840, - 11837667465904018998, + 3504047303126433547, + 11549299435570708613, 18446744073709551615, 18446744073709551615, - 1112, - 1116, - 1112, - 1116, - 207, - 208, + 250, + 259, + 250, + 259, + 50, + 52, true, - "keep", - "keep" + "that some", + "that some" ], [ "conn", "single-conn", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 4606782280409462864, - 1759956167227045589, + 15441160910541485670, + 6866903700093379447, 18446744073709551615, 18446744073709551615, - 1138, - 1150, - 1138, - 1150, - 210, - 212, + 302, + 304, + 302, + 304, + 58, + 59, true, - "constant for", - "constant for" + "of", + "of" ], [ "conn", "single-conn", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 15441160910541480354, - 15841607655799874221, + 14637953883063114384, + 1261358077151630278, 18446744073709551615, 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 315, + 323, + 315, + 323, + 60, + 62, true, - "In", - "In" + "into the", + "into the" ], [ "conn", "single-conn", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 16381206565712212855, - 6967163224569431769, + 389609625698530964, + 94041907290639477, 18446744073709551615, 18446744073709551615, - 33, - 39, - 33, - 39, - 8, - 10, + 332, + 336, + 332, + 336, + 63, + 65, true, - "of the", - "of the" + "in a", + "in a" ], [ "conn", "single-conn", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, 15441160910541485670, - 15841608916207186363, + 6866903700093367010, 18446744073709551615, 18446744073709551615, - 92, - 94, - 92, - 94, - 18, - 19, + 355, + 357, + 355, + 357, + 68, + 69, true, "of", "of" @@ -73622,20 +75646,20 @@ [ "conn", "single-conn", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, 15441160910541485670, - 15841608916207173993, + 6866903700093361485, 18446744073709551615, 18446744073709551615, - 157, - 159, - 157, - 159, - 28, - 29, + 484, + 486, + 484, + 486, + 95, + 96, true, "of", "of" @@ -73643,1973 +75667,1994 @@ [ "conn", "single-conn", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 16381206566339127348, - 6948695024569863629, + 15441160910541485865, + 6866903731987646441, 18446744073709551615, 18446744073709551615, - 179, - 185, - 179, - 185, - 33, - 35, + 204, + 206, + 204, + 206, + 41, + 42, true, - "on the", - "on the" + "to", + "to" ], [ "conn", "single-conn", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 389609625618037948, - 11853381785047006947, + 15441160910541485865, + 6866903731988159776, 18446744073709551615, 18446744073709551615, - 195, - 199, - 195, - 199, - 36, - 37, + 368, + 370, + 368, + 370, + 72, + 73, true, - "with", - "with" + "to", + "to" ], [ "conn", "single-conn", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 15441160910541486989, - 15841608542350473314, + 15441160910541485865, + 6866903731988157479, 18446744073709551615, 18446744073709551615, - 250, - 252, - 250, - 252, - 46, - 47, + 404, + 406, + 404, + 406, + 81, + 82, true, - "by", - "by" + "to", + "to" ], [ "conn", "single-conn", - 7054726458191881751, + 10375772475809458895, "TEXT", - "#/texts/90", + "#/texts/89", 1.0, - 15441160910541485670, - 15841608916207199049, + 15441160910541485865, + 6866903731988157354, 18446744073709551615, 18446744073709551615, - 294, - 296, - 294, - 296, - 53, - 54, + 456, + 458, + 456, + 458, + 90, + 91, true, - "of", - "of" + "to", + "to" ], [ - "conn", - "single-conn", + "numval", + "ival", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 14091433066300748251, - 10813497490882117614, + 17767354399704235152, + 10891777227864623310, 18446744073709551615, 18446744073709551615, - 393, - 403, - 393, - 403, - 71, - 73, + 10, + 11, + 10, + 11, + 2, + 3, true, - "since each", - "since each" + "8", + "8" ], [ - "conn", - "single-conn", + "parenthesis", + "round brackets", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 389609625618762887, - 11853298334064133313, + 4022242346074010063, + 12541000686584287248, 18446744073709551615, 18446744073709551615, - 422, - 426, - 422, - 426, - 76, - 78, + 74, + 178, + 74, + 178, + 14, + 33, true, - "on a", - "on a" + "(i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON)", + "(i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON)" ], [ - "conn", - "single-conn", + "expression", + "common", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 389609625618037948, - 11853381785046890442, + 15441160910541486545, + 15841608933708088140, 18446744073709551615, 18446744073709551615, - 432, - 436, - 432, - 436, + 75, 79, - 80, + 75, + 79, + 15, + 16, true, - "with", - "with" + "ie", + "i.e." ], [ - "conn", - "single-conn", + "expression", + "word-concatenation", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 15441160910541480533, - 15841607645344043171, + 2217258678859216685, + 3493505507787421146, 18446744073709551615, 18446744073709551615, - 449, - 451, - 449, - 451, - 83, - 84, + 621, + 639, + 621, + 639, + 119, + 120, true, - "As", - "As" + "better-than-linear", + "better-than-linear" ], [ - "conn", - "single-conn", + "expression", + "word-concatenation", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 16381206560518651853, - 299391170561034072, + 6285955549867796622, + 17538568638231419383, 18446744073709551615, 18446744073709551615, - 481, - 487, - 481, - 487, - 90, - 92, + 1121, + 1137, + 1121, + 1137, + 209, + 210, true, - "in the", - "in the" + "time-to-solution", + "time-to-solution" ], [ - "conn", - "single-conn", + "expression", + "word-concatenation", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 5535791613041986682, - 1987456193213323417, + 14639522327238241124, + 8193922819820873277, 18446744073709551615, 18446744073709551615, - 529, - 541, - 529, - 541, - 99, - 102, + 1155, + 1163, + 1155, + 1163, + 213, + 214, true, - "with the the", - "with the the" + "job-size", + "job-size" ], [ - "conn", - "single-conn", + "sentence", + "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 15441160910541485670, - 15841608916207808715, + 17950606815080185664, + 182687704084943809, 18446744073709551615, 18446744073709551615, - 549, - 551, - 549, - 551, - 103, - 104, + 0, + 228, + 0, + 228, + 0, + 42, true, - "of", - "of" - ], + "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources.", + "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources." + ], [ - "conn", - "single-conn", + "sentence", + "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 16381206566339127348, - 6948695024569829645, + 9291869836472436551, + 7073966199782583842, 18446744073709551615, 18446744073709551615, - 692, - 698, - 692, - 698, - 128, - 130, + 229, + 320, + 229, + 320, + 42, + 58, true, - "on the", - "on the" + "We show this scaling by displaying the speedup versus the number of worker nodes available.", + "We show this scaling by displaying the speedup versus the number of worker nodes available." ], [ - "conn", - "single-conn", + "sentence", + "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 389609625618037948, - 11853381785046972707, + 11942797776008272897, + 18354315767267706544, 18446744073709551615, 18446744073709551615, - 708, - 712, - 708, - 712, - 131, - 132, + 321, + 448, + 321, + 448, + 58, + 83, true, - "with", - "with" + "Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores.", + "Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores." ], [ - "conn", - "single-conn", + "sentence", + "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 16381206566339127348, - 6948695024569840929, + 1337110641996971981, + 12497994289374110365, 18446744073709551615, 18446744073709551615, - 737, - 743, - 737, - 743, - 137, - 139, + 449, + 580, + 449, + 580, + 83, + 111, true, - "on the", - "on the" + "As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes.", + "As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes." ], [ - "conn", - "single-conn", + "sentence", + "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 15441160910541486538, - 15841608934679761481, + 7140787443244237501, + 14548003650603154277, 18446744073709551615, 18446744073709551615, - 760, - 762, - 760, - 762, - 142, - 143, + 581, + 724, + 581, + 724, + 111, + 135, true, - "in", - "in" + "Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker.", + "Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker." ], [ - "conn", - "single-conn", + "sentence", + "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 8106397797884119903, - 2253942102629974295, + 7774794569544328631, + 12157835320367080769, 18446744073709551615, 18446744073709551615, - 796, - 803, - 796, - 803, - 149, - 151, + 725, + 876, + 725, + 876, + 135, + 166, true, - "as this", - "as this" + "The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level.", + "The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level." ], [ - "conn", - "single-conn", + "sentence", + "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 16381206566339127348, - 6948695024569822665, + 11827058603931473819, + 1210577326445407272, 18446744073709551615, 18446744073709551615, - 834, - 840, - 834, - 840, - 156, - 158, + 877, + 1042, + 877, + 1042, + 166, + 194, true, - "on the", - "on the" + "The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes.", + "The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes." ], [ - "conn", - "single-conn", + "sentence", + "", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 16381206566339127348, - 6948695024569833459, + 8702943219455942098, + 9096969319153565329, 18446744073709551615, 18446744073709551615, - 858, - 864, - 858, - 864, - 161, - 163, + 1043, + 1164, + 1043, + 1164, + 194, + 215, true, - "on the", - "on the" + "Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", + "Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size." ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 16381206560518651853, - 299391170561066249, + 8172710598775048780, + 6087292431822475163, 18446744073709551615, 18446744073709551615, - 893, - 899, - 893, - 899, - 168, - 170, + 46, + 73, + 46, + 73, + 11, + 14, true, - "in the", - "in the" + "main pipeline microservices", + "main pipeline microservices" ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 15441160910541485670, - 15841608916207884744, + 12653831733608918357, + 5399694712153694222, 18446744073709551615, 18446744073709551615, - 907, - 909, - 907, - 909, - 171, - 172, + 95, + 108, + 95, + 108, + 19, + 21, true, - "of", - "of" + "PDF documents", + "PDF documents" ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 389609625698530964, - 11943889382951508475, + 12400507963759742880, + 6135519514760056473, 18446744073709551615, 18446744073709551615, - 933, - 937, - 933, - 937, - 175, - 177, + 297, + 309, + 297, + 309, + 54, + 56, true, - "in a", - "in a" + "worker nodes", + "worker nodes" ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 2011002864325523456, - 9569710153707003014, + 4940765489471971613, + 347303216115352656, 18446744073709551615, 18446744073709551615, - 953, - 964, - 953, - 964, - 179, - 181, + 370, + 391, + 370, + 391, + 68, + 70, true, - "between the", - "between the" + "pipeline microservice", + "pipeline microservice" ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 389609625618037948, - 11853381785046974620, + 10318072901532559633, + 7254172824621403054, 18446744073709551615, 18446744073709551615, - 1005, - 1009, - 1005, - 1009, - 188, - 189, + 507, + 519, + 507, + 519, + 96, + 98, true, - "with", - "with" + "tasks scales", + "tasks scales" ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 15441160910541486538, - 15841608934679158336, + 18001738063114990140, + 7718080442102537061, 18446744073709551615, 18446744073709551615, - 1100, - 1102, - 1100, - 1102, - 204, - 205, + 621, + 647, + 621, + 647, + 119, + 121, true, - "in", - "in" + "better-than-linear speedup", + "better-than-linear speedup" ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 15441160910541485865, - 15841608914925665456, + 3088520230983972493, + 6524782884039209835, 18446744073709551615, 18446744073709551615, - 170, - 172, - 170, - 172, - 30, - 31, + 670, + 691, + 670, + 691, + 126, + 128, true, - "to", - "to" + "bandwidth constraints", + "bandwidth constraints" ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 15441160910541485865, - 15841608914925705101, + 14290393742330326868, + 1869283060159003292, 18446744073709551615, 18446744073709551615, - 207, - 209, - 207, - 209, - 38, - 39, + 744, + 758, + 744, + 758, + 139, + 141, true, - "to", - "to" + "assemble tasks", + "assemble tasks" ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 15441160910541485865, - 15841608914925660269, + 13968810274884964698, + 7333175022141755015, 18446744073709551615, 18446744073709551615, - 336, - 338, - 336, - 338, - 62, - 63, + 865, + 875, + 865, + 875, + 163, + 165, true, - "to", - "to" + "page level", + "page level" ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 15441160910541485865, - 15841608914925567374, + 18404777356709557822, + 5867368598465364348, 18446744073709551615, 18446744073709551615, - 667, - 669, - 667, - 669, - 125, - 126, + 938, + 952, + 938, + 952, + 177, + 179, true, - "to", - "to" + "load imbalance", + "load imbalance" ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 15441160910541485865, - 15841608914925573526, + 12400507963759742880, + 6135519514760170296, 18446744073709551615, 18446744073709551615, - 1069, - 1071, - 1069, - 1071, - 199, - 200, + 965, + 977, + 965, + 977, + 181, + 183, true, - "to", - "to" + "worker nodes", + "worker nodes" ], [ - "conn", - "single-conn", + "term", + "single-term", 7054726458191881751, "TEXT", "#/texts/90", 1.0, - 15441160910541485865, - 15841608914925579244, + 12569603855738370264, + 1147410557148444790, 18446744073709551615, 18446744073709551615, - 1109, - 1111, - 1109, - 1111, - 206, - 207, + 1023, + 1041, + 1023, + 1041, + 190, + 193, true, - "to", - "to" + "large corpus sizes", + "large corpus sizes" ], [ - "numval", - "ival", - 7794115281016062068, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/91", + "#/texts/90", 1.0, - 17767354399704235157, - 9706977069123592745, + 4421383392096991748, + 17586453718413772848, 18446744073709551615, 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 1082, + 1099, + 1082, + 1099, + 202, + 204, true, - "5", - "5" + "compute resources", + "compute resources" ], [ - "sentence", - "", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 657005981473069779, - 855544429870992132, + 16381206514091025767, + 11298218412956847237, 18446744073709551615, 18446744073709551615, - 0, - 276, - 0, - 276, - 0, - 48, + 3, + 9, + 3, + 9, + 1, + 2, true, - "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", - "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation." + "Figure", + "Figure" ], [ "term", - "enum-term-mark-2", - 7038163015905900647, + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 15515663500877316360, - 9779616463577804921, + 8106478648771436891, + 15412781195243883400, 18446744073709551615, 18446744073709551615, - 70, - 88, - 70, - 88, - 14, - 17, + 25, + 32, + 25, + 32, + 7, + 8, true, - "parse and annotate", - "parse and annotate" + "scaling", + "scaling" ], [ "term", "single-term", - 7038163015905900647, + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 3764444893564113560, - 6998103868267134559, + 8106479143794098783, + 12796848297776230218, 18446744073709551615, 18446744073709551615, - 80, - 98, - 80, - 98, - 16, + 84, + 91, + 84, + 91, + 17, 18, true, - "annotate documents", - "annotate documents" + "parsing", + "parsing" ], [ "term", "single-term", - 7038163015905900647, + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 10640406915501670366, - 17510700195140757278, + 8106464587473865376, + 14658563877589949653, 18446744073709551615, 18446744073709551615, - 204, - 222, - 204, - 222, - 37, - 39, + 119, + 126, + 119, + 126, + 23, + 24, true, - "ingested documents", - "ingested documents" + "machine", + "machine" ], [ "term", "single-term", - 7038163015905900647, + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 7992990666316029472, - 588680236579575873, + 16381206567230470443, + 6941201434190273501, 18446744073709551615, 18446744073709551615, - 245, - 275, - 245, - 275, - 44, - 47, + 135, + 141, + 135, + 141, + 25, + 26, true, - "structured data representation", - "structured data representation" + "models", + "models" ], [ "term", "single-term", - 7038163015905900647, + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 14814125365076808131, - 17380690645567101284, + 2703018679320364082, + 5037546725350435645, 18446744073709551615, 18446744073709551615, - 42, - 50, - 42, - 50, - 8, - 9, + 146, + 156, + 146, + 156, + 27, + 28, true, - "platform", - "platform" + "conversion", + "conversion" ], [ "term", "single-term", - 7038163015905900647, + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 329104161667983915, - 6897824401029810817, + 6167933651658664291, + 11495196011120521523, 18446744073709551615, 18446744073709551615, - 70, - 75, - 70, - 75, - 14, - 15, + 160, + 169, + 160, + 169, + 29, + 30, true, - "parse", - "parse" + "documents", + "documents" ], [ "term", "single-term", - 7038163015905900647, + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 329104159241569908, - 5347589032455145571, + 389609625541450799, + 11852205465525539051, 18446744073709551615, 18446744073709551615, - 118, - 123, - 118, - 123, - 22, - 23, + 173, + 177, + 173, + 177, + 31, + 32, true, - "train", - "train" + "JSON", + "JSON" ], [ "term", "single-term", - 7038163015905900647, + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 8106464587473865376, - 2835791627105026255, + 14814125365076808131, + 1145249750150199435, 18446744073709551615, 18446744073709551615, - 141, - 148, - 141, - 148, - 26, - 27, + 186, + 194, + 186, + 194, + 35, + 36, true, - "machine", - "machine" + "platform", + "platform" ], [ "term", "single-term", - 7038163015905900647, + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 16381206567230470443, - 15747559098284370939, + 16381206521526353544, + 6483551066150257775, 18446744073709551615, 18446744073709551615, - 158, - 164, - 158, - 164, - 28, - 29, + 200, + 206, + 200, + 206, + 37, + 38, true, - "models", - "models" + "regard", + "regard" ], [ "term", "single-term", - 7038163015905900647, + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 329104161571401725, - 6886205595361632203, + 6168338487309432467, + 8730004420584075578, 18446744073709551615, 18446744073709551615, - 168, - 173, - 168, - 173, - 30, - 31, + 218, + 227, + 218, + 227, + 40, + 41, true, - "order", - "order" + "resources", + "resources" ], [ "term", "single-term", - 7038163015905900647, + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 8106398484416916345, - 12761515448611326706, + 8106478648771436891, + 15412781195243911626, 18446744073709551615, 18446744073709551615, - 189, - 196, - 189, - 196, - 34, - 35, + 242, + 249, + 242, + 249, + 45, + 46, true, - "content", - "content" + "scaling", + "scaling" ], [ - "verb", - "compound-verb", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 13481804153867000640, - 5921155616940636913, + 8106478695960615463, + 15062829371033729664, 18446744073709551615, 18446744073709551615, - 3, - 17, - 3, - 17, - 1, - 3, + 268, + 275, + 268, + 275, + 49, + 50, true, - "have presented", - "have presented" + "speedup", + "speedup" ], [ - "verb", - "single-verb", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 329104159219515955, - 5354365758875797437, + 16381206574973295053, + 7759102089231672623, 18446744073709551615, 18446744073709551615, - 36, - 41, - 36, - 41, - 7, - 8, + 287, + 293, + 287, + 293, + 52, + 53, true, - "based", - "based" + "number", + "number" ], [ - "verb", - "single-verb", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 2873440693780286732, - 4285909795825994377, + 8106478059506484182, + 992201111003243269, 18446744073709551615, 18446744073709551615, - 58, - 68, - 58, - 68, - 11, - 13, + 349, + 356, + 349, + 356, + 65, + 66, true, - "can ingest", - "can ingest" + "workers", + "workers" ], [ - "verb", - "single-verb", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 14650447832610756948, - 58344428677043651, + 16381206557159905849, + 4500784834839577245, 18446744073709551615, 18446744073709551615, - 132, - 140, - 132, - 140, - 25, - 26, + 404, + 410, + 404, + 410, + 73, + 74, true, - "advanced", - "advanced" + "worker", + "worker" ], [ - "verb", - "single-verb", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 14639581097006750428, - 9301901239831102358, + 389609625621164460, + 11851913345703152408, 18446744073709551615, 18446744073709551615, - 149, - 157, - 149, - 157, - 27, - 28, + 427, + 431, + 427, + 431, + 78, + 79, true, - "learning", - "learning" + "node", + "node" ], [ - "verb", - "single-verb", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 8106397496930289884, - 12659480351226973306, + 329104161555640697, + 12658575526930668613, 18446744073709551615, 18446744073709551615, - 177, - 184, - 177, - 184, - 32, - 33, + 442, + 447, + 442, + 447, + 81, + 82, true, - "extract", - "extract" + "cores", + "cores" ], [ - "verb", - "single-verb", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 8106398484416229602, - 12761525411618135232, + 8106478695960615463, + 15062829371036718218, 18446744073709551615, 18446744073709551615, - 227, - 234, - 227, - 234, - 40, - 41, + 473, + 480, + 473, + 480, + 89, + 90, true, - "convert", - "convert" + "speedup", + "speedup" ], [ - "conn", - "single-conn", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 15441160910541486538, - 5009177319356301500, + 329104161667983915, + 11953058923899695299, 18446744073709551615, 18446744073709551615, - 165, - 167, - 165, - 167, - 29, - 30, + 488, + 493, + 488, + 493, + 92, + 93, true, - "in", - "in" + "parse", + "parse" ], [ - "conn", - "single-conn", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 16381206565712212855, - 15774434041302426793, + 15441160910541480579, + 15841608372110957817, 18446744073709551615, 18446744073709551615, - 197, - 203, - 197, - 203, - 35, - 37, - true, - "of the", - "of the" + 498, + 500, + 498, + 500, + 94, + 95, + true, + "ML", + "ML" ], [ - "conn", - "single-conn", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 16381206560517276114, - 4138873729787782213, + 16381206574973295053, + 7759102089231787311, 18446744073709551615, 18446744073709551615, - 238, - 244, - 238, - 244, - 42, - 44, + 542, + 548, + 542, + 548, + 102, + 103, true, - "into a", - "into a" + "number", + "number" ], [ - "conn", - "single-conn", - 7038163015905900647, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/92", + "#/texts/90", 1.0, - 15441160910541485865, - 5009177098714228529, + 8106478059506484182, + 992201111003203952, 18446744073709551615, 18446744073709551615, - 174, - 176, - 174, - 176, - 31, - 32, + 552, + 559, + 552, + 559, + 104, + 105, true, - "to", - "to" + "workers", + "workers" ], [ - "sentence", - "", - 1508626318915838319, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 5399734795549420383, - 9459426372965111375, + 329104161758737773, + 12673240055158662957, 18446744073709551615, 18446744073709551615, - 0, - 102, - 0, - 102, - 0, - 17, + 574, + 579, + 574, + 579, + 109, + 110, true, - "The fundamental design choices in our solution have proven to enable scaling in three elementary ways.", - "The fundamental design choices in our solution have proven to enable scaling in three elementary ways." + "nodes", + "nodes" ], [ - "sentence", - "", - 1508626318915838319, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 16807999257243449869, - 13297903776875612574, + 14652257141644167489, + 8090587240840002892, 18446744073709551615, 18446744073709551615, - 103, - 153, - 103, - 153, - 17, - 26, + 699, + 707, + 699, + 707, + 130, + 131, true, - "First, it can service multiple users concurrently.", - "First, it can service multiple users concurrently." + "baseline", + "baseline" ], [ - "sentence", - "", - 1508626318915838319, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 13213872932381400279, - 13188215006770693544, + 16381206557159905849, + 4500784834839606045, 18446744073709551615, 18446744073709551615, - 154, - 251, - 154, - 251, - 26, - 46, + 717, + 723, + 717, + 723, + 133, + 134, true, - "Second, it can ingest, parse and apply machine learned models on many documents at the same time.", - "Second, it can ingest, parse and apply machine learned models on many documents at the same time." + "worker", + "worker" ], [ - "sentence", - "", - 1508626318915838319, + "term", + "single-term", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 9242556889455087990, - 15971994141625715858, + 8106478695960615463, + 15062829371033753418, 18446744073709551615, 18446744073709551615, - 252, - 468, - 252, - 468, - 46, - 85, + 729, + 736, + 729, + 736, + 136, + 137, true, - "Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", - "Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources." + "speedup", + "speedup" ], [ "term", "single-term", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 2281965028547407404, - 4585290171111099204, + 2703018939289543887, + 15004756027981769615, 18446744073709551615, 18446744073709551615, - 4, - 30, - 4, - 30, - 1, - 4, + 763, + 773, + 763, + 773, + 143, + 144, true, - "fundamental design choices", - "fundamental design choices" + "comparison", + "comparison" ], [ "term", "single-term", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 10458141827175777973, - 662304186431118688, + 389609625631210899, + 11837631279683138832, 18446744073709551615, 18446744073709551615, - 86, - 101, - 86, - 101, - 14, - 16, + 804, + 808, + 804, + 808, + 151, + 152, true, - "elementary ways", - "elementary ways" + "task", + "task" ], [ "term", "single-term", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 17200993861033027072, - 10058402512815484380, + 14650401089286948001, + 16245137145237128880, 18446744073709551615, 18446744073709551615, - 125, - 139, - 125, - 139, - 22, - 24, + 841, + 849, + 841, + 849, + 158, + 159, true, - "multiple users", - "multiple users" + "document", + "document" ], [ "term", "single-term", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 12462088721494412558, - 14795705853314288990, + 11600564911974996302, + 14480046541473745390, 18446744073709551615, 18446744073709551615, - 219, - 233, - 219, - 233, - 39, - 41, + 881, + 892, + 881, + 892, + 167, + 168, true, - "many documents", - "many documents" + "variability", + "variability" ], [ "term", "single-term", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 6168880476795325400, - 1749109350869750559, + 16381206590668214829, + 418453764520998193, 18446744073709551615, 18446744073709551615, - 241, - 250, - 241, - 250, - 43, - 45, + 900, + 906, + 900, + 906, + 170, + 171, true, - "same time", - "same time" + "length", + "length" ], [ "term", "single-term", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 4421383392096991748, - 13841033715090277122, + 6167933651658664291, + 11495196011120532495, 18446744073709551615, 18446744073709551615, - 276, - 293, - 276, - 293, - 52, - 54, + 910, + 919, + 910, + 919, + 172, + 173, true, - "compute resources", - "compute resources" + "documents", + "documents" ], [ "term", "single-term", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 13127417780425802861, - 18242234552519957632, + 329104161571401725, + 12673278341361969037, 18446744073709551615, 18446744073709551615, - 298, - 313, - 298, - 313, - 55, - 57, + 1103, + 1108, + 1103, + 1108, + 205, + 206, true, - "different tasks", - "different tasks" + "order", + "order" ], [ "term", "single-term", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 871079831703051200, - 5077405699917193499, + 6285955549867796622, + 17538568638231419383, 18446744073709551615, 18446744073709551615, - 349, - 364, - 349, - 364, - 63, - 65, + 1121, + 1137, + 1121, + 1137, + 209, + 210, true, - "respective load", - "respective load" + "time-to-solution", + "time-to-solution" ], [ "term", "single-term", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 10442897704134762600, - 3285893567564519587, + 14639522327238241124, + 8193922819820873277, 18446744073709551615, 18446744073709551615, - 451, - 467, - 451, - 467, - 82, - 84, + 1155, + 1163, + 1155, + 1163, + 213, + 214, true, - "enough resources", - "enough resources" + "job-size", + "job-size" ], [ - "term", - "single-term", - 1508626318915838319, + "verb", + "compound-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 14635106751859230946, - 764401693493390910, + 16437991637119672281, + 10698691307559986577, 18446744073709551615, 18446744073709551615, - 38, - 46, - 38, - 46, - 6, - 7, + 330, + 343, + 330, + 343, + 61, + 64, true, - "solution", - "solution" + "chose to have", + "chose to have" ], [ - "term", - "single-term", - 1508626318915838319, + "verb", + "compound-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 329104161667983915, - 8353591573424153686, + 15603860935510693939, + 14727933870906010759, 18446744073709551615, 18446744073709551615, - 177, - 182, - 177, - 182, - 32, - 33, + 411, + 421, + 411, + 421, + 74, + 76, true, - "parse", - "parse" + "is running", + "is running" ], [ - "term", - "single-term", - 1508626318915838319, + "verb", + "compound-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 8106464587473865376, - 16865370909529075844, + 4859670139939149227, + 6280919696978205559, 18446744073709551615, 18446744073709551615, - 193, - 200, - 193, - 200, - 35, - 36, + 818, + 833, + 818, + 833, + 154, + 156, true, - "machine", - "machine" + "be parallelised", + "be parallelised" ], [ - "term", - "single-term", - 1508626318915838319, + "verb", + "compound-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 16381206567230470443, - 6296565769111805720, + 2871347585595950403, + 17619376144327821929, 18446744073709551615, 18446744073709551615, - 209, - 215, - 209, - 215, - 37, - 38, + 920, + 932, + 920, + 932, + 173, + 175, true, - "models", - "models" + "is reflected", + "is reflected" ], [ - "term", - "single-term", - 1508626318915838319, + "verb", + "single-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 329104161844229707, - 8223163135175074012, + 389609625741152123, + 11952417481958675502, 18446744073709551615, 18446744073709551615, - 252, - 257, - 252, - 257, - 46, - 47, + 16, + 20, + 16, + 20, + 5, + 6, true, - "Third", - "Third" + "show", + "show" ], [ - "term", - "single-term", - 1508626318915838319, + "verb", + "single-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 14814125365076808131, - 2596919094696196606, + 15441160910541486545, + 15841608933708088140, 18446744073709551615, 18446744073709551615, - 321, - 329, - 321, - 329, - 59, - 60, + 75, + 79, + 75, + 79, + 15, + 16, true, - "platform", - "platform" + "ie", + "i.e." ], [ - "term", - "single-term", - 1508626318915838319, + "verb", + "single-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 2703018679320364082, - 16899905581150215026, + 14650448030444381648, + 6220015674503799158, 18446744073709551615, 18446744073709551615, - 372, - 382, - 372, - 382, - 67, - 68, + 110, + 118, + 110, + 118, + 22, + 23, true, - "conversion", - "conversion" + "applying", + "applying" ], [ - "term", - "single-term", - 1508626318915838319, + "verb", + "single-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 6167933651658664291, - 2405213947196016063, + 8106342444693204894, + 3141726382109473264, 18446744073709551615, 18446744073709551615, - 386, - 395, - 386, - 395, - 69, - 70, + 127, + 134, + 127, + 134, + 24, + 25, true, - "documents", - "documents" + "learned", + "learned" ], [ - "term", - "single-term", - 1508626318915838319, + "verb", + "single-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 14814125365076808131, - 2596919094696188986, + 8106398484825895017, + 9843385479250406664, 18446744073709551615, 18446744073709551615, - 403, - 411, - 403, - 411, - 72, - 73, + 210, + 217, + 210, + 217, + 39, + 40, true, - "platform", - "platform" + "compute", + "compute" ], [ - "term", - "single-term", - 1508626318915838319, + "verb", + "single-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 329104159219994925, - 8265223761504278760, + 389609625741152123, + 11952417481958832640, 18446744073709551615, 18446744073709551615, - 422, - 427, - 422, - 427, - 76, - 77, + 232, + 236, + 232, + 236, + 43, + 44, true, - "times", - "times" + "show", + "show" ], [ - "term", - "single-term", - 1508626318915838319, + "verb", + "single-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 389609625631241985, - 2218225659402359325, + 5314879136556773391, + 10872235211663904849, 18446744073709551615, 18446744073709551615, - 439, - 443, - 439, - 443, - 79, - 80, + 253, + 263, + 253, + 263, + 47, + 48, true, - "time", - "time" + "displaying", + "displaying" ], [ "verb", - "compound-verb", - 1508626318915838319, + "single-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 3403952970044578622, - 10903875917460680118, + 16381206519567123880, + 11458061957152441695, 18446744073709551615, 18446744073709551615, - 47, - 76, - 47, - 76, - 7, - 12, + 276, + 282, + 276, + 282, + 50, + 51, true, - "have proven to enable scaling", - "have proven to enable scaling" + "versus", + "versus" ], [ "verb", - "compound-verb", - 1508626318915838319, + "single-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 15412069422981600492, - 6547325180036345245, + 8106478708506631920, + 15215214992224138903, 18446744073709551615, 18446744073709551615, - 330, - 342, - 330, - 342, - 60, - 62, + 357, + 364, + 357, + 364, + 66, + 67, true, - "according to", - "according to" + "serving", + "serving" ], [ "verb", "single-verb", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 14892726286148891751, - 10384184194505177525, + 14892726175400695403, + 11929108714311456052, 18446744073709551615, 18446744073709551615, - 113, - 124, - 113, - 124, - 20, - 22, + 456, + 467, + 456, + 467, + 85, + 87, true, - "can service", - "can service" + "can observe", + "can observe" ], [ "verb", "single-verb", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 2873440693780286732, - 15985974084754193151, + 329104159174415764, + 15201692130147370947, 18446744073709551615, 18446744073709551615, - 165, - 175, - 165, - 175, - 29, - 31, + 501, + 506, + 501, + 506, + 95, + 96, true, - "can ingest", - "can ingest" + "apply", + "apply" ], [ "verb", "single-verb", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 329104159174415764, - 8268242647359376883, + 8106342033696543838, + 12894177882275964000, 18446744073709551615, 18446744073709551615, - 187, - 192, - 187, - 192, - 34, - 35, + 602, + 609, + 602, + 609, + 116, + 117, true, - "apply", - "apply" + "observe", + "observe" ], [ "verb", "single-verb", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 8106342444693204894, - 6388168354172051323, + 8106397800846024988, + 10875439300264671294, 18446744073709551615, 18446744073709551615, - 201, - 208, - 201, - 208, - 36, - 37, + 655, + 662, + 655, + 662, + 123, + 124, true, - "learned", - "learned" + "appears", + "appears" ], [ "verb", "single-verb", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 5949049089925459445, - 6157765644161528738, + 14637929372960545624, + 5914110341326941133, 18446744073709551615, 18446744073709551615, - 262, - 271, - 262, - 271, - 49, - 51, + 775, + 783, + 775, + 783, + 145, + 146, true, - "can scale", - "can scale" + "flattens", + "flattens" ], [ "verb", "single-verb", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 15441160910541486535, - 12447978246358110993, + 14650440089690345452, + 13442210495055254219, 18446744073709551615, 18446744073709551615, - 412, - 414, - 412, - 414, - 73, - 74, + 992, + 1000, + 992, + 1000, + 186, + 187, true, - "is", - "is" + "averages", + "averages" ], [ "verb", "single-verb", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 8106396909840561507, - 15824344309645083727, + 12178341415895564896, + 12750707122609445157, 18446744073709551615, 18446744073709551615, - 428, - 435, - 428, - 435, - 77, - 78, + 1060, + 1063, + 1060, + 1063, + 197, + 198, true, - "bounded", - "bounded" + "are", + "are" ], [ "verb", "single-verb", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 329104159209890620, - 8264779023608497036, + 329104161785194305, + 12659210101863210938, 18446744073709551615, 18446744073709551615, - 445, - 450, - 445, - 450, - 81, - 82, + 1072, + 1077, + 1072, + 1077, + 200, + 201, true, - "given", - "given" + "scale", + "scale" ], [ - "conn", - "single-conn", - 1508626318915838319, + "verb", + "single-verb", + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 15441160910541486538, - 12447978247236708799, + 389609625632420840, + 11837667465904018998, 18446744073709551615, 18446744073709551615, - 31, - 33, - 31, - 33, - 4, - 5, + 1112, + 1116, + 1112, + 1116, + 207, + 208, true, - "in", - "in" + "keep", + "keep" ], [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 15441160910541486538, - 12447978247236781944, + 4606782280409462864, + 1759956167227045589, 18446744073709551615, 18446744073709551615, - 77, - 79, - 77, - 79, - 12, - 13, + 1138, + 1150, + 1138, + 1150, + 210, + 212, true, - "in", - "in" + "constant for", + "constant for" ], [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 15441160910541485678, - 12447978235992890146, + 15441160910541480354, + 15841607655799874221, 18446744073709551615, 18446744073709551615, - 216, - 218, - 216, - 218, - 38, - 39, + 0, + 2, + 0, + 2, + 0, + 1, true, - "on", - "on" + "In", + "In" ], [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 16381206568372064271, - 7263818147332248111, + 16381206565712212855, + 6967163224569431769, 18446744073709551615, 18446744073709551615, - 234, - 240, - 234, - 240, - 41, - 43, + 33, + 39, + 33, + 39, + 8, + 10, true, - "at the", - "at the" + "of the", + "of the" ], [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 12178341415895625940, - 10930510655083395949, + 15441160910541485670, + 15841608916207186363, 18446744073709551615, 18446744073709551615, - 294, - 297, - 294, - 297, - 54, - 55, + 92, + 94, + 92, + 94, + 18, + 19, true, - "for", - "for" + "of", + "of" ], [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", + 1.0, + 15441160910541485670, + 15841608916207173993, + 18446744073709551615, + 18446744073709551615, + 157, + 159, + 157, + 159, + 28, + 29, + true, + "of", + "of" + ], + [ + "conn", + "single-conn", + 7054726458191881751, + "TEXT", + "#/texts/90", 1.0, 16381206566339127348, - 6281427824769892480, + 6948695024569863629, 18446744073709551615, 18446744073709551615, - 314, - 320, - 314, - 320, - 57, - 59, + 179, + 185, + 179, + 185, + 33, + 35, true, "on the", "on the" @@ -75617,41 +77662,62 @@ [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 16381206579218901666, - 7932367388675800903, + 389609625618037948, + 11853381785047006947, 18446744073709551615, 18446744073709551615, - 365, - 371, - 365, - 371, - 65, - 67, + 195, + 199, + 195, + 199, + 36, + 37, true, - "so the", - "so the" + "with", + "with" ], [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", + 1.0, + 15441160910541486989, + 15841608542350473314, + 18446744073709551615, + 18446744073709551615, + 250, + 252, + 250, + 252, + 46, + 47, + true, + "by", + "by" + ], + [ + "conn", + "single-conn", + 7054726458191881751, + "TEXT", + "#/texts/90", 1.0, 15441160910541485670, - 12447978245548248810, + 15841608916207199049, 18446744073709551615, 18446744073709551615, - 383, - 385, - 383, - 385, - 68, - 69, + 294, + 296, + 294, + 296, + 53, + 54, true, "of", "of" @@ -75659,5031 +77725,5031 @@ [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 16381206566339127348, - 6281427824769909768, + 14091433066300748251, + 10813497490882117614, 18446744073709551615, 18446744073709551615, - 396, - 402, - 396, - 402, - 70, - 72, + 393, + 403, + 393, + 403, + 71, + 73, true, - "on the", - "on the" + "since each", + "since each" ], [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 16381206568372178543, - 7263503225869480888, + 389609625618762887, + 11853298334064133313, 18446744073709551615, 18446744073709551615, - 415, - 421, - 415, - 421, - 74, + 422, + 426, + 422, + 426, 76, + 78, true, - "at all", - "at all" + "on a", + "on a" ], [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 15441160910541486538, - 12447978247236683376, + 389609625618037948, + 11853381785046890442, 18446744073709551615, 18446744073709551615, + 432, 436, - 438, + 432, 436, - 438, - 78, 79, + 80, true, - "in", - "in" + "with", + "with" ], [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 15441160910541485865, - 12447978233189958175, + 15441160910541480533, + 15841607645344043171, 18446744073709551615, 18446744073709551615, - 59, - 61, - 59, - 61, - 9, - 10, + 449, + 451, + 449, + 451, + 83, + 84, true, - "to", - "to" + "As", + "As" ], [ "conn", "single-conn", - 1508626318915838319, + 7054726458191881751, "TEXT", - "#/texts/93", + "#/texts/90", 1.0, - 15441160910541485865, - 12447978233189813654, + 16381206560518651853, + 299391170561034072, 18446744073709551615, 18446744073709551615, - 340, - 342, - 340, - 342, - 61, - 62, + 481, + 487, + 481, + 487, + 90, + 92, true, - "to", - "to" + "in the", + "in the" ], [ - "numval", - "ival", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 17767354399704235157, - 6666235790308819566, + 5535791613041986682, + 1987456193213323417, 18446744073709551615, 18446744073709551615, - 720, - 721, - 720, - 721, - 134, - 135, + 529, + 541, + 529, + 541, + 99, + 102, true, - "5", - "5" + "with the the", + "with the the" ], [ - "parenthesis", - "round brackets", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 772704748867907067, - 17873771936385193962, + 15441160910541485670, + 15841608916207808715, 18446744073709551615, 18446744073709551615, - 215, - 286, - 215, - 286, - 42, - 57, + 549, + 551, + 549, + 551, + 103, + 104, true, - "(e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)", - "(e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)" + "of", + "of" ], [ - "parenthesis", - "round brackets", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 7596548548401207156, - 5106769991605743942, + 16381206566339127348, + 6948695024569829645, 18446744073709551615, 18446744073709551615, - 697, - 722, - 697, - 722, + 692, + 698, + 692, + 698, 128, - 136, + 130, true, - "(as is shown in Figure 5)", - "(as is shown in Figure 5)" + "on the", + "on the" ], [ - "expression", - "common", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 15441160910541487324, - 12448443577304465400, + 389609625618037948, + 11853381785046972707, 18446744073709551615, 18446744073709551615, - 216, - 220, - 216, - 220, - 43, - 44, + 708, + 712, + 708, + 712, + 131, + 132, true, - "eg", - "e.g." + "with", + "with" ], [ - "expression", - "word-concatenation", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 13953038768306043326, - 2217483007470679809, + 16381206566339127348, + 6948695024569840929, 18446744073709551615, 18446744073709551615, - 253, - 263, - 253, - 263, - 50, - 51, + 737, + 743, + 737, + 743, + 137, + 139, true, - "pie-charts", - "pie-charts" + "on the", + "on the" ], [ - "expression", - "word-concatenation", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 5428486186575573840, - 17552603483030949066, + 15441160910541486538, + 15841608934679761481, 18446744073709551615, 18446744073709551615, - 412, - 428, - 412, - 428, - 80, - 81, + 760, + 762, + 760, + 762, + 142, + 143, true, - "image-classifier", - "image-classifier" + "in", + "in" ], [ - "sentence", - "", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 9576287605285270893, - 7775032662306861151, + 8106397797884119903, + 2253942102629974295, 18446744073709551615, 18446744073709551615, - 0, - 65, - 0, - 65, - 0, - 15, + 796, + 803, + 796, + 803, + 149, + 151, true, - "In the future, we plan to extend the platform in two major areas.", - "In the future, we plan to extend the platform in two major areas." + "as this", + "as this" ], [ - "sentence", - "", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 7980828285556281738, - 2544051083396498287, + 16381206566339127348, + 6948695024569822665, 18446744073709551615, 18446744073709551615, - 66, - 172, - 66, - 172, - 15, - 34, + 834, + 840, + 834, + 840, + 156, + 158, true, - "First, we would like to extend the number of microservices, especially with regard to image understanding.", - "First, we would like to extend the number of microservices, especially with regard to image understanding." + "on the", + "on the" ], [ - "sentence", - "", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 16816675794156539317, - 4106452168371569212, + 16381206566339127348, + 6948695024569833459, 18446744073709551615, 18446744073709551615, - 173, - 287, - 173, - 287, - 34, - 58, + 858, + 864, + 858, + 864, + 161, + 163, true, - "The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc).", - "The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)." + "on the", + "on the" ], [ - "sentence", - "", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 16727745954821675360, - 18300594417076082954, + 16381206560518651853, + 299391170561066249, 18446744073709551615, 18446744073709551615, - 288, - 429, - 288, - 429, - 58, - 82, + 893, + 899, + 893, + 899, + 168, + 170, true, - "The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier.", - "The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier." + "in the", + "in the" ], [ - "sentence", - "", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 10448641789434054504, - 14093320860906874170, + 15441160910541485670, + 15841608916207884744, 18446744073709551615, 18446744073709551615, - 430, - 513, - 430, - 513, - 82, - 98, + 907, + 909, + 907, + 909, + 171, + 172, true, - "Second, we would like to improve the quality and performance of our default models.", - "Second, we would like to improve the quality and performance of our default models." + "of", + "of" ], [ - "sentence", - "", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 3084657715463842285, - 15630767766630582663, + 389609625698530964, + 11943889382951508475, 18446744073709551615, 18446744073709551615, - 514, - 723, - 514, - 723, - 98, - 137, + 933, + 937, + 933, + 937, + 175, + 177, true, - "We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5).", - "We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5)." + "in a", + "in a" ], [ - "term", - "enum-term-mark-2", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 18219039247346551478, - 2994436876810062612, + 2011002864325523456, + 9569710153707003014, 18446744073709551615, 18446744073709551615, - 216, - 239, - 216, - 239, - 43, - 47, + 953, + 964, + 953, + 964, + 179, + 181, true, - "eg line & scatterplot", - "e.g. line & scatterplot" + "between the", + "between the" ], [ - "term", - "enum-term-mark-2", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 2459701502714558679, - 5298793252682520889, + 389609625618037948, + 11853381785046974620, 18446744073709551615, 18446744073709551615, - 467, - 490, - 467, - 490, - 90, - 93, + 1005, + 1009, + 1005, + 1009, + 188, + 189, true, - "quality and performance", - "quality and performance" + "with", + "with" ], [ - "term", - "single-term", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 16589376492252179077, - 7295144040672653108, + 15441160910541486538, + 15841608934679158336, 18446744073709551615, 18446744073709551615, - 53, - 64, - 53, - 64, - 12, - 14, + 1100, + 1102, + 1100, + 1102, + 204, + 205, true, - "major areas", - "major areas" + "in", + "in" ], [ - "term", - "single-term", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 8106398377967204844, - 1921596529468359029, + 15441160910541485865, + 15841608914925665456, 18446744073709551615, 18446744073709551615, - 216, - 225, - 216, - 225, - 43, - 45, + 170, + 172, + 170, + 172, + 30, + 31, true, - "eg line", - "e.g. line" + "to", + "to" ], [ - "term", - "single-term", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 5358230985886796623, - 5106522770952356562, + 15441160910541485865, + 15841608914925705101, 18446744073709551615, 18446744073709551615, - 265, - 280, - 265, - 280, - 52, - 54, + 207, + 209, + 207, + 209, + 38, + 39, true, - "geographic maps", - "geographic maps" + "to", + "to" ], [ - "term", - "single-term", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 15357232380281159303, - 112568471828176926, + 15441160910541485865, + 15841608914925660269, 18446744073709551615, 18446744073709551615, - 344, - 359, - 344, - 359, - 70, - 72, + 336, + 338, + 336, + 338, + 62, + 63, true, - "individual type", - "individual type" + "to", + "to" ], [ - "term", - "single-term", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 3849116425022465253, - 9034086680124657749, + 15441160910541485865, + 15841608914925567374, 18446744073709551615, 18446744073709551615, - 378, - 403, - 378, - 403, - 76, - 78, + 667, + 669, + 667, + 669, + 125, + 126, true, - "successful identification", - "successful identification" + "to", + "to" ], [ - "term", - "single-term", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 1915006193249717419, - 4993787564856558201, + 15441160910541485865, + 15841608914925573526, 18446744073709551615, 18446744073709551615, - 498, - 512, - 498, - 512, - 95, - 97, + 1069, + 1071, + 1069, + 1071, + 199, + 200, true, - "default models", - "default models" + "to", + "to" ], [ - "term", - "single-term", - 17247086344435786796, + "conn", + "single-conn", + 7054726458191881751, "TEXT", - "#/texts/94", + "#/texts/90", 1.0, - 3374009463271020691, - 3843260871587525071, + 15441160910541485865, + 15841608914925579244, 18446744073709551615, 18446744073709551615, - 585, - 600, - 585, - 600, - 110, - 112, + 1109, + 1111, + 1109, + 1111, + 206, + 207, true, - "neural networks", - "neural networks" + "to", + "to" ], [ - "term", - "single-term", - 17247086344435786796, + "numval", + "ival", + 7794115281016062068, "TEXT", - "#/texts/94", + "#/texts/91", 1.0, - 10900025937134233159, - 18131173731884203799, + 17767354399704235157, + 9706977069123592745, 18446744073709551615, 18446744073709551615, - 636, - 655, - 636, - 655, - 118, - 120, + 0, + 1, + 0, + 1, + 0, + 1, true, - "photographic images", - "photographic images" + "5", + "5" ], [ - "term", - "single-term", - 17247086344435786796, + "sentence", + "", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 5766847864654328399, - 4382574540747563376, + 657005981473069779, + 855544429870992132, 18446744073709551615, 18446744073709551615, - 675, - 696, - 675, - 696, - 125, - 128, + 0, + 276, + 0, + 276, + 0, + 48, true, - "parsed document pages", - "parsed document pages" + "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", + "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation." ], [ "term", - "single-term", - 17247086344435786796, + "enum-term-mark-2", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 16381206565274670318, - 14238598565348925208, + 15515663500877316360, + 9779616463577804921, 18446744073709551615, 18446744073709551615, - 7, - 13, - 7, - 13, - 2, - 3, + 70, + 88, + 70, + 88, + 14, + 17, true, - "future", - "future" + "parse and annotate", + "parse and annotate" ], [ "term", "single-term", - 17247086344435786796, + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 14814125365076808131, - 2312829961765099304, + 3764444893564113560, + 6998103868267134559, 18446744073709551615, 18446744073709551615, - 37, - 45, - 37, - 45, - 9, - 10, + 80, + 98, + 80, + 98, + 16, + 18, true, - "platform", - "platform" + "annotate documents", + "annotate documents" ], [ "term", "single-term", - 17247086344435786796, + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 16381206574973295053, - 1926660952952474766, + 10640406915501670366, + 17510700195140757278, 18446744073709551615, 18446744073709551615, - 101, - 107, - 101, - 107, - 23, - 24, + 204, + 222, + 204, + 222, + 37, + 39, true, - "number", - "number" + "ingested documents", + "ingested documents" ], [ "term", "single-term", - 17247086344435786796, + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 990358581043194791, - 6104208925519602427, + 7992990666316029472, + 588680236579575873, 18446744073709551615, 18446744073709551615, - 111, - 124, - 111, - 124, - 25, - 26, + 245, + 275, + 245, + 275, + 44, + 47, true, - "microservices", - "microservices" + "structured data representation", + "structured data representation" ], [ "term", "single-term", - 17247086344435786796, + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 16381206521526353544, - 4410027063676095069, + 14814125365076808131, + 17380690645567101284, 18446744073709551615, 18446744073709551615, - 142, - 148, - 142, - 148, - 29, - 30, + 42, + 50, + 42, + 50, + 8, + 9, true, - "regard", - "regard" + "platform", + "platform" ], [ "term", "single-term", - 17247086344435786796, + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 11827147635933835345, - 3554885262491213918, + 329104161667983915, + 6897824401029810817, 18446744073709551615, 18446744073709551615, - 158, - 171, - 158, - 171, - 32, - 33, + 70, + 75, + 70, + 75, + 14, + 15, true, - "understanding", - "understanding" + "parse", + "parse" ], [ "term", "single-term", - 17247086344435786796, + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 16381206574973295053, - 1926660952951671347, + 329104159241569908, + 5347589032455145571, 18446744073709551615, 18446744073709551615, - 177, - 183, - 177, - 183, - 35, - 36, + 118, + 123, + 118, + 123, + 22, + 23, true, - "number", - "number" + "train", + "train" ], [ "term", "single-term", - 17247086344435786796, + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 329104159243796903, - 7082055202846522668, + 8106464587473865376, + 2835791627105026255, 18446744073709551615, 18446744073709551615, - 187, - 192, - 187, - 192, - 37, - 38, + 141, + 148, + 141, + 148, + 26, + 27, true, - "types", - "types" + "machine", + "machine" ], [ "term", "single-term", - 17247086344435786796, + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 16381206560620045048, - 3914201981705366923, + 16381206567230470443, + 15747559098284370939, 18446744073709551615, 18446744073709551615, - 196, - 202, - 196, - 202, - 39, - 40, + 158, + 164, + 158, + 164, + 28, + 29, true, - "images", - "images" + "models", + "models" ], [ "term", "single-term", - 17247086344435786796, + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 1839290100020230611, - 2397938769091018318, + 329104161571401725, + 6886205595361632203, 18446744073709551615, 18446744073709551615, - 228, - 239, - 228, - 239, - 46, - 47, + 168, + 173, + 168, + 173, + 30, + 31, true, - "scatterplot", - "scatterplot" + "order", + "order" ], [ "term", "single-term", - 17247086344435786796, + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 16102584389807428912, - 3793139059914902481, + 8106398484416916345, + 12761515448611326706, 18446744073709551615, 18446744073709551615, - 241, - 251, - 241, - 251, - 48, - 49, + 189, + 196, + 189, + 196, + 34, + 35, true, - "histograms", - "histograms" + "content", + "content" ], [ - "term", - "single-term", - 17247086344435786796, + "verb", + "compound-verb", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 13953038768306043326, - 2217483007470679809, + 13481804153867000640, + 5921155616940636913, 18446744073709551615, 18446744073709551615, - 253, - 263, - 253, - 263, - 50, - 51, + 3, + 17, + 3, + 17, + 1, + 3, true, - "pie-charts", - "pie-charts" + "have presented", + "have presented" ], [ - "term", - "single-term", - 17247086344435786796, + "verb", + "single-verb", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 389609625699055241, - 7447546965782188814, + 329104159219515955, + 5354365758875797437, 18446744073709551615, 18446744073709551615, - 292, - 296, - 292, - 296, - 59, - 60, + 36, + 41, + 36, + 41, + 7, + 8, true, - "goal", - "goal" + "based", + "based" ], [ - "term", - "single-term", - 17247086344435786796, + "verb", + "single-verb", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 389609625696431489, - 7440840763973745685, + 2873440693780286732, + 4285909795825994377, 18446744073709551615, 18446744073709551615, - 326, - 330, - 326, - 330, - 66, - 67, + 58, + 68, + 58, + 68, + 11, + 13, true, - "data", - "data" + "can ingest", + "can ingest" ], [ - "term", - "single-term", - 17247086344435786796, + "verb", + "single-verb", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 16381206560620045048, - 3914201981705340835, + 14650447832610756948, + 58344428677043651, 18446744073709551615, 18446744073709551615, - 363, - 369, - 363, - 369, - 73, - 74, + 132, + 140, + 132, + 140, + 25, + 26, true, - "images", - "images" + "advanced", + "advanced" ], [ - "term", - "single-term", - 17247086344435786796, + "verb", + "single-verb", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 5428486186575573840, - 17552603483030949066, + 14639581097006750428, + 9301901239831102358, 18446744073709551615, 18446744073709551615, - 412, - 428, - 412, - 428, - 80, - 81, + 149, + 157, + 149, + 157, + 27, + 28, true, - "image-classifier", - "image-classifier" + "learning", + "learning" ], [ - "term", - "single-term", - 17247086344435786796, + "verb", + "single-verb", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 8106477781724488761, - 4422931059285339225, + 8106397496930289884, + 12659480351226973306, 18446744073709551615, 18446744073709551615, - 467, - 474, - 467, - 474, - 90, - 91, + 177, + 184, + 177, + 184, + 32, + 33, true, - "quality", - "quality" + "extract", + "extract" ], [ - "term", - "single-term", - 17247086344435786796, + "verb", + "single-verb", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 5731695876385560379, - 12754564211995509475, + 8106398484416229602, + 12761525411618135232, 18446744073709551615, 18446744073709551615, - 479, - 490, - 479, - 490, - 92, - 93, + 227, + 234, + 227, + 234, + 40, + 41, true, - "performance", - "performance" + "convert", + "convert" ], [ - "term", - "single-term", - 17247086344435786796, + "conn", + "single-conn", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 8106478445190161533, - 8668956716153119308, + 15441160910541486538, + 5009177319356301500, 18446744073709551615, 18446744073709551615, - 543, - 550, - 543, - 550, - 103, - 104, + 165, + 167, + 165, + 167, + 29, + 30, true, - "results", - "results" + "in", + "in" ], [ - "term", - "single-term", - 17247086344435786796, + "conn", + "single-conn", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 16381206560620045048, - 3914201981705337550, + 16381206565712212855, + 15774434041302426793, 18446744073709551615, 18446744073709551615, - 665, - 671, - 665, - 671, - 123, - 124, + 197, + 203, + 197, + 203, + 35, + 37, true, - "images", - "images" + "of the", + "of the" ], [ - "term", - "single-term", - 17247086344435786796, + "conn", + "single-conn", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 16381206514091025767, - 4428872138347593094, + 16381206560517276114, + 4138873729787782213, 18446744073709551615, 18446744073709551615, - 713, - 719, - 713, - 719, - 133, - 134, + 238, + 244, + 238, + 244, + 42, + 44, true, - "Figure", - "Figure" + "into a", + "into a" ], [ - "verb", - "compound-verb", - 17247086344435786796, + "conn", + "single-conn", + 7038163015905900647, "TEXT", - "#/texts/94", + "#/texts/92", 1.0, - 6843908984328718198, - 4424980337438809569, + 15441160910541485865, + 5009177098714228529, 18446744073709551615, 18446744073709551615, - 18, - 32, - 18, + 174, + 176, + 174, + 176, + 31, 32, - 5, - 8, true, - "plan to extend", - "plan to extend" + "to", + "to" ], [ - "verb", - "compound-verb", - 17247086344435786796, + "sentence", + "", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 14998042519330616781, - 177101627084045088, + 5399734795549420383, + 9459426372965111375, 18446744073709551615, 18446744073709551615, - 76, - 96, - 76, - 96, - 18, - 22, + 0, + 102, + 0, + 102, + 0, + 17, true, - "would like to extend", - "would like to extend" + "The fundamental design choices in our solution have proven to enable scaling in three elementary ways.", + "The fundamental design choices in our solution have proven to enable scaling in three elementary ways." ], [ - "verb", - "compound-verb", - 17247086344435786796, + "sentence", + "", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 4420603704750285605, - 14167669410101881458, + 16807999257243449869, + 13297903776875612574, 18446744073709551615, 18446744073709551615, - 302, - 321, - 302, - 321, - 61, - 65, + 103, + 153, + 103, + 153, + 17, + 26, true, - "would be to extract", - "would be to extract" + "First, it can service multiple users concurrently.", + "First, it can service multiple users concurrently." ], [ - "verb", - "compound-verb", - 17247086344435786796, + "sentence", + "", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 16290083057699948816, - 15990868729997335654, + 13213872932381400279, + 13188215006770693544, 18446744073709551615, 18446744073709551615, - 441, - 462, - 441, - 462, - 85, - 89, + 154, + 251, + 154, + 251, + 26, + 46, true, - "would like to improve", - "would like to improve" + "Second, it can ingest, parse and apply machine learned models on many documents at the same time.", + "Second, it can ingest, parse and apply machine learned models on many documents at the same time." ], [ - "verb", - "compound-verb", - 17247086344435786796, + "sentence", + "", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 17236050900252224747, - 9854267715107878317, + 9242556889455087990, + 15971994141625715858, 18446744073709551615, 18446744073709551615, - 551, - 574, - 551, - 574, - 104, - 108, + 252, + 468, + 252, + 468, + 46, + 85, true, - "can be greatly improved", - "can be greatly improved" + "Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", + "Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources." ], [ - "verb", - "compound-verb", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 8208641893359681869, - 16932607482672372426, + 2281965028547407404, + 4585290171111099204, 18446744073709551615, 18446744073709551615, - 614, - 631, - 614, - 631, - 114, - 117, + 4, + 30, + 4, + 30, + 1, + 4, true, - "use are optimised", - "use are optimised" + "fundamental design choices", + "fundamental design choices" ], [ - "verb", - "compound-verb", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 14637951881518043285, - 16016201078485034145, + 10458141827175777973, + 662304186431118688, 18446744073709551615, 18446744073709551615, - 701, - 709, - 701, - 709, - 130, - 132, + 86, + 101, + 86, + 101, + 14, + 16, true, - "is shown", - "is shown" + "elementary ways", + "elementary ways" ], [ - "verb", - "single-verb", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 329104161828335551, - 7191149074974692359, + 17200993861033027072, + 10058402512815484380, 18446744073709551615, 18446744073709551615, - 152, - 157, - 152, - 157, - 31, - 32, + 125, + 139, + 125, + 139, + 22, + 24, true, - "image", - "image" + "multiple users", + "multiple users" ], [ - "verb", - "single-verb", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541486535, - 12448443551126566363, + 12462088721494412558, + 14795705853314288990, 18446744073709551615, 18446744073709551615, - 203, - 205, - 203, - 205, - 40, + 219, + 233, + 219, + 233, + 39, 41, true, - "is", - "is" + "many documents", + "many documents" ], [ - "verb", - "single-verb", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 8106397860663428876, - 7464848062962649547, + 6168880476795325400, + 1749109350869750559, 18446744073709551615, 18446744073709551615, - 526, - 533, - 526, - 533, - 100, - 101, + 241, + 250, + 241, + 250, + 43, + 45, true, - "believe", - "believe" + "same time", + "same time" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 16380809977974811061, - 16065202910059383934, + 4421383392096991748, + 13841033715090277122, 18446744073709551615, 18446744073709551615, - 0, - 6, - 0, - 6, - 0, - 2, + 276, + 293, + 276, + 293, + 52, + 54, true, - "In the", - "In the" + "compute resources", + "compute resources" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541486538, - 12448443553082805214, + 13127417780425802861, + 18242234552519957632, 18446744073709551615, 18446744073709551615, - 46, - 48, - 46, - 48, - 10, - 11, + 298, + 313, + 298, + 313, + 55, + 57, true, - "in", - "in" + "different tasks", + "different tasks" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541485670, - 12448449173148059932, + 871079831703051200, + 5077405699917193499, 18446744073709551615, 18446744073709551615, - 108, - 110, - 108, - 110, - 24, - 25, + 349, + 364, + 349, + 364, + 63, + 65, true, - "of", - "of" + "respective load", + "respective load" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 389609625618037948, - 7445535260585538379, + 10442897704134762600, + 3285893567564519587, 18446744073709551615, 18446744073709551615, - 137, - 141, - 137, - 141, - 28, - 29, + 451, + 467, + 451, + 467, + 82, + 84, true, - "with", - "with" + "enough resources", + "enough resources" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541485670, - 12448449173148052031, + 14635106751859230946, + 764401693493390910, 18446744073709551615, 18446744073709551615, - 184, - 186, - 184, - 186, - 36, - 37, + 38, + 46, + 38, + 46, + 6, + 7, true, - "of", - "of" + "solution", + "solution" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541485670, - 12448449173148050529, + 329104161667983915, + 8353591573424153686, 18446744073709551615, 18446744073709551615, - 193, - 195, - 193, - 195, - 38, - 39, + 177, + 182, + 177, + 182, + 32, + 33, true, - "of", - "of" + "parse", + "parse" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 12178341415895623120, - 598445003466491402, + 8106464587473865376, + 16865370909529075844, 18446744073709551615, 18446744073709551615, - 331, - 334, - 331, - 334, - 67, - 68, + 193, + 200, + 193, + 200, + 35, + 36, true, - "out", - "out" + "machine", + "machine" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 14814148868025447689, - 6811951436730744836, + 16381206567230470443, + 6296565769111805720, 18446744073709551615, 18446744073709551615, - 335, - 343, - 335, - 343, - 68, - 70, + 209, + 215, + 209, + 215, + 37, + 38, true, - "of these", - "of these" + "models", + "models" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541485670, - 12448449173147950427, + 329104161844229707, + 8223163135175074012, 18446744073709551615, 18446744073709551615, - 360, - 362, - 360, - 362, - 72, - 73, + 252, + 257, + 252, + 257, + 46, + 47, true, - "of", - "of" + "Third", + "Third" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 8106398472718381934, - 9575911640413642094, + 14814125365076808131, + 2596919094696196606, 18446744073709551615, 18446744073709551615, - 370, - 377, - 370, - 377, - 74, - 76, + 321, + 329, + 321, + 329, + 59, + 60, true, - "after a", - "after a" + "platform", + "platform" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 8106477988572616406, - 15264315134668474563, + 2703018679320364082, + 16899905581150215026, 18446744073709551615, 18446744073709551615, - 404, - 411, - 404, - 411, - 78, - 80, + 372, + 382, + 372, + 382, + 67, + 68, true, - "with an", - "with an" + "conversion", + "conversion" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541485670, - 12448449173148007931, + 6167933651658664291, + 2405213947196016063, 18446744073709551615, 18446744073709551615, - 491, - 493, - 491, - 493, - 93, - 94, + 386, + 395, + 386, + 395, + 69, + 70, true, - "of", - "of" + "documents", + "documents" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 14634130761162415388, - 3833651190149238108, + 14814125365076808131, + 2596919094696188986, 18446744073709551615, 18446744073709551615, - 534, - 542, - 534, - 542, - 101, - 103, + 403, + 411, + 403, + 411, + 72, + 73, true, - "that the", - "that the" + "platform", + "platform" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 6168057894310307081, - 11769172586530017585, + 329104159219994925, + 8265223761504278760, 18446744073709551615, 18446744073709551615, - 575, - 584, - 575, - 584, - 108, - 110, + 422, + 427, + 422, + 427, + 76, + 77, true, - "since the", - "since the" + "times", + "times" ], [ - "conn", - "single-conn", - 17247086344435786796, + "term", + "single-term", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 12178341415895625940, - 598444982766319560, + 389609625631241985, + 2218225659402359325, 18446744073709551615, 18446744073709551615, - 632, - 635, - 632, - 635, - 117, - 118, + 439, + 443, + 439, + 443, + 79, + 80, true, - "for", - "for" + "time", + "time" ], [ - "conn", - "single-conn", - 17247086344435786796, + "verb", + "compound-verb", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541485670, - 12448449173147963145, + 3403952970044578622, + 10903875917460680118, 18446744073709551615, 18446744073709551615, - 672, - 674, - 672, - 674, - 124, - 125, + 47, + 76, + 47, + 76, + 7, + 12, true, - "of", - "of" + "have proven to enable scaling", + "have proven to enable scaling" ], [ - "conn", - "single-conn", - 17247086344435786796, + "verb", + "compound-verb", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541487053, - 12448443593105791703, + 15412069422981600492, + 6547325180036345245, 18446744073709551615, 18446744073709551615, - 698, - 700, - 698, - 700, - 129, - 130, + 330, + 342, + 330, + 342, + 60, + 62, true, - "as", - "as" + "according to", + "according to" ], [ - "conn", - "single-conn", - 17247086344435786796, + "verb", + "single-verb", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541486538, - 12448443553082893684, + 14892726286148891751, + 10384184194505177525, 18446744073709551615, 18446744073709551615, - 710, - 712, - 710, - 712, - 132, - 133, + 113, + 124, + 113, + 124, + 20, + 22, true, - "in", - "in" + "can service", + "can service" ], [ - "conn", - "single-conn", - 17247086344435786796, + "verb", + "single-verb", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541485865, - 12448449225007565696, + 2873440693780286732, + 15985974084754193151, 18446744073709551615, 18446744073709551615, - 23, - 25, - 23, - 25, - 6, - 7, + 165, + 175, + 165, + 175, + 29, + 31, true, - "to", - "to" + "can ingest", + "can ingest" ], [ - "conn", - "single-conn", - 17247086344435786796, + "verb", + "single-verb", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541485865, - 12448449225007823792, + 329104159174415764, + 8268242647359376883, 18446744073709551615, 18446744073709551615, - 87, - 89, - 87, - 89, - 20, - 21, + 187, + 192, + 187, + 192, + 34, + 35, true, - "to", - "to" + "apply", + "apply" ], [ - "conn", - "single-conn", - 17247086344435786796, + "verb", + "single-verb", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541485865, - 12448449225007818811, + 8106342444693204894, + 6388168354172051323, 18446744073709551615, 18446744073709551615, - 149, - 151, - 149, - 151, - 30, - 31, + 201, + 208, + 201, + 208, + 36, + 37, true, - "to", - "to" + "learned", + "learned" ], [ - "conn", - "single-conn", - 17247086344435786796, + "verb", + "single-verb", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541485865, - 12448449224998732616, + 5949049089925459445, + 6157765644161528738, 18446744073709551615, 18446744073709551615, - 311, - 313, - 311, - 313, - 63, - 64, + 262, + 271, + 262, + 271, + 49, + 51, true, - "to", - "to" + "can scale", + "can scale" ], [ - "conn", - "single-conn", - 17247086344435786796, + "verb", + "single-verb", + 1508626318915838319, "TEXT", - "#/texts/94", + "#/texts/93", 1.0, - 15441160910541485865, - 12448449224998740693, + 15441160910541486535, + 12447978246358110993, 18446744073709551615, 18446744073709551615, - 452, - 454, - 452, - 454, - 87, - 88, + 412, + 414, + 412, + 414, + 73, + 74, true, - "to", - "to" + "is", + "is" ], [ - "expression", - "word-concatenation", - 10287541089279789496, + "verb", + "single-verb", + 1508626318915838319, "TEXT", - "#/texts/95", + "#/texts/93", 1.0, - 17249225789261661029, - 3807297211102715149, + 8106396909840561507, + 15824344309645083727, 18446744073709551615, 18446744073709551615, - 12, - 28, - 12, - 28, - 1, - 2, + 428, + 435, + 428, + 435, + 77, + 78, true, - "data-parallelism", - "data-parallelism" + "bounded", + "bounded" ], [ - "expression", - "word-concatenation", - 10287541089279789496, + "verb", + "single-verb", + 1508626318915838319, "TEXT", - "#/texts/95", + "#/texts/93", 1.0, - 8685358683472264781, - 17027290145523372529, + 329104159209890620, + 8264779023608497036, 18446744073709551615, 18446744073709551615, - 87, - 105, - 87, - 105, - 12, - 13, + 445, + 450, + 445, + 450, + 81, + 82, true, - "user-customisation", - "user-customisation" + "given", + "given" ], [ - "sentence", - "", - 10287541089279789496, + "conn", + "single-conn", + 1508626318915838319, "TEXT", - "#/texts/95", + "#/texts/93", 1.0, - 10588183979877639592, - 1367000647117206524, + 15441160910541486538, + 12447978247236708799, 18446744073709551615, 18446744073709551615, - 12, - 119, - 12, - 119, - 1, - 15, + 31, + 33, + 31, + 33, + 4, + 5, true, - "data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", - "data-parallelism in order to speed up the training and provide interactive user-customisation capabilities." + "in", + "in" ], [ - "term", - "single-term", - 10287541089279789496, + "conn", + "single-conn", + 1508626318915838319, "TEXT", - "#/texts/95", + "#/texts/93", 1.0, - 9998261106336570604, - 4856078764969945002, + 15441160910541486538, + 12447978247236781944, 18446744073709551615, 18446744073709551615, - 75, - 118, - 75, - 118, - 11, - 14, + 77, + 79, + 77, + 79, + 12, + 13, true, - "interactive user-customisation capabilities", - "interactive user-customisation capabilities" + "in", + "in" ], [ - "term", - "single-term", - 10287541089279789496, + "conn", + "single-conn", + 1508626318915838319, "TEXT", - "#/texts/95", + "#/texts/93", 1.0, - 17249225789261661029, - 3807297211102715149, + 15441160910541485678, + 12447978235992890146, 18446744073709551615, 18446744073709551615, - 12, - 28, - 12, - 28, - 1, - 2, + 216, + 218, + 216, + 218, + 38, + 39, true, - "data-parallelism", - "data-parallelism" + "on", + "on" ], [ - "term", - "single-term", - 10287541089279789496, + "conn", + "single-conn", + 1508626318915838319, "TEXT", - "#/texts/95", + "#/texts/93", 1.0, - 329104161571401725, - 3792502362005124423, + 16381206568372064271, + 7263818147332248111, 18446744073709551615, 18446744073709551615, - 32, - 37, - 32, - 37, - 3, - 4, + 234, + 240, + 234, + 240, + 41, + 43, true, - "order", - "order" + "at the", + "at the" ], [ - "term", - "single-term", - 10287541089279789496, + "conn", + "single-conn", + 1508626318915838319, "TEXT", - "#/texts/95", + "#/texts/93", 1.0, - 14634153919632515335, - 3840780376526095372, + 12178341415895625940, + 10930510655083395949, 18446744073709551615, 18446744073709551615, + 294, + 297, + 294, + 297, 54, - 62, - 54, - 62, - 8, - 9, + 55, true, - "training", - "training" - ], + "for", + "for" + ], [ - "verb", - "single-verb", - 10287541089279789496, + "conn", + "single-conn", + 1508626318915838319, "TEXT", - "#/texts/95", + "#/texts/93", 1.0, - 329104161639049345, - 3799043945253257651, + 16381206566339127348, + 6281427824769892480, 18446744073709551615, 18446744073709551615, - 41, - 46, - 41, - 46, - 5, - 6, + 314, + 320, + 314, + 320, + 57, + 59, true, - "speed", - "speed" + "on the", + "on the" ], [ - "verb", - "single-verb", - 10287541089279789496, + "conn", + "single-conn", + 1508626318915838319, "TEXT", - "#/texts/95", + "#/texts/93", 1.0, - 8106476000214061408, - 9620881782228868220, + 16381206579218901666, + 7932367388675800903, 18446744073709551615, 18446744073709551615, + 365, + 371, + 365, + 371, + 65, 67, - 74, - 67, - 74, - 10, - 11, true, - "provide", - "provide" + "so the", + "so the" ], [ "conn", "single-conn", - 10287541089279789496, + 1508626318915838319, "TEXT", - "#/texts/95", + "#/texts/93", 1.0, - 15441160910541486538, - 14667436044722575629, + 15441160910541485670, + 12447978245548248810, 18446744073709551615, 18446744073709551615, - 29, - 31, - 29, - 31, - 2, - 3, + 383, + 385, + 383, + 385, + 68, + 69, true, - "in", - "in" + "of", + "of" ], [ "conn", "single-conn", - 10287541089279789496, + 1508626318915838319, "TEXT", - "#/texts/95", + "#/texts/93", 1.0, - 15441160910541485865, - 14667435948507858038, + 16381206566339127348, + 6281427824769909768, 18446744073709551615, 18446744073709551615, - 38, - 40, - 38, - 40, - 4, - 5, + 396, + 402, + 396, + 402, + 70, + 72, true, - "to", - "to" + "on the", + "on the" ], [ - "sentence", - "", - 15983582675278266440, + "conn", + "single-conn", + 1508626318915838319, "TEXT", - "#/texts/97", + "#/texts/93", 1.0, - 5556222901900980902, - 15746519596852768008, + 16381206568372178543, + 7263503225869480888, 18446744073709551615, 18446744073709551615, - 0, - 127, - 0, - 127, - 0, - 22, + 415, + 421, + 415, + 421, + 74, + 76, true, - "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", - "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system." + "at all", + "at all" ], [ - "term", - "enum-term-mark-4", - 15983582675278266440, + "conn", + "single-conn", + 1508626318915838319, "TEXT", - "#/texts/97", + "#/texts/93", 1.0, - 13556182311682325280, - 9761797471225359212, + 15441160910541486538, + 12447978247236683376, 18446744073709551615, 18446744073709551615, - 32, - 66, - 32, - 66, - 6, - 11, + 436, + 438, + 436, + 438, + 78, + 79, true, - "Roxana Istrate and Matthieu Mottet", - "Roxana Istrate and Matthieu Mottet" + "in", + "in" ], [ - "term", - "single-term", - 15983582675278266440, + "conn", + "single-conn", + 1508626318915838319, "TEXT", - "#/texts/97", + "#/texts/93", 1.0, - 7949755686502200390, - 668350583233417234, + 15441160910541485865, + 12447978233189958175, 18446744073709551615, 18446744073709551615, - 32, - 46, - 32, - 46, - 6, - 8, + 59, + 61, + 59, + 61, + 9, + 10, true, - "Roxana Istrate", - "Roxana Istrate" + "to", + "to" ], [ - "term", - "single-term", - 15983582675278266440, + "conn", + "single-conn", + 1508626318915838319, "TEXT", - "#/texts/97", + "#/texts/93", 1.0, - 422584487912656734, - 11698320462170095527, + 15441160910541485865, + 12447978233189813654, 18446744073709551615, 18446744073709551615, - 51, - 66, - 51, - 66, - 9, - 11, + 340, + 342, + 340, + 342, + 61, + 62, true, - "Matthieu Mottet", - "Matthieu Mottet" + "to", + "to" ], [ - "term", - "single-term", - 15983582675278266440, + "numval", + "ival", + 17247086344435786796, "TEXT", - "#/texts/97", + "#/texts/94", 1.0, - 244635901031456436, - 9625433519480199700, + 17767354399704235157, + 6666235790308819566, 18446744073709551615, 18446744073709551615, - 116, - 126, - 116, - 126, - 19, - 21, + 720, + 721, + 720, + 721, + 134, + 135, true, - "CCS system", - "CCS system" + "5", + "5" ], [ - "term", - "single-term", - 15983582675278266440, + "parenthesis", + "round brackets", + 17247086344435786796, "TEXT", - "#/texts/97", + "#/texts/94", 1.0, - 8106397759446161562, - 5642353918280438479, + 772704748867907067, + 17873771936385193962, 18446744073709551615, 18446744073709551615, - 4, - 11, - 4, - 11, - 1, - 2, + 215, + 286, + 215, + 286, + 42, + 57, true, - "authors", - "authors" + "(e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)", + "(e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)" ], [ - "term", - "single-term", - 15983582675278266440, + "parenthesis", + "round brackets", + 17247086344435786796, "TEXT", - "#/texts/97", + "#/texts/94", 1.0, - 4603153860084293890, - 9630773090599505701, + 7596548548401207156, + 5106769991605743942, 18446744073709551615, 18446744073709551615, - 77, - 89, - 77, - 89, - 13, - 14, + 697, + 722, + 697, + 722, + 128, + 136, true, - "contribution", - "contribution" + "(as is shown in Figure 5)", + "(as is shown in Figure 5)" ], [ - "term", - "single-term", - 15983582675278266440, + "expression", + "common", + 17247086344435786796, "TEXT", - "#/texts/97", + "#/texts/94", 1.0, - 1525875096007260836, - 16905177906202921866, + 15441160910541487324, + 12448443577304465400, 18446744073709551615, 18446744073709551615, - 97, - 108, - 97, - 108, - 16, - 17, + 216, + 220, + 216, + 220, + 43, + 44, true, - "development", - "development" + "eg", + "e.g." ], [ - "verb", - "compound-verb", - 15983582675278266440, + "expression", + "word-concatenation", + 17247086344435786796, "TEXT", - "#/texts/97", + "#/texts/94", 1.0, - 17737636287413194494, - 18246863768738587194, + 13953038768306043326, + 2217483007470679809, 18446744073709551615, 18446744073709551615, - 12, - 31, - 12, - 31, - 2, - 6, + 253, + 263, + 253, + 263, + 50, + 51, true, - "would like to thank", - "would like to thank" + "pie-charts", + "pie-charts" ], [ - "conn", - "single-conn", - 15983582675278266440, + "expression", + "word-concatenation", + 17247086344435786796, "TEXT", - "#/texts/97", + "#/texts/94", 1.0, - 12178341415895625940, - 9042585404458343529, + 5428486186575573840, + 17552603483030949066, 18446744073709551615, 18446744073709551615, - 67, - 70, - 67, - 70, - 11, - 12, + 412, + 428, + 412, + 428, + 80, + 81, true, - "for", - "for" + "image-classifier", + "image-classifier" ], [ - "conn", - "single-conn", - 15983582675278266440, + "sentence", + "", + 17247086344435786796, "TEXT", - "#/texts/97", + "#/texts/94", 1.0, - 16381206565712212855, - 15703671923459609107, + 9576287605285270893, + 7775032662306861151, 18446744073709551615, 18446744073709551615, - 109, - 115, - 109, - 115, - 17, - 19, + 0, + 65, + 0, + 65, + 0, + 15, true, - "of the", - "of the" + "In the future, we plan to extend the platform in two major areas.", + "In the future, we plan to extend the platform in two major areas." ], [ - "conn", - "single-conn", - 15983582675278266440, + "sentence", + "", + 17247086344435786796, "TEXT", - "#/texts/97", + "#/texts/94", 1.0, - 15441160910541485865, - 16366793807298640842, + 7980828285556281738, + 2544051083396498287, 18446744073709551615, 18446744073709551615, - 23, - 25, - 23, - 25, - 4, - 5, + 66, + 172, + 66, + 172, + 15, + 34, true, - "to", - "to" + "First, we would like to extend the number of microservices, especially with regard to image understanding.", + "First, we would like to extend the number of microservices, especially with regard to image understanding." ], [ - "conn", - "single-conn", - 15983582675278266440, + "sentence", + "", + 17247086344435786796, "TEXT", - "#/texts/97", + "#/texts/94", 1.0, - 16381206519425733256, - 17710263008813390102, + 16816675794156539317, + 4106452168371569212, 18446744073709551615, 18446744073709551615, - 90, - 96, - 90, - 96, - 14, - 16, + 173, + 287, + 173, + 287, + 34, + 58, true, - "to the", - "to the" + "The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc).", + "The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc)." ], [ - "numval", - "year", - 12711351442546714716, + "sentence", + "", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 389609625548777262, - 15175051322594687321, + 16727745954821675360, + 18300594417076082954, 18446744073709551615, 18446744073709551615, - 175, - 179, - 175, - 179, - 39, - 40, + 288, + 429, + 288, + 429, + 58, + 82, true, - "2020", - "2020" + "The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier.", + "The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier." ], [ - "numval", - "ival", - 12711351442546714716, + "sentence", + "", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 16380810010060182105, - 2087358970220343258, + 10448641789434054504, + 14093320860906874170, 18446744073709551615, 18446744073709551615, - 232, - 238, - 232, - 238, - 47, - 48, + 430, + 513, + 430, + 513, + 82, + 98, true, - "721027", - "721027" + "Second, we would like to improve the quality and performance of our default models.", + "Second, we would like to improve the quality and performance of our default models." ], [ - "link", - "url", - 12711351442546714716, + "sentence", + "", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 4558951843677957919, - 6153426188298487244, + 3084657715463842285, + 15630767766630582663, 18446744073709551615, 18446744073709551615, - 44, - 62, - 44, - 62, - 9, - 16, + 514, + 723, + 514, + 723, + 98, + 137, true, - "http://nccr-marvel", - "http://nccr-marvel" + "We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5).", + "We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5)." ], [ - "link", - "url", - 12711351442546714716, + "term", + "enum-term-mark-2", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 1840514147198720564, - 15805877038624594621, + 18219039247346551478, + 2994436876810062612, 18446744073709551615, 18446744073709551615, - 240, - 267, - 240, - 267, - 49, - 60, + 216, + 239, + 216, + 239, + 43, + 47, true, - "http://the-force-project.eu", - "http://the-force-project.eu" + "eg line & scatterplot", + "e.g. line & scatterplot" ], [ - "parenthesis", - "round brackets", - 12711351442546714716, + "term", + "enum-term-mark-2", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 9725913318321680311, - 4961995203860234930, + 2459701502714558679, + 5298793252682520889, 18446744073709551615, 18446744073709551615, - 43, - 67, - 43, - 67, - 8, - 19, + 467, + 490, + 467, + 490, + 90, + 93, true, - "(http://nccr-marvel. ch)", - "(http://nccr-marvel. ch)" + "quality and performance", + "quality and performance" ], [ - "parenthesis", - "round brackets", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 2988796312331131177, - 6687402703764012002, + 16589376492252179077, + 7295144040672653108, 18446744073709551615, 18446744073709551615, - 239, - 268, - 239, - 268, - 48, - 61, + 53, + 64, + 53, + 64, + 12, + 14, true, - "(http://the-force-project.eu)", - "(http://the-force-project.eu)" + "major areas", + "major areas" ], [ - "expression", - "wtoken-concatenation", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 10991632387650324970, - 16837241231127303249, + 8106398377967204844, + 1921596529468359029, 18446744073709551615, 18446744073709551615, - 186, - 198, - 186, - 198, - 41, - 42, + 216, + 225, + 216, + 225, + 43, + 45, true, - "NMBP-23-2016", - "NMBP-23-2016" + "eg line", + "e.g. line" ], [ - "sentence", - "", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 9490709138959189212, - 12250691288144973169, + 5358230985886796623, + 5106522770952356562, 18446744073709551615, 18446744073709551615, - 0, - 117, - 0, - 117, - 0, - 28, + 265, + 280, + 265, + 280, + 52, + 54, true, - "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation.", - "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation." + "geographic maps", + "geographic maps" ], [ - "sentence", - "", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 1102470314652222820, - 15601071363037323756, + 15357232380281159303, + 112568471828176926, 18446744073709551615, 18446744073709551615, - 118, - 269, - 118, - 269, - 28, - 62, + 344, + 359, + 344, + 359, + 70, + 72, true, - "MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", - "MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu)." + "individual type", + "individual type" ], [ "term", "single-term", - 12711351442546714716, + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 9107120802959375325, - 14854418713978759874, + 3849116425022465253, + 9034086680124657749, 18446744073709551615, 18446744073709551615, - 31, - 42, - 31, - 42, - 6, - 8, + 378, + 403, + 378, + 403, + 76, + 78, true, - "NCCR MARVEL", - "NCCR MARVEL" + "successful identification", + "successful identification" ], [ "term", "single-term", - 12711351442546714716, + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 4312908239263712749, - 12629609910975902459, + 1915006193249717419, + 4993787564856558201, 18446744073709551615, 18446744073709551615, - 83, - 116, - 83, - 116, - 23, - 27, + 498, + 512, + 498, + 512, + 95, + 97, true, - "Swiss National Science Foundation", - "Swiss National Science Foundation" + "default models", + "default models" ], [ "term", "single-term", - 12711351442546714716, + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 15770732106686559794, - 7781941783302435281, + 3374009463271020691, + 3843260871587525071, 18446744073709551615, 18446744073709551615, - 142, - 155, - 142, - 155, - 33, - 35, + 585, + 600, + 585, + 600, + 110, + 112, true, - "FORCE project", - "FORCE project" + "neural networks", + "neural networks" ], [ "term", "single-term", - 12711351442546714716, + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 4392060515500483083, - 8866459764729165903, + 10900025937134233159, + 18131173731884203799, 18446744073709551615, 18446744073709551615, - 209, - 231, - 209, - 231, - 44, - 47, + 636, + 655, + 636, + 655, + 118, + 120, true, - "Grant agreement number", - "Grant agreement number" + "photographic images", + "photographic images" ], [ "term", "single-term", - 12711351442546714716, + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 389609625633592023, - 15303732731624508399, + 5766847864654328399, + 4382574540747563376, 18446744073709551615, 18446744073709551615, - 5, - 9, - 5, - 9, - 1, - 2, + 675, + 696, + 675, + 696, + 125, + 128, true, - "work", - "work" + "parsed document pages", + "parsed document pages" ], [ "term", "single-term", - 12711351442546714716, + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 15441160910541486943, - 7829630847478764393, + 6604953305718748559, + 8562548416720689057, 18446744073709551615, 18446744073709551615, - 64, - 66, - 64, - 66, - 17, - 18, + 756, + 776, + 756, + 776, + 143, + 146, true, - "ch", - "ch" + "deep learning models", + "deep learning models" ], [ "term", "single-term", - 12711351442546714716, + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 15441160910541480587, - 7830721483973022036, + 16381206565274670318, + 14238598565348925208, 18446744073709551615, 18446744073709551615, - 118, - 120, - 118, - 120, - 28, - 29, + 7, + 13, + 7, + 13, + 2, + 3, true, - "MD", - "MD" + "future", + "future" ], [ "term", "single-term", - 12711351442546714716, + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 8106351288219429194, - 18213714777089539961, + 14814125365076808131, + 2312829961765099304, 18446744073709551615, 18446744073709551615, - 167, - 174, - 167, - 174, - 38, - 39, + 37, + 45, + 37, + 45, + 9, + 10, true, - "Horizon", - "Horizon" + "platform", + "platform" ], [ "term", "single-term", - 12711351442546714716, + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 10991632387650324970, - 16837241231127303249, + 16381206574973295053, + 1926660952952474766, 18446744073709551615, 18446744073709551615, - 186, - 198, - 186, - 198, - 41, - 42, + 101, + 107, + 101, + 107, + 23, + 24, true, - "NMBP-23-2016", - "NMBP-23-2016" + "number", + "number" ], [ - "verb", - "compound-verb", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 13041846394845825316, - 5320956231753433918, + 990358581043194791, + 6104208925519602427, 18446744073709551615, 18446744073709551615, - 10, - 23, - 10, - 23, - 2, - 4, + 111, + 124, + 111, + 124, + 25, + 26, true, - "was supported", - "was supported" + "microservices", + "microservices" ], [ - "verb", - "compound-verb", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 13041846394845825316, - 5320956231753459128, + 16381206521526353544, + 4410027063676095069, 18446744073709551615, 18446744073709551615, - 121, - 134, - 121, - 134, + 142, + 148, + 142, + 148, 29, - 31, + 30, true, - "was supported", - "was supported" + "regard", + "regard" ], [ - "verb", - "single-verb", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 16381206565272093797, - 5039615147538801699, + 11827147635933835345, + 3554885262491213918, 18446744073709551615, 18446744073709551615, - 69, - 75, - 69, - 75, - 20, - 21, + 158, + 171, + 158, + 171, + 32, + 33, true, - "funded", - "funded" + "understanding", + "understanding" ], [ - "verb", - "single-verb", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 16381206565272093797, - 5039615147538790981, + 16381206574973295053, + 1926660952951671347, 18446744073709551615, 18446744073709551615, - 157, - 163, - 157, - 163, + 177, + 183, + 177, + 183, + 35, 36, - 37, true, - "funded", - "funded" + "number", + "number" ], [ - "verb", - "single-verb", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 389609625695109591, - 15313901038780033729, + 329104159243796903, + 7082055202846522668, 18446744073709551615, 18446744073709551615, - 199, - 203, - 199, - 203, - 42, - 43, + 187, + 192, + 187, + 192, + 37, + 38, true, - "call", - "call" + "types", + "types" ], [ - "conn", - "single-conn", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 16381206574363061705, - 5224141779864768374, + 16381206560620045048, + 3914201981705366923, 18446744073709551615, 18446744073709551615, - 24, - 30, - 24, - 30, - 4, - 6, + 196, + 202, + 196, + 202, + 39, + 40, true, - "by the", - "by the" + "images", + "images" ], [ - "conn", - "single-conn", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 16381206574363061705, - 5224141779864730347, + 1839290100020230611, + 2397938769091018318, 18446744073709551615, 18446744073709551615, - 76, - 82, - 76, - 82, - 21, - 23, + 228, + 239, + 228, + 239, + 46, + 47, true, - "by the", - "by the" + "scatterplot", + "scatterplot" ], [ - "conn", - "single-conn", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 16381206574363061705, - 5224141779864726325, + 16102584389807428912, + 3793139059914902481, 18446744073709551615, 18446744073709551615, - 135, - 141, - 135, - 141, - 31, - 33, + 241, + 251, + 241, + 251, + 48, + 49, true, - "by the", - "by the" + "histograms", + "histograms" ], [ - "conn", - "single-conn", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 15441160910541486989, - 7829629584886114826, + 13953038768306043326, + 2217483007470679809, 18446744073709551615, 18446744073709551615, - 164, - 166, - 164, - 166, - 37, - 38, + 253, + 263, + 253, + 263, + 50, + 51, true, - "by", - "by" + "pie-charts", + "pie-charts" ], [ - "conn", - "single-conn", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 329104159159151530, - 12430892283433612669, + 389609625699055241, + 7447546965782188814, 18446744073709551615, 18446744073709551615, - 180, - 185, - 180, - 185, - 40, - 41, + 292, + 296, + 292, + 296, + 59, + 60, true, - "under", - "under" + "goal", + "goal" ], [ - "conn", - "single-conn", - 12711351442546714716, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/98", + "#/texts/94", 1.0, - 389609625618037948, - 15311592167218177731, + 389609625696431489, + 7440840763973745685, 18446744073709551615, 18446744073709551615, - 204, - 208, - 204, - 208, - 43, - 44, + 326, + 330, + 326, + 330, + 66, + 67, true, - "with", - "with" + "data", + "data" ], [ - "reference", - "author", - 1712774266196702392, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/100", + "#/texts/94", 1.0, - 10921193442290853772, - 7808176325166967948, + 16381206560620045048, + 3914201981705340835, 18446744073709551615, 18446744073709551615, - 4, - 21, - 4, - 21, - 1, - 4, + 363, + 369, + 363, + 369, + 73, + 74, true, - "A. Antonacopoulos", - "A. Antonacopoulos" + "images", + "images" ], [ - "reference", - "author", - 1712774266196702392, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/100", + "#/texts/94", 1.0, - 5181382481262336037, - 5307751930075227018, + 5428486186575573840, + 17552603483030949066, 18446744073709551615, 18446744073709551615, - 23, - 34, - 23, - 34, - 5, - 8, + 412, + 428, + 412, + 428, + 80, + 81, true, - "C. Clausner", - "C. Clausner" + "image-classifier", + "image-classifier" ], [ - "reference", - "author", - 1712774266196702392, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/100", + "#/texts/94", 1.0, - 18410882341323932977, - 3950678732393374894, + 8106477781724488761, + 4422931059285339225, 18446744073709551615, 18446744073709551615, - 36, - 51, - 36, - 51, - 9, - 12, + 467, + 474, + 467, + 474, + 90, + 91, true, - "C. Papadopoulos", - "C. Papadopoulos" + "quality", + "quality" ], [ - "reference", - "author", - 1712774266196702392, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/100", + "#/texts/94", 1.0, - 6326253284428776844, - 2242368337149903292, + 5731695876385560379, + 12754564211995509475, 18446744073709551615, 18446744073709551615, - 57, - 73, - 57, - 73, - 14, - 18, + 479, + 490, + 479, + 490, + 92, + 93, true, - "S. Pletschacher.", - "S. Pletschacher." + "performance", + "performance" ], [ - "reference", - "citation-number", - 1712774266196702392, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/100", + "#/texts/94", 1.0, - 12178341415895551530, - 18332345913337968356, + 8106478445190161533, + 8668956716153119308, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 543, + 550, + 543, + 550, + 103, + 104, true, - "[1]", - "[1]" + "results", + "results" ], [ - "reference", - "container-title", - 1712774266196702392, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/100", + "#/texts/94", 1.0, - 2527079864200222812, - 474810476780653321, + 16381206560620045048, + 3914201981705337550, 18446744073709551615, 18446744073709551615, - 161, - 249, - 161, - 249, - 30, - 42, + 665, + 671, + 665, + 671, + 123, + 124, true, - "In Proceedings of the 13th International Conference on Document Analysis and Recognition", - "In Proceedings of the 13th International Conference on Document Analysis and Recognition" + "images", + "images" ], [ - "reference", - "container-title", - 1712774266196702392, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/100", + "#/texts/94", 1.0, - 6558131902220562236, - 4761966619744782752, + 16381206514091025767, + 4428872138347593094, 18446744073709551615, 18446744073709551615, - 251, - 260, - 251, - 260, - 43, - 45, + 713, + 719, + 713, + 719, + 133, + 134, true, - "ICDAR2015", - "ICDAR2015" + "Figure", + "Figure" ], [ - "reference", - "date", - 1712774266196702392, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/100", + "#/texts/94", 1.0, - 389609625548777059, - 4138332198474599496, + 14639580755784032837, + 10701824725110310827, 18446744073709551615, 18446744073709551615, - 74, - 78, - 74, - 78, - 18, - 19, + 727, + 735, + 727, + 735, + 138, + 139, true, - "2015", - "2015" + "leverage", + "leverage" ], [ - "reference", - "date", - 1712774266196702392, + "term", + "single-term", + 17247086344435786796, "TEXT", - "#/texts/100", + "#/texts/94", 1.0, - 10303630957638511768, - 3815340683710445282, + 12178341415895516060, + 598493059994502436, 18446744073709551615, 18446744073709551615, - 270, - 279, - 270, - 279, - 49, - 50, + 749, + 752, + 749, + 752, + 141, + 142, true, - "1151-1155", - "1151-1155" + "use", + "use" ], [ - "reference", - "location", - 1712774266196702392, + "verb", + "compound-verb", + 17247086344435786796, "TEXT", - "#/texts/100", + "#/texts/94", 1.0, - 329104162200796337, - 14591806354842233425, + 6843908984328718198, + 4424980337438809569, 18446744073709551615, 18446744073709551615, - 263, - 268, - 263, - 268, - 47, - 48, + 18, + 32, + 18, + 32, + 5, + 8, true, - "Nancy", - "Nancy" + "plan to extend", + "plan to extend" ], [ - "reference", - "title", - 1712774266196702392, + "verb", + "compound-verb", + 17247086344435786796, "TEXT", - "#/texts/100", + "#/texts/94", 1.0, - 17804212744220731295, - 13329383501201933373, + 14998042519330616781, + 177101627084045088, 18446744073709551615, 18446744073709551615, - 80, - 159, - 80, - 159, - 20, - 29, + 76, + 96, + 76, + 96, + 18, + 22, true, - "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015", - "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015" + "would like to extend", + "would like to extend" ], [ - "numval", - "year", - 14718288547983000340, + "verb", + "compound-verb", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 389609625548757414, - 14515784463162085628, + 4420603704750285605, + 14167669410101881458, 18446744073709551615, 18446744073709551615, - 17, - 21, - 17, - 21, - 4, - 5, + 302, + 321, + 302, + 321, + 61, + 65, true, - "2001", - "2001" + "would be to extract", + "would be to extract" ], [ - "numval", - "year", - 14718288547983000340, + "verb", + "compound-verb", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 389609625548757414, - 14515784463162082595, + 16290083057699948816, + 15990868729997335654, 18446744073709551615, 18446744073709551615, - 70, - 74, - 70, - 74, - 17, - 18, + 441, + 462, + 441, + 462, + 85, + 89, true, - "2001", - "2001" + "would like to improve", + "would like to improve" ], [ - "numval", - "fval", - 14718288547983000340, + "verb", + "compound-verb", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 8104408072666159999, - 6544755582293006081, + 17236050900252224747, + 9854267715107878317, 18446744073709551615, 18446744073709551615, - 99, - 106, - 99, - 106, - 30, - 31, + 551, + 574, + 551, + 574, + 104, + 108, true, - "10.1023", - "10.1023" + "can be greatly improved", + "can be greatly improved" ], [ - "numval", - "irng", - 14718288547983000340, + "verb", + "compound-verb", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 389609625655395305, - 14454171207833729215, + 8208641893359681869, + 16932607482672372426, 18446744073709551615, 18446744073709551615, - 77, - 81, - 77, - 81, - 20, - 21, + 614, + 631, + 614, + 631, + 114, + 117, true, - "5-32", - "5-32" + "use are optimised", + "use are optimised" ], [ - "numval", - "ival", - 14718288547983000340, + "verb", + "compound-verb", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 15441160910541486271, - 2222475241750256418, + 14637951881518043285, + 16016201078485034145, 18446744073709551615, 18446744073709551615, - 56, - 58, - 56, - 58, - 11, - 12, + 701, + 709, + 701, + 709, + 130, + 132, true, - "45", - "45" + "is shown", + "is shown" ], [ - "numval", - "ival", - 14718288547983000340, + "verb", + "single-verb", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 17767354399704235161, - 1208869658482268274, + 329104161828335551, + 7191149074974692359, 18446744073709551615, 18446744073709551615, - 60, - 61, - 60, - 61, - 13, - 14, + 152, + 157, + 152, + 157, + 31, + 32, true, - "1", - "1" + "image", + "image" ], [ - "numval", - "ival", - 14718288547983000340, + "verb", + "single-verb", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 15441160910541481918, - 2222473849477022423, + 15441160910541486535, + 12448443551126566363, 18446744073709551615, 18446744073709551615, - 63, - 65, - 63, - 65, - 15, - 16, + 203, + 205, + 203, + 205, + 40, + 41, true, - "01", - "01" + "is", + "is" ], [ - "numval", - "ival", - 14718288547983000340, + "verb", + "single-verb", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 8238939589069097802, - 7985792528150377935, + 8106397860663428876, + 7464848062962649547, 18446744073709551615, 18446744073709551615, - 109, - 122, - 109, - 122, - 34, - 35, + 526, + 533, + 526, + 533, + 100, + 101, true, - "1010933404324", - "1010933404324" + "believe", + "believe" ], [ - "link", - "url", - 14718288547983000340, + "verb", + "single-verb", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 1225079762841478321, - 13531790532415888950, + 8106397113586492286, + 6559895150961820650, 18446744073709551615, 18446744073709551615, - 83, - 122, - 83, - 122, - 22, - 35, + 741, + 748, + 741, + 748, + 140, + 141, true, - "https://doi.org/10.1023/A:1010933404324", - "https://doi.org/10.1023/A:1010933404324" + "growing", + "growing" ], [ - "link", - "doi", - 14718288547983000340, + "verb", + "single-verb", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 3684595426122890679, - 8279367666964033467, + 6182665070913771698, + 13573081868742307876, 18446744073709551615, 18446744073709551615, - 91, - 122, - 91, - 122, - 26, - 35, + 799, + 808, + 799, + 808, + 150, + 151, true, - "doi.org/10.1023/A:1010933404324", - "doi.org/10.1023/A:1010933404324" + "introduce", + "introduce" ], [ - "parenthesis", - "reference", - 14718288547983000340, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 12178341415895551595, - 12282095972636501808, + 16380809977974811061, + 16065202910059383934, 18446744073709551615, 18446744073709551615, 0, - 3, + 6, 0, - 3, + 6, 0, - 1, + 2, true, - "[2]", - "[2]" + "In the", + "In the" ], [ - "parenthesis", - "round brackets", - 14718288547983000340, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 1665356215973274173, - 6648617910504136340, + 15441160910541486538, + 12448443553082805214, 18446744073709551615, 18446744073709551615, - 62, - 75, - 62, - 75, - 14, - 19, + 46, + 48, + 46, + 48, + 10, + 11, true, - "(01 Oct 2001)", - "(01 Oct 2001)" + "in", + "in" ], [ - "expression", - "wtoken-concatenation", - 14718288547983000340, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 12178341415895551595, - 12282095972636501808, + 15441160910541485670, + 12448449173148059932, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 108, + 110, + 108, + 110, + 24, + 25, true, - "[2]", - "[2]" + "of", + "of" ], [ - "sentence", - "", - 14718288547983000340, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 15712069677801245586, - 15474412409802070037, + 389609625618037948, + 7445535260585538379, 18446744073709551615, 18446744073709551615, - 0, - 16, - 0, - 16, - 0, - 4, + 137, + 141, + 137, + 141, + 28, + 29, true, - "[2] Leo Breiman.", - "[2] Leo Breiman." + "with", + "with" ], [ - "sentence", - "", - 14718288547983000340, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 12717495542064580601, - 7141139343320249805, + 15441160910541485670, + 12448449173148052031, 18446744073709551615, 18446744073709551615, - 23, - 38, - 23, - 38, - 6, - 9, + 184, + 186, + 184, + 186, + 36, + 37, true, - "Random Forests.", - "Random Forests." + "of", + "of" ], [ - "sentence", - "", - 14718288547983000340, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 378110784683181940, - 10684547640314883302, + 15441160910541485670, + 12448449173148050529, 18446744073709551615, 18446744073709551615, + 193, + 195, + 193, + 195, + 38, 39, - 82, - 39, - 82, - 9, - 22, true, - "Machine Learning 45, 1 (01 Oct 2001), 5-32.", - "Machine Learning 45, 1 (01 Oct 2001), 5-32." + "of", + "of" ], [ - "term", - "single-term", - 14718288547983000340, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 4182884638369411954, - 3549752055104827895, + 12178341415895623120, + 598445003466491402, 18446744073709551615, 18446744073709551615, - 4, - 15, - 4, - 15, - 1, - 3, + 331, + 334, + 331, + 334, + 67, + 68, true, - "Leo Breiman", - "Leo Breiman" + "out", + "out" ], [ - "term", - "single-term", - 14718288547983000340, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 2109081024677782429, - 14560503901773287747, + 14814148868025447689, + 6811951436730744836, 18446744073709551615, 18446744073709551615, - 23, - 37, - 23, - 37, - 6, - 8, + 335, + 343, + 335, + 343, + 68, + 70, true, - "Random Forests", - "Random Forests" + "of these", + "of these" ], [ - "term", - "single-term", - 14718288547983000340, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 13278563109182224937, - 9894237306486099503, + 15441160910541485670, + 12448449173147950427, 18446744073709551615, 18446744073709551615, - 39, - 55, - 39, - 55, - 9, - 11, - true, - "Machine Learning", - "Machine Learning" - ], + 360, + 362, + 360, + 362, + 72, + 73, + true, + "of", + "of" + ], [ - "term", - "single-term", - 14718288547983000340, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/101", + "#/texts/94", 1.0, - 12178341415896271308, - 12282147181195563083, + 8106398472718381934, + 9575911640413642094, 18446744073709551615, 18446744073709551615, - 66, - 69, - 66, - 69, - 16, - 17, + 370, + 377, + 370, + 377, + 74, + 76, true, - "Oct", - "Oct" + "after a", + "after a" ], [ - "reference", - "author", - 16943780574244090186, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/102", + "#/texts/94", 1.0, - 1401374873664364883, - 11647727014815681179, + 8106477988572616406, + 15264315134668474563, 18446744073709551615, 18446744073709551615, - 4, - 14, - 4, - 14, - 1, - 4, + 404, + 411, + 404, + 411, + 78, + 80, true, - "R. Cattoni", - "R. Cattoni" + "with an", + "with an" ], [ - "reference", - "author", - 16943780574244090186, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/102", + "#/texts/94", 1.0, - 8489759580118410179, - 13292301803598722609, + 15441160910541485670, + 12448449173148007931, 18446744073709551615, 18446744073709551615, - 16, - 26, - 16, - 26, - 5, - 8, + 491, + 493, + 491, + 493, + 93, + 94, true, - "T. Coianiz", - "T. Coianiz" + "of", + "of" ], [ - "reference", - "author", - 16943780574244090186, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/102", + "#/texts/94", 1.0, - 6842824740074268202, - 13861579202330443089, + 14634130761162415388, + 3833651190149238108, 18446744073709551615, 18446744073709551615, - 28, - 40, - 28, - 40, - 9, - 12, + 534, + 542, + 534, + 542, + 101, + 103, true, - "S. Messelodi", - "S. Messelodi" + "that the", + "that the" ], [ - "reference", - "author", - 16943780574244090186, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/102", + "#/texts/94", 1.0, - 3186691256225071720, - 5893020180892593571, + 6168057894310307081, + 11769172586530017585, 18446744073709551615, 18446744073709551615, - 46, - 59, - 46, - 59, - 14, - 20, + 575, + 584, + 575, + 584, + 108, + 110, true, - "C. M. Modena.", - "C. M. Modena." + "since the", + "since the" ], [ - "reference", - "citation-number", - 16943780574244090186, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/102", + "#/texts/94", 1.0, - 12178341415895577000, - 12922636114896239788, + 12178341415895625940, + 598444982766319560, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 632, + 635, + 632, + 635, + 117, + 118, true, - "[3]", - "[3]" + "for", + "for" ], [ - "reference", - "date", - 16943780574244090186, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/102", + "#/texts/94", 1.0, - 389609625536085742, - 14383425253514843049, + 15441160910541485670, + 12448449173147963145, 18446744073709551615, 18446744073709551615, - 60, - 64, - 60, - 64, - 20, - 21, + 672, + 674, + 672, + 674, + 124, + 125, true, - "1998", - "1998" + "of", + "of" ], [ - "reference", - "title", - 16943780574244090186, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/102", + "#/texts/94", 1.0, - 10272469742902868819, - 13721964765306049914, + 15441160910541487053, + 12448443593105791703, 18446744073709551615, 18446744073709551615, - 66, - 145, - 66, - 145, - 22, - 33, + 698, + 700, + 698, + 700, + 129, + 130, true, - "Geometric layout analysis techniques for document image understanding: a review", - "Geometric layout analysis techniques for document image understanding: a review" + "as", + "as" ], [ - "numval", - "year", - 8004985786049140169, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/103", + "#/texts/94", 1.0, - 389609625548757410, - 11746200903899729970, + 15441160910541486538, + 12448443553082893684, 18446744073709551615, 18446744073709551615, + 710, + 712, + 710, + 712, 132, - 136, - 129, 133, - 27, - 28, true, - "2005", - "2005" + "in", + "in" ], [ - "numval", - "fval", - 8004985786049140169, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/103", + "#/texts/94", 1.0, - 8104408072666212330, - 4511393581502851323, + 15441160910541485670, + 12448449173147964365, 18446744073709551615, 18446744073709551615, - 264, - 271, - 261, - 268, - 57, - 58, + 753, + 755, + 753, + 755, + 142, + 143, true, - "10.1007", - "10.1007" + "of", + "of" ], [ - "numval", - "irng", - 8004985786049140169, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/103", + "#/texts/94", 1.0, - 16380810033755625172, - 7099395661617449598, + 15441160910541485865, + 12448449225007565696, 18446744073709551615, 18446744073709551615, - 240, - 246, - 237, - 243, - 47, - 48, + 23, + 25, + 23, + 25, + 6, + 7, true, - "92-103", - "92-103" + "to", + "to" ], [ - "numval", - "ival", - 8004985786049140169, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/103", + "#/texts/94", 1.0, - 14654384880687695893, - 11510483770024350538, + 15441160910541485865, + 12448449225007823792, 18446744073709551615, 18446744073709551615, - 272, - 280, - 269, - 277, - 59, - 60, + 87, + 89, + 87, + 89, + 20, + 21, true, - "11551362", - "11551362" + "to", + "to" ], [ - "numval", - "ival", - 8004985786049140169, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/103", + "#/texts/94", 1.0, - 17767354399704235153, - 16958274266350069298, + 15441160910541485865, + 12448449225007818811, 18446744073709551615, 18446744073709551615, - 281, - 282, - 278, - 279, - 61, - 62, + 149, + 151, + 149, + 151, + 30, + 31, true, - "9", - "9" + "to", + "to" ], [ - "link", - "url", - 8004985786049140169, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/103", + "#/texts/94", 1.0, - 9115058383761225167, - 648438667166468655, + 15441160910541485865, + 12448449224998732616, 18446744073709551615, 18446744073709551615, - 248, - 282, - 245, - 279, - 49, - 62, + 311, + 313, + 311, + 313, + 63, + 64, true, - "https://doi.org/10.1007/11551362_9", - "https://doi.org/10.1007/11551362_9" + "to", + "to" ], [ - "link", - "doi", - 8004985786049140169, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/103", + "#/texts/94", 1.0, - 5111704410202581687, - 8009806043582861918, + 15441160910541485865, + 12448449224998740693, 18446744073709551615, 18446744073709551615, - 256, - 282, - 253, - 279, - 53, - 62, + 452, + 454, + 452, + 454, + 87, + 88, true, - "doi.org/10.1007/11551362_9", - "doi.org/10.1007/11551362_9" + "to", + "to" ], [ - "name", - "name-concatenation", - 8004985786049140169, + "conn", + "single-conn", + 17247086344435786796, "TEXT", - "#/texts/103", + "#/texts/94", 1.0, - 1570415358681803520, - 1677517284588620982, + 15441160910541487889, + 12448443566585721442, 18446744073709551615, 18446744073709551615, - 4, - 15, - 4, - 15, - 1, - 4, + 724, + 726, + 724, + 726, + 137, + 138, true, - "Jean-Pierre", - "Jean-Pierre" + "To", + "To" ], [ - "name", - "name-concatenation", - 8004985786049140169, + "expression", + "word-concatenation", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 14652280738021138433, - 16881362210242463731, + 17249225789261661029, + 3807297211102715149, 18446744073709551615, 18446744073709551615, - 114, - 122, - 111, - 119, - 22, - 25, + 12, + 28, + 12, + 28, + 1, + 2, true, - "Jean-Luc", - "Jean-Luc" + "data-parallelism", + "data-parallelism" ], [ - "parenthesis", - "reference", - 8004985786049140169, + "expression", + "word-concatenation", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 12178341415895577065, - 17281225859930936863, + 8685358683472264781, + 17027290145523372529, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 87, + 105, + 87, + 105, + 12, + 13, true, - "[4]", - "[4]" + "user-customisation", + "user-customisation" ], [ - "expression", - "wtoken-concatenation", - 8004985786049140169, + "sentence", + "", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 12178341415895577065, - 17281225859930936863, + 10588183979877639592, + 1367000647117206524, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, + 12, + 119, + 12, + 119, 1, + 15, true, - "[4]", - "[4]" + "data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", + "data-parallelism in order to speed up the training and provide interactive user-customisation capabilities." ], [ - "sentence", - "", - 8004985786049140169, + "term", + "single-term", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 1454337940209217576, - 15847786010983528714, + 9998261106336570604, + 4856078764969945002, 18446744073709551615, 18446744073709551615, - 0, - 131, - 0, - 128, - 0, - 27, + 75, + 118, + 75, + 118, + 11, + 14, true, - "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier.", - "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier." + "interactive user-customisation capabilities", + "interactive user-customisation capabilities" ], [ - "sentence", - "", - 8004985786049140169, + "term", + "single-term", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 980690805708056428, - 10837072452839845549, + 17249225789261661029, + 3807297211102715149, 18446744073709551615, 18446744073709551615, - 138, - 191, - 135, - 188, - 29, - 39, + 12, + 28, + 12, + 28, + 1, + 2, true, - "From Legacy Documents to XML: A Conversion Framework.", - "From Legacy Documents to XML: A Conversion Framework." + "data-parallelism", + "data-parallelism" ], [ - "sentence", - "", - 8004985786049140169, + "term", + "single-term", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 9562882546299798530, - 8479999919787955978, + 329104161571401725, + 3792502362005124423, 18446744073709551615, 18446744073709551615, - 192, - 247, - 189, - 244, - 39, - 49, + 32, + 37, + 32, + 37, + 3, + 4, true, - "Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103.", - "Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103." + "order", + "order" ], [ "term", - "enum-term-mark-4", - 8004985786049140169, + "single-term", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 17602133858301996024, - 15696295076363719071, + 14634153919632515335, + 3840780376526095372, 18446744073709551615, 18446744073709551615, + 54, + 62, + 54, + 62, + 8, 9, - 118, - 9, - 115, - 3, - 23, true, - "Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean", - "Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean" + "training", + "training" ], [ - "term", - "single-term", - 8004985786049140169, + "verb", + "single-verb", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 10319721072853010428, - 6489882772390524296, + 541003494147177743, + 2376460771711104984, 18446744073709551615, 18446744073709551615, - 9, - 22, - 9, - 22, - 3, - 5, + 0, + 11, + 0, + 11, + 0, + 1, true, - "Pierre Chanod", - "Pierre Chanod" + "specialised", + "specialised" ], [ - "term", - "single-term", - 8004985786049140169, + "verb", + "single-verb", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 7554933550167443736, - 13411551703313480687, + 329104161639049345, + 3799043945253257651, 18446744073709551615, 18446744073709551615, - 24, 41, - 24, + 46, 41, + 46, + 5, 6, - 8, true, - "Boris Chidlovskii", - "Boris Chidlovskii" + "speed", + "speed" ], [ - "term", - "single-term", - 8004985786049140169, + "verb", + "single-verb", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 16299981998052668228, - 10120159009512117499, + 8106476000214061408, + 9620881782228868220, 18446744073709551615, 18446744073709551615, - 43, - 56, - 43, - 55, - 9, + 67, + 74, + 67, + 74, + 10, 11, true, - "Herv\u00e9 Dejean", - "Herv\u00e9 Dejean" + "provide", + "provide" ], [ - "term", - "single-term", - 8004985786049140169, + "conn", + "single-conn", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 12186041413076963653, - 1815357622671572381, + 15441160910541486538, + 14667436044722575629, 18446744073709551615, 18446744073709551615, - 58, - 72, - 57, - 71, - 12, - 14, + 29, + 31, + 29, + 31, + 2, + 3, true, - "Olivier Fambon", - "Olivier Fambon" + "in", + "in" ], [ - "term", - "single-term", - 8004985786049140169, + "conn", + "single-conn", + 10287541089279789496, "TEXT", - "#/texts/103", + "#/texts/95", 1.0, - 10757542349073996342, - 681372576460736923, + 15441160910541485865, + 14667435948507858038, 18446744073709551615, 18446744073709551615, - 74, - 91, - 73, - 88, - 15, - 17, + 38, + 40, + 38, + 40, + 4, + 5, true, - "J\u00e9r\u00f4me Fuselier", - "J\u00e9r\u00f4me Fuselier" + "to", + "to" + ], + [ + "sentence", + "", + 15983582675278266440, + "TEXT", + "#/texts/97", + 1.0, + 5556222901900980902, + 15746519596852768008, + 18446744073709551615, + 18446744073709551615, + 0, + 127, + 0, + 127, + 0, + 22, + true, + "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", + "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system." ], [ "term", - "single-term", - 8004985786049140169, + "enum-term-mark-4", + 15983582675278266440, "TEXT", - "#/texts/103", + "#/texts/97", 1.0, - 17756104824925179897, - 12319066590629211102, + 13556182311682325280, + 9761797471225359212, 18446744073709551615, 18446744073709551615, - 93, - 108, - 90, - 105, - 18, - 20, + 32, + 66, + 32, + 66, + 6, + 11, true, - "Thierry Jacquin", - "Thierry Jacquin" + "Roxana Istrate and Matthieu Mottet", + "Roxana Istrate and Matthieu Mottet" ], [ "term", "single-term", - 8004985786049140169, + 15983582675278266440, "TEXT", - "#/texts/103", + "#/texts/97", 1.0, - 5537218577218077560, - 2866017161052533450, + 7949755686502200390, + 668350583233417234, 18446744073709551615, 18446744073709551615, - 119, - 130, - 116, - 127, - 24, - 26, + 32, + 46, + 32, + 46, + 6, + 8, true, - "Luc Meunier", - "Luc Meunier" + "Roxana Istrate", + "Roxana Istrate" ], [ "term", "single-term", - 8004985786049140169, + 15983582675278266440, "TEXT", - "#/texts/103", + "#/texts/97", 1.0, - 4381219347563518937, - 4225593426044066727, + 422584487912656734, + 11698320462170095527, 18446744073709551615, 18446744073709551615, - 143, - 159, - 140, - 156, - 30, - 32, + 51, + 66, + 51, + 66, + 9, + 11, true, - "Legacy Documents", - "Legacy Documents" + "Matthieu Mottet", + "Matthieu Mottet" ], [ "term", "single-term", - 8004985786049140169, + 15983582675278266440, "TEXT", - "#/texts/103", + "#/texts/97", 1.0, - 15039437164843108785, - 909619298391709716, + 244635901031456436, + 9625433519480199700, 18446744073709551615, 18446744073709551615, - 170, - 190, - 167, - 187, - 36, - 38, + 116, + 126, + 116, + 126, + 19, + 21, true, - "Conversion Framework", - "Conversion Framework" + "CCS system", + "CCS system" ], [ "term", "single-term", - 8004985786049140169, + 15983582675278266440, "TEXT", - "#/texts/103", + "#/texts/97", 1.0, - 14297842595136370149, - 6710149947699048907, + 8106397759446161562, + 5642353918280438479, 18446744073709551615, 18446744073709551615, - 192, - 218, - 189, - 215, - 39, - 42, + 4, + 11, + 4, + 11, + 1, + 2, true, - "Springer Berlin Heidelberg", - "Springer Berlin Heidelberg" + "authors", + "authors" ], [ "term", "single-term", - 8004985786049140169, + 15983582675278266440, "TEXT", - "#/texts/103", + "#/texts/97", 1.0, - 16381206535680833456, - 11649598247391704340, + 4603153860084293890, + 9630773090599505701, 18446744073709551615, 18446744073709551615, - 220, - 226, - 217, - 223, - 43, - 44, + 77, + 89, + 77, + 89, + 13, + 14, true, - "Berlin", - "Berlin" + "contribution", + "contribution" ], [ "term", "single-term", - 8004985786049140169, + 15983582675278266440, "TEXT", - "#/texts/103", + "#/texts/97", 1.0, - 4638979131570902619, - 4604157803470259425, + 1525875096007260836, + 16905177906202921866, 18446744073709551615, 18446744073709551615, - 228, - 238, - 225, - 235, - 45, - 46, + 97, + 108, + 97, + 108, + 16, + 17, true, - "Heidelberg", - "Heidelberg" + "development", + "development" ], [ "verb", - "single-verb", - 8004985786049140169, + "compound-verb", + 15983582675278266440, "TEXT", - "#/texts/103", + "#/texts/97", 1.0, - 12178341415895541463, - 17281175695706441462, + 17737636287413194494, + 18246863768738587194, 18446744073709551615, 18446744073709551615, - 163, - 166, - 160, - 163, - 33, - 34, + 12, + 31, + 12, + 31, + 2, + 6, true, - "XML", - "XML" + "would like to thank", + "would like to thank" ], [ "conn", "single-conn", - 8004985786049140169, + 15983582675278266440, "TEXT", - "#/texts/103", + "#/texts/97", 1.0, - 389609625538216073, - 11746398078428097470, + 12178341415895625940, + 9042585404458343529, 18446744073709551615, 18446744073709551615, - 138, - 142, - 135, - 139, - 29, - 30, + 67, + 70, + 67, + 70, + 11, + 12, true, - "From", - "From" + "for", + "for" ], [ "conn", "single-conn", - 8004985786049140169, + 15983582675278266440, "TEXT", - "#/texts/103", + "#/texts/97", 1.0, - 15441160910541485865, - 2839021695369005356, + 16381206565712212855, + 15703671923459609107, 18446744073709551615, 18446744073709551615, - 160, - 162, - 157, - 159, - 32, - 33, - true, - "to", - "to" + 109, + 115, + 109, + 115, + 17, + 19, + true, + "of the", + "of the" ], [ - "numval", - "year", - 12744546813104546377, + "conn", + "single-conn", + 15983582675278266440, "TEXT", - "#/texts/104", + "#/texts/97", 1.0, - 389609625548777059, - 1587769393776818040, + 15441160910541485865, + 16366793807298640842, 18446744073709551615, 18446744073709551615, - 19, 23, - 19, + 25, 23, + 25, 4, 5, true, - "2015", - "2015" + "to", + "to" ], [ - "numval", - "year", - 12744546813104546377, + "conn", + "single-conn", + 15983582675278266440, "TEXT", - "#/texts/104", + "#/texts/97", 1.0, - 389609625548777059, - 1587769393776763538, + 16381206519425733256, + 17710263008813390102, 18446744073709551615, 18446744073709551615, - 59, - 63, - 59, - 63, - 13, + 90, + 96, + 90, + 96, 14, + 16, true, - "2015", - "2015" + "to the", + "to the" ], [ "numval", "year", - 12744546813104546377, - "TEXT", - "#/texts/104", - 1.0, - 389609625548777059, - 1587769393776757579, - 18446744073709551615, - 18446744073709551615, - 216, - 220, - 216, - 220, - 53, - 54, - true, - "2015", - "2015" - ], - [ - "numval", - "fval", - 12744546813104546377, - "TEXT", - "#/texts/104", - 1.0, - 8104408072666216409, - 14220288417264645869, - 18446744073709551615, - 18446744073709551615, - 203, - 210, - 203, - 210, - 49, - 50, - true, - "10.1109", - "10.1109" - ], - [ - "numval", - "irng", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 10303975503395430788, - 13846363068497305469, + 389609625548777262, + 15175051322594687321, 18446744073709551615, 18446744073709551615, - 176, - 185, - 176, - 185, + 175, + 179, + 175, + 179, 39, 40, true, - "1440-1448", - "1440-1448" - ], - [ - "numval", - "ival", - 12744546813104546377, - "TEXT", - "#/texts/104", - 1.0, - 15441160910541481979, - 3495651879263029623, - 18446744073709551615, - 18446744073709551615, - 127, - 129, - 127, - 129, - 26, - 27, - true, - "15", - "15" + "2020", + "2020" ], [ "numval", "ival", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 12178341415896420726, - 15205590251298949236, + 16380810010060182105, + 2087358970220343258, 18446744073709551615, 18446744073709551615, - 221, - 224, - 221, - 224, - 55, - 56, + 232, + 238, + 232, + 238, + 47, + 48, true, - "169", - "169" + "721027", + "721027" ], [ "link", "url", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 8704287819835955947, - 1152182854074722114, + 4558951843677957919, + 6153426188298487244, 18446744073709551615, 18446744073709551615, - 187, - 224, - 187, - 224, - 41, - 56, + 44, + 62, + 44, + 62, + 9, + 16, true, - "https://doi.org/10.1109/ICCV.2015.169", - "https://doi.org/10.1109/ICCV.2015.169" + "http://nccr-marvel", + "http://nccr-marvel" ], [ "link", - "doi", - 12744546813104546377, - "TEXT", - "#/texts/104", - 1.0, - 16500190859490903724, - 11881156334101563754, - 18446744073709551615, - 18446744073709551615, - 195, - 224, - 195, - 224, - 45, - 56, - true, - "doi.org/10.1109/ICCV.2015.169", - "doi.org/10.1109/ICCV.2015.169" - ], - [ - "parenthesis", - "reference", - 12744546813104546377, + "url", + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 12178341415895577901, - 15205622006266309913, + 1840514147198720564, + 15805877038624594621, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 240, + 267, + 240, + 267, + 49, + 60, true, - "[5]", - "[5]" + "http://the-force-project.eu", + "http://the-force-project.eu" ], [ "parenthesis", "round brackets", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 16380808314424790428, - 13641327031679638352, + 9725913318321680311, + 4961995203860234930, 18446744073709551615, 18446744073709551615, - 113, - 119, - 113, - 119, - 20, - 23, + 43, + 67, + 43, + 67, + 8, + 19, true, - "(ICCV)", - "(ICCV)" + "(http://nccr-marvel. ch)", + "(http://nccr-marvel. ch)" ], [ "parenthesis", "round brackets", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 7096417115544771815, - 11395153180835677355, + 2988796312331131177, + 6687402703764012002, 18446744073709551615, 18446744073709551615, - 120, - 130, - 120, - 130, - 23, - 28, + 239, + 268, + 239, + 268, + 48, + 61, true, - "(ICCV '15)", - "(ICCV '15)" + "(http://the-force-project.eu)", + "(http://the-force-project.eu)" ], [ "expression", - "word-concatenation", - 12744546813104546377, + "wtoken-concatenation", + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 329104162326555074, - 11875090144383732350, + 10991632387650324970, + 16837241231127303249, 18446744073709551615, 18446744073709551615, - 30, - 35, - 30, - 35, - 7, - 8, + 186, + 198, + 186, + 198, + 41, + 42, true, - "R-CNN", - "R-CNN" + "NMBP-23-2016", + "NMBP-23-2016" ], [ - "expression", - "wtoken-concatenation", - 12744546813104546377, + "sentence", + "", + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 12178341415895577901, - 15205622006266309913, + 9490709138959189212, + 12250691288144973169, 18446744073709551615, 18446744073709551615, 0, - 3, + 117, 0, - 3, + 117, 0, - 1, + 28, true, - "[5]", - "[5]" + "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation.", + "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation." ], [ "sentence", "", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 9476456823999413175, - 1770001191116537879, + 1102470314652222820, + 15601071363037323756, 18446744073709551615, 18446744073709551615, - 0, - 18, - 0, - 18, - 0, - 4, + 118, + 269, + 118, + 269, + 28, + 62, true, - "[5] Ross Girshick.", - "[5] Ross Girshick." + "MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", + "MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu)." ], [ - "sentence", - "", - 12744546813104546377, + "term", + "single-term", + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 2498161724771347283, - 9048804449047555840, + 9107120802959375325, + 14854418713978759874, 18446744073709551615, 18446744073709551615, - 25, - 36, - 25, - 36, + 31, + 42, + 31, + 42, 6, - 9, + 8, true, - "Fast R-CNN.", - "Fast R-CNN." + "NCCR MARVEL", + "NCCR MARVEL" ], [ - "sentence", - "", - 12744546813104546377, + "term", + "single-term", + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 15441257360238259822, - 5601527666409867545, + 4312908239263712749, + 12629609910975902459, 18446744073709551615, 18446744073709551615, - 37, - 131, - 37, - 131, - 9, - 29, + 83, + 116, + 83, + 116, + 23, + 27, true, - "In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15).", - "In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15)." + "Swiss National Science Foundation", + "Swiss National Science Foundation" ], [ - "sentence", - "", - 12744546813104546377, + "term", + "single-term", + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 1794451012634991315, - 7573806955644456863, + 15770732106686559794, + 7781941783302435281, 18446744073709551615, 18446744073709551615, - 132, - 186, - 132, - 186, - 29, - 41, + 142, + 155, + 142, + 155, + 33, + 35, true, - "IEEE Computer Society, Washington, DC, USA, 1440-1448.", - "IEEE Computer Society, Washington, DC, USA, 1440-1448." + "FORCE project", + "FORCE project" ], [ "term", "single-term", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 13123599834782083842, - 8538907007420179436, + 4392060515500483083, + 8866459764729165903, 18446744073709551615, 18446744073709551615, - 4, - 17, - 4, - 17, - 1, - 3, + 209, + 231, + 209, + 231, + 44, + 47, true, - "Ross Girshick", - "Ross Girshick" + "Grant agreement number", + "Grant agreement number" ], [ "term", "single-term", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 15491004285883184028, - 17483261521377705764, + 389609625633592023, + 15303732731624508399, 18446744073709551615, 18446744073709551615, - 25, - 35, - 25, - 35, - 6, - 8, + 5, + 9, + 5, + 9, + 1, + 2, true, - "Fast R-CNN", - "Fast R-CNN" + "work", + "work" ], [ "term", "single-term", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 10113513805742010945, - 11458139500661842431, + 15441160910541486943, + 7829630847478764393, 18446744073709551615, 18446744073709551615, 64, - 93, + 66, 64, - 93, - 14, + 66, 17, + 18, true, - "IEEE International Conference", - "IEEE International Conference" + "ch", + "ch" ], [ "term", "single-term", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 10222924410753703457, - 248762853253947982, + 15441160910541480587, + 7830721483973022036, 18446744073709551615, 18446744073709551615, - 97, - 112, - 97, - 112, - 18, - 20, + 118, + 120, + 118, + 120, + 28, + 29, true, - "Computer Vision", - "Computer Vision" + "MD", + "MD" ], [ "term", "single-term", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 12763303431451614333, - 16935168498405799510, + 8106351288219429194, + 18213714777089539961, 18446744073709551615, 18446744073709551615, - 132, - 153, - 132, - 153, - 29, - 32, + 167, + 174, + 167, + 174, + 38, + 39, true, - "IEEE Computer Society", - "IEEE Computer Society" + "Horizon", + "Horizon" ], [ "term", "single-term", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 17329186159823478547, - 16920991383284353179, + 10991632387650324970, + 16837241231127303249, 18446744073709551615, 18446744073709551615, - 40, - 51, - 40, - 51, - 10, - 11, + 186, + 198, + 186, + 198, + 41, + 42, true, - "Proceedings", - "Proceedings" + "NMBP-23-2016", + "NMBP-23-2016" ], [ - "term", - "single-term", - 12744546813104546377, + "verb", + "compound-verb", + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 389609625537760670, - 1654267914364558446, + 13041846394845825316, + 5320956231753433918, 18446744073709551615, 18446744073709551615, - 114, - 118, - 114, - 118, - 21, - 22, + 10, + 23, + 10, + 23, + 2, + 4, true, - "ICCV", - "ICCV" + "was supported", + "was supported" ], [ - "term", - "single-term", - 12744546813104546377, + "verb", + "compound-verb", + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 389609625537760670, - 1654267914364557852, + 13041846394845825316, + 5320956231753459128, 18446744073709551615, 18446744073709551615, 121, - 125, + 134, 121, - 125, - 24, - 25, + 134, + 29, + 31, true, - "ICCV", - "ICCV" + "was supported", + "was supported" ], [ - "term", - "single-term", - 12744546813104546377, + "verb", + "single-verb", + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 5589693159453375122, - 13837519084974782204, + 16381206565272093797, + 5039615147538801699, 18446744073709551615, 18446744073709551615, - 155, - 165, - 155, - 165, - 33, - 34, + 69, + 75, + 69, + 75, + 20, + 21, true, - "Washington", - "Washington" + "funded", + "funded" ], [ - "term", - "single-term", - 12744546813104546377, + "verb", + "single-verb", + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 15441160910541480769, - 3495651894226244878, + 16381206565272093797, + 5039615147538790981, 18446744073709551615, 18446744073709551615, - 167, - 169, - 167, - 169, - 35, + 157, + 163, + 157, + 163, 36, + 37, true, - "DC", - "DC" + "funded", + "funded" ], [ - "term", - "single-term", - 12744546813104546377, + "verb", + "single-verb", + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 12178341415895650394, - 15205628192038338337, + 389609625695109591, + 15313901038780033729, 18446744073709551615, 18446744073709551615, - 171, - 174, - 171, - 174, - 37, - 38, + 199, + 203, + 199, + 203, + 42, + 43, true, - "USA", - "USA" + "call", + "call" ], [ "conn", "single-conn", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 15441160910541480354, - 3495651908653608597, + 16381206574363061705, + 5224141779864768374, 18446744073709551615, 18446744073709551615, + 24, + 30, + 24, + 30, + 4, + 6, + true, + "by the", + "by the" + ], + [ + "conn", + "single-conn", + 12711351442546714716, + "TEXT", + "#/texts/98", + 1.0, + 16381206574363061705, + 5224141779864730347, + 18446744073709551615, + 18446744073709551615, + 76, + 82, + 76, + 82, + 21, + 23, + true, + "by the", + "by the" + ], + [ + "conn", + "single-conn", + 12711351442546714716, + "TEXT", + "#/texts/98", + 1.0, + 16381206574363061705, + 5224141779864726325, + 18446744073709551615, + 18446744073709551615, + 135, + 141, + 135, + 141, + 31, + 33, + true, + "by the", + "by the" + ], + [ + "conn", + "single-conn", + 12711351442546714716, + "TEXT", + "#/texts/98", + 1.0, + 15441160910541486989, + 7829629584886114826, + 18446744073709551615, + 18446744073709551615, + 164, + 166, + 164, + 166, 37, - 39, - 37, - 39, - 9, - 10, + 38, true, - "In", - "In" + "by", + "by" ], [ "conn", "single-conn", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 16381206565712212855, - 2695983015266329611, + 329104159159151530, + 12430892283433612669, 18446744073709551615, 18446744073709551615, - 52, - 58, - 52, - 58, - 11, - 13, + 180, + 185, + 180, + 185, + 40, + 41, true, - "of the", - "of the" + "under", + "under" ], [ "conn", "single-conn", - 12744546813104546377, + 12711351442546714716, "TEXT", - "#/texts/104", + "#/texts/98", 1.0, - 15441160910541485678, - 3495646783295715187, + 389609625618037948, + 15311592167218177731, 18446744073709551615, 18446744073709551615, - 94, - 96, - 94, - 96, - 17, - 18, + 204, + 208, + 204, + 208, + 43, + 44, true, - "on", - "on" + "with", + "with" ], [ "reference", "author", - 16061746189176848219, + 1712774266196702392, "TEXT", - "#/texts/105", + "#/texts/100", 1.0, - 141995704861070506, - 4358412458884164235, + 10921193442290853772, + 7808176325166967948, 18446744073709551615, 18446744073709551615, 4, - 20, + 21, 4, - 20, + 21, 1, - 5, + 4, true, - "Ross B. Girshick", - "Ross B. Girshick" + "A. Antonacopoulos", + "A. Antonacopoulos" ], [ "reference", "author", - 16061746189176848219, + 1712774266196702392, "TEXT", - "#/texts/105", + "#/texts/100", 1.0, - 16700235966000105766, - 16857612526578801697, + 5181382481262336037, + 5307751930075227018, 18446744073709551615, 18446744073709551615, - 22, + 23, 34, - 22, + 23, 34, - 6, + 5, 8, true, - "Jeff Donahue", - "Jeff Donahue" + "C. Clausner", + "C. Clausner" ], [ "reference", "author", - 16061746189176848219, + 1712774266196702392, "TEXT", - "#/texts/105", + "#/texts/100", 1.0, - 3125822382074464058, - 13386372949081827875, + 18410882341323932977, + 3950678732393374894, 18446744073709551615, 18446744073709551615, 36, - 50, + 51, 36, - 50, + 51, 9, - 11, + 12, true, - "Trevor Darrell", - "Trevor Darrell" + "C. Papadopoulos", + "C. Papadopoulos" ], [ "reference", "author", - 16061746189176848219, + 1712774266196702392, "TEXT", - "#/texts/105", + "#/texts/100", 1.0, - 10076860098015848351, - 1698280748488935181, + 6326253284428776844, + 2242368337149903292, 18446744073709551615, 18446744073709551615, - 56, - 71, - 56, - 71, - 13, - 16, + 57, + 73, + 57, + 73, + 14, + 18, true, - "Jitendra Malik.", - "Jitendra Malik." + "S. Pletschacher.", + "S. Pletschacher." ], [ "reference", "citation-number", - 16061746189176848219, + 1712774266196702392, "TEXT", - "#/texts/105", + "#/texts/100", 1.0, - 12178341415895577964, - 1023751500620290990, + 12178341415895551530, + 18332345913337968356, 18446744073709551615, 18446744073709551615, 0, @@ -80693,711 +82759,648 @@ 0, 1, true, - "[6]", - "[6]" + "[1]", + "[1]" ], [ "reference", - "date", - 16061746189176848219, + "container-title", + 1712774266196702392, "TEXT", - "#/texts/105", + "#/texts/100", 1.0, - 389609625548777061, - 894814354396885943, + 2527079864200222812, + 474810476780653321, 18446744073709551615, 18446744073709551615, - 72, - 76, - 72, - 76, - 16, - 17, + 161, + 249, + 161, + 249, + 30, + 42, true, - "2013", - "2013" + "In Proceedings of the 13th International Conference on Document Analysis and Recognition", + "In Proceedings of the 13th International Conference on Document Analysis and Recognition" ], [ "reference", - "date", - 16061746189176848219, + "container-title", + 1712774266196702392, "TEXT", - "#/texts/105", + "#/texts/100", 1.0, - 389609625548777061, - 894814354396890826, + 6558131902220562236, + 4761966619744782752, 18446744073709551615, 18446744073709551615, - 180, - 184, - 180, - 184, - 32, - 33, + 251, + 260, + 251, + 260, + 43, + 45, true, - "2013", - "2013" + "ICDAR2015", + "ICDAR2015" ], [ "reference", - "journal", - 16061746189176848219, + "date", + 1712774266196702392, "TEXT", - "#/texts/105", + "#/texts/100", 1.0, - 389609625536419383, - 889446752040326567, + 389609625548777059, + 4138332198474599496, 18446744073709551615, 18446744073709551615, - 160, - 164, - 160, - 164, - 29, - 30, + 74, + 78, + 74, + 78, + 18, + 19, true, - "CoRR", - "CoRR" + "2015", + "2015" ], [ "reference", - "title", - 16061746189176848219, + "date", + 1712774266196702392, "TEXT", - "#/texts/105", + "#/texts/100", 1.0, - 4208693923929480551, - 3754197794849426338, + 10303630957638511768, + 3815340683710445282, 18446744073709551615, 18446744073709551615, - 78, - 158, - 78, - 158, - 18, - 28, + 270, + 279, + 270, + 279, + 49, + 50, true, - "Rich feature hierarchies for accurate object detection and semantic segmentation", - "Rich feature hierarchies for accurate object detection and semantic segmentation" + "1151-1155", + "1151-1155" ], [ "reference", - "volume", - 16061746189176848219, + "location", + 1712774266196702392, "TEXT", - "#/texts/105", + "#/texts/100", 1.0, - 3979843797462439752, - 2449824314382216916, + 329104162200796337, + 14591806354842233425, 18446744073709551615, 18446744073709551615, - 165, - 178, - 165, - 178, - 30, - 31, + 263, + 268, + 263, + 268, + 47, + 48, true, - "abs/1311.2524", - "abs/1311.2524" + "Nancy", + "Nancy" ], [ "reference", - "author", - 11872392946390819176, + "title", + 1712774266196702392, "TEXT", - "#/texts/106", + "#/texts/100", 1.0, - 8106351942713029604, - 15468997146309510455, + 17804212744220731295, + 13329383501201933373, 18446744073709551615, 18446744073709551615, - 4, - 11, - 4, - 11, - 1, - 3, + 80, + 159, + 80, + 159, + 20, + 29, true, - "Wei Liu", - "Wei Liu" + "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015", + "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015" ], [ - "reference", - "author", - 11872392946390819176, + "numval", + "year", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 7132768279271695, - 1832821379686674159, + 389609625548757414, + 14515784463162085628, 18446744073709551615, 18446744073709551615, - 13, - 30, - 13, - 30, + 17, + 21, + 17, + 21, 4, - 6, + 5, true, - "Dragomir Anguelov", - "Dragomir Anguelov" + "2001", + "2001" ], [ - "reference", - "author", - 11872392946390819176, + "numval", + "year", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 12871845148221275510, - 11451573001119547147, + 389609625548757414, + 14515784463162082595, 18446744073709551615, 18446744073709551615, - 32, - 45, - 32, - 45, - 7, - 9, + 70, + 74, + 70, + 74, + 17, + 18, true, - "Dumitru Erhan", - "Dumitru Erhan" + "2001", + "2001" ], [ - "reference", - "author", - 11872392946390819176, + "numval", + "fval", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 6963214204149412896, - 11905902671968880924, + 8104408072666159999, + 6544755582293006081, 18446744073709551615, 18446744073709551615, - 47, - 64, - 47, - 64, - 10, - 12, + 99, + 106, + 99, + 106, + 30, + 31, true, - "Christian Szegedy", - "Christian Szegedy" + "10.1023", + "10.1023" ], [ - "reference", - "author", - 11872392946390819176, + "numval", + "irng", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 1399468129531522089, - 15637271748350955016, + 389609625655395305, + 14454171207833729215, 18446744073709551615, 18446744073709551615, - 66, - 76, - 66, - 76, - 13, - 15, + 77, + 81, + 77, + 81, + 20, + 21, true, - "Scott Reed", - "Scott Reed" + "5-32", + "5-32" ], [ - "reference", - "author", - 11872392946390819176, + "numval", + "ival", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 12712965187511148158, - 5061563798042056469, + 15441160910541486271, + 2222475241750256418, 18446744073709551615, 18446744073709551615, - 78, - 91, - 78, - 91, - 16, - 20, + 56, + 58, + 56, + 58, + 11, + 12, true, - "Cheng-Yang Fu", - "Cheng-Yang Fu" + "45", + "45" ], [ - "reference", - "author", - 11872392946390819176, + "numval", + "ival", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 3733048493609069913, - 12058083979397468329, + 17767354399704235161, + 1208869658482268274, 18446744073709551615, 18446744073709551615, - 97, - 115, - 97, - 115, - 22, - 27, + 60, + 61, + 60, + 61, + 13, + 14, true, - "Alexander C. Berg.", - "Alexander C. Berg." + "1", + "1" ], [ - "reference", - "citation-number", - 11872392946390819176, + "numval", + "ival", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 12178341415895577775, - 16834182135958034128, + 15441160910541481918, + 2222473849477022423, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 1, + 63, + 65, + 63, + 65, + 15, + 16, true, - "[7]", - "[7]" + "01", + "01" ], [ - "reference", - "date", - 11872392946390819176, + "numval", + "ival", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 389609625548777056, - 12418382060406794776, + 8238939589069097802, + 7985792528150377935, 18446744073709551615, 18446744073709551615, - 116, - 120, - 116, - 120, - 27, - 28, + 109, + 122, + 109, + 122, + 34, + 35, true, - "2016", - "2016" + "1010933404324", + "1010933404324" ], [ - "reference", - "doi", - 11872392946390819176, + "link", + "url", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 3534146179424153776, - 1525705277889903310, + 1225079762841478321, + 13531790532415888950, 18446744073709551615, 18446744073709551615, - 206, - 224, - 206, - 224, - 44, - 45, + 83, + 122, + 83, + 122, + 22, + 35, true, - "https://doi.org/10", - "https://doi.org/10" + "https://doi.org/10.1023/A:1010933404324", + "https://doi.org/10.1023/A:1010933404324" ], [ - "reference", + "link", "doi", - 11872392946390819176, + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 3493950482346635177, - 14172820134834639105, + 3684595426122890679, + 8279367666964033467, 18446744073709551615, 18446744073709551615, - 226, - 250, - 226, - 250, - 46, - 54, + 91, + 122, + 91, + 122, + 26, + 35, true, - "1007/978-3-319-46448-0_2", - "1007/978-3-319-46448-0_2" + "doi.org/10.1023/A:1010933404324", + "doi.org/10.1023/A:1010933404324" ], [ + "parenthesis", "reference", - "location", - 11872392946390819176, + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 389609625536506042, - 12420143175742824125, + 12178341415895551595, + 12282095972636501808, 18446744073709551615, 18446744073709551615, - 193, - 197, - 193, - 197, - 40, - 41, + 0, + 3, + 0, + 3, + 0, + 1, true, - "Cham", - "Cham" + "[2]", + "[2]" ], [ - "reference", - "pages", - 11872392946390819176, + "parenthesis", + "round brackets", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 329104147696968014, - 12309257817181187524, + 1665356215973274173, + 6648617910504136340, 18446744073709551615, 18446744073709551615, - 199, - 204, - 199, - 204, - 42, - 43, + 62, + 75, + 62, + 75, + 14, + 19, true, - "21-37", - "21-37" + "(01 Oct 2001)", + "(01 Oct 2001)" ], [ - "reference", - "publisher", - 11872392946390819176, + "expression", + "wtoken-concatenation", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 18208766556701967117, - 9345293319432700679, + 12178341415895551595, + 12282095972636501808, 18446744073709551615, 18446744073709551615, - 158, - 191, - 158, - 191, - 36, - 39, + 0, + 3, + 0, + 3, + 0, + 1, true, - "Springer International Publishing", - "Springer International Publishing" + "[2]", + "[2]" ], [ - "reference", - "title", - 11872392946390819176, + "sentence", + "", + 14718288547983000340, "TEXT", - "#/texts/106", + "#/texts/101", 1.0, - 10201684882899222639, - 16463858842282873959, + 15712069677801245586, + 15474412409802070037, 18446744073709551615, 18446744073709551615, - 122, - 156, - 122, - 156, - 29, - 35, + 0, + 16, + 0, + 16, + 0, + 4, true, - "SSD: Single Shot MultiBox Detector", - "SSD: Single Shot MultiBox Detector" + "[2] Leo Breiman.", + "[2] Leo Breiman." ], [ - "reference", - "author", - 2956849475535726296, + "sentence", + "", + 14718288547983000340, "TEXT", - "#/texts/107", + "#/texts/101", 1.0, - 5088659084289352829, - 5811844525036759114, + 12717495542064580601, + 7141139343320249805, 18446744073709551615, 18446744073709551615, - 4, - 17, - 4, - 17, - 1, - 3, + 23, + 38, + 23, + 38, + 6, + 9, true, - "Joseph Redmon", - "Joseph Redmon" + "Random Forests.", + "Random Forests." ], [ - "reference", - "author", - 2956849475535726296, + "sentence", + "", + 14718288547983000340, "TEXT", - "#/texts/107", + "#/texts/101", 1.0, - 417695209021750783, - 13441950925666715191, + 378110784683181940, + 10684547640314883302, 18446744073709551615, 18446744073709551615, - 19, - 40, - 19, - 40, - 4, - 7, + 39, + 82, + 39, + 82, + 9, + 22, true, - "Santosh Kumar Divvala", - "Santosh Kumar Divvala" + "Machine Learning 45, 1 (01 Oct 2001), 5-32.", + "Machine Learning 45, 1 (01 Oct 2001), 5-32." ], [ - "reference", - "author", - 2956849475535726296, + "term", + "single-term", + 14718288547983000340, "TEXT", - "#/texts/107", + "#/texts/101", 1.0, - 141995704861070506, - 13286696794844996383, + 4182884638369411954, + 3549752055104827895, 18446744073709551615, 18446744073709551615, - 42, - 58, - 42, - 58, - 8, - 12, - true, - "Ross B. Girshick", - "Ross B. Girshick" - ], - [ - "reference", - "author", - 2956849475535726296, - "TEXT", - "#/texts/107", - 1.0, - 16947174234018208722, - 13965552924856577071, - 18446744073709551615, - 18446744073709551615, - 64, - 76, - 64, - 76, - 14, - 17, - true, - "Ali Farhadi.", - "Ali Farhadi." - ], - [ - "reference", - "citation-number", - 2956849475535726296, - "TEXT", - "#/texts/107", - 1.0, - 12178341415895577838, - 11018125289094672461, - 18446744073709551615, - 18446744073709551615, - 0, - 3, - 0, - 3, - 0, + 4, + 15, + 4, + 15, 1, + 3, true, - "[8]", - "[8]" + "Leo Breiman", + "Leo Breiman" ], [ - "reference", - "container-title", - 2956849475535726296, + "term", + "single-term", + 14718288547983000340, "TEXT", - "#/texts/107", + "#/texts/101", 1.0, - 17631274803144515959, - 18105892991402137032, + 2109081024677782429, + 14560503901773287747, 18446744073709551615, 18446744073709551615, - 140, - 203, - 140, - 203, - 32, - 41, + 23, + 37, + 23, + 37, + 6, + 8, true, - "2016 IEEE Conference on Computer Vision and Pattern Recognition", - "2016 IEEE Conference on Computer Vision and Pattern Recognition" + "Random Forests", + "Random Forests" ], [ - "reference", - "container-title", - 2956849475535726296, + "term", + "single-term", + 14718288547983000340, "TEXT", - "#/texts/107", + "#/texts/101", 1.0, - 389609625526699487, - 17849764824838617245, + 13278563109182224937, + 9894237306486099503, 18446744073709551615, 18446744073709551615, - 205, - 209, - 205, - 209, - 42, - 43, + 39, + 55, + 39, + 55, + 9, + 11, true, - "CVPR", - "CVPR" + "Machine Learning", + "Machine Learning" ], [ - "reference", - "date", - 2956849475535726296, + "term", + "single-term", + 14718288547983000340, "TEXT", - "#/texts/107", + "#/texts/101", 1.0, - 389609625548777056, - 17837801987031958568, + 12178341415896271308, + 12282147181195563083, 18446744073709551615, 18446744073709551615, - 77, - 81, - 77, - 81, + 66, + 69, + 66, + 69, + 16, 17, - 18, - true, - "2016", - "2016" - ], - [ - "reference", - "date", - 2956849475535726296, - "TEXT", - "#/texts/107", - 1.0, - 389609625548777056, - 17837801987031982734, - 18446744073709551615, - 18446744073709551615, - 212, - 216, - 212, - 216, - 45, - 46, true, - "2016", - "2016" + "Oct", + "Oct" ], [ "reference", - "pages", - 2956849475535726296, + "author", + 16943780574244090186, "TEXT", - "#/texts/107", + "#/texts/102", 1.0, - 8104408789271407267, - 9641140559480270364, + 1401374873664364883, + 11647727014815681179, 18446744073709551615, 18446744073709551615, - 219, - 226, - 219, - 226, - 48, - 49, + 4, + 14, + 4, + 14, + 1, + 4, true, - "779-788", - "779-788" + "R. Cattoni", + "R. Cattoni" ], [ "reference", - "title", - 2956849475535726296, + "author", + 16943780574244090186, "TEXT", - "#/texts/107", + "#/texts/102", 1.0, - 5895818558987270699, - 2974553673873283962, + 8489759580118410179, + 13292301803598722609, 18446744073709551615, 18446744073709551615, - 83, - 138, - 83, - 138, - 19, - 31, + 16, + 26, + 16, + 26, + 5, + 8, true, - "You Only Look Once: Unified, Real-Time Object Detection", - "You Only Look Once: Unified, Real-Time Object Detection" + "T. Coianiz", + "T. Coianiz" ], [ "reference", "author", - 6623297047995432604, + 16943780574244090186, "TEXT", - "#/texts/108", + "#/texts/102", 1.0, - 5088659084289352829, - 16235259739729085297, + 6842824740074268202, + 13861579202330443089, 18446744073709551615, 18446744073709551615, - 4, - 17, - 4, - 17, - 1, - 3, + 28, + 40, + 28, + 40, + 9, + 12, true, - "Joseph Redmon", - "Joseph Redmon" + "S. Messelodi", + "S. Messelodi" ], [ "reference", "author", - 6623297047995432604, + 16943780574244090186, "TEXT", - "#/texts/108", + "#/texts/102", 1.0, - 16947174234018208722, - 7021580680610188634, + 3186691256225071720, + 5893020180892593571, 18446744073709551615, 18446744073709551615, - 22, - 34, - 22, - 34, - 4, - 7, + 46, + 59, + 46, + 59, + 14, + 20, true, - "Ali Farhadi.", - "Ali Farhadi." + "C. M. Modena.", + "C. M. Modena." ], [ "reference", "citation-number", - 6623297047995432604, + 16943780574244090186, "TEXT", - "#/texts/108", + "#/texts/102", 1.0, - 12178341415895577640, - 5338477872773862060, + 12178341415895577000, + 12922636114896239788, 18446744073709551615, 18446744073709551615, 0, @@ -81407,2518 +83410,2749 @@ 0, 1, true, - "[9]", - "[9]" + "[3]", + "[3]" ], [ "reference", "date", - 6623297047995432604, + 16943780574244090186, "TEXT", - "#/texts/108", + "#/texts/102", 1.0, - 389609625548777056, - 2625243571990787508, + 389609625536085742, + 14383425253514843049, 18446744073709551615, 18446744073709551615, - 35, - 39, - 35, - 39, - 7, - 8, + 60, + 64, + 60, + 64, + 20, + 21, true, - "2016", - "2016" + "1998", + "1998" ], [ "reference", - "date", - 6623297047995432604, + "title", + 16943780574244090186, "TEXT", - "#/texts/108", + "#/texts/102", 1.0, - 389609625548777056, - 2625243571990783197, + 10272469742902868819, + 13721964765306049914, 18446744073709551615, 18446744073709551615, - 110, - 114, - 110, - 114, - 21, + 66, + 145, + 66, + 145, 22, + 33, true, - "2016", - "2016" + "Geometric layout analysis techniques for document image understanding: a review", + "Geometric layout analysis techniques for document image understanding: a review" ], [ - "reference", - "author", - 2507285765516108280, + "numval", + "year", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 9337887504118347047, - 4966377796769374289, + 389609625548757410, + 11746200903899729970, 18446744073709551615, 18446744073709551615, - 5, - 17, - 5, - 17, - 1, - 3, + 132, + 136, + 129, + 133, + 27, + 28, true, - "Shaoqing Ren", - "Shaoqing Ren" + "2005", + "2005" ], [ - "reference", - "author", - 2507285765516108280, + "numval", + "fval", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 7339447509685488310, - 1490181006860316744, + 8104408072666212330, + 4511393581502851323, 18446744073709551615, 18446744073709551615, - 19, - 29, - 19, - 29, - 4, - 6, + 264, + 271, + 261, + 268, + 57, + 58, true, - "Kaiming He", - "Kaiming He" + "10.1007", + "10.1007" ], [ - "reference", - "author", - 2507285765516108280, + "numval", + "irng", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 13123599834782083842, - 7292467665049010344, + 16380810033755625172, + 7099395661617449598, 18446744073709551615, 18446744073709551615, - 31, - 44, - 31, - 44, - 7, - 9, + 240, + 246, + 237, + 243, + 47, + 48, true, - "Ross Girshick", - "Ross Girshick" + "92-103", + "92-103" ], [ - "reference", - "author", - 2507285765516108280, + "numval", + "ival", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 2904781337729160811, - 16221483782846728585, + 14654384880687695893, + 11510483770024350538, 18446744073709551615, 18446744073709551615, - 50, - 59, - 50, + 272, + 280, + 269, + 277, 59, - 11, - 14, + 60, true, - "Jian Sun.", - "Jian Sun." + "11551362", + "11551362" ], [ - "reference", - "citation-number", - 2507285765516108280, + "numval", + "ival", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 389609625697296215, - 1913545593953328211, + 17767354399704235153, + 16958274266350069298, 18446744073709551615, 18446744073709551615, - 0, - 4, - 0, - 4, - 0, - 1, + 281, + 282, + 278, + 279, + 61, + 62, true, - "[10]", - "[10]" + "9", + "9" ], [ - "reference", - "container-title", - 2507285765516108280, + "link", + "url", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 17791264228691503041, - 2574823334558986016, + 9115058383761225167, + 648438667166468655, 18446744073709551615, 18446744073709551615, - 146, - 201, - 146, - 201, - 30, - 38, + 248, + 282, + 245, + 279, + 49, + 62, true, - "In Advances in Neural Information Processing Systems 28", - "In Advances in Neural Information Processing Systems 28" + "https://doi.org/10.1007/11551362_9", + "https://doi.org/10.1007/11551362_9" ], [ - "reference", - "date", - 2507285765516108280, + "link", + "doi", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 389609625548777059, - 1924763351573441882, + 5111704410202581687, + 8009806043582861918, 18446744073709551615, 18446744073709551615, - 60, - 64, - 60, - 64, - 14, - 15, + 256, + 282, + 253, + 279, + 53, + 62, true, - "2015", - "2015" + "doi.org/10.1007/11551362_9", + "doi.org/10.1007/11551362_9" ], [ - "reference", - "editor", - 2507285765516108280, + "name", + "name-concatenation", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 358905225071115951, - 17349611132874933119, + 1570415358681803520, + 1677517284588620982, 18446744073709551615, 18446744073709551615, - 203, - 268, - 203, - 268, - 39, - 63, + 4, + 15, + 4, + 15, + 1, + 4, true, - "C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett", - "C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett" + "Jean-Pierre", + "Jean-Pierre" ], [ - "reference", - "editor", - 2507285765516108280, + "name", + "name-concatenation", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 12178341415896120351, - 93479150678904308, + 14652280738021138433, + 16881362210242463731, 18446744073709551615, 18446744073709551615, - 270, - 273, - 270, - 273, - 64, - 65, + 114, + 122, + 111, + 119, + 22, + 25, true, - "Eds", - "Eds" + "Jean-Luc", + "Jean-Luc" ], [ + "parenthesis", "reference", - "publisher", - 2507285765516108280, + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 9270493059688133028, - 15988999060375847661, + 12178341415895577065, + 17281225859930936863, 18446744073709551615, 18446744073709551615, - 277, - 294, - 277, - 294, - 68, - 70, + 0, + 3, + 0, + 3, + 0, + 1, true, - "Curran Associates", - "Curran Associates" + "[4]", + "[4]" ], [ - "reference", - "publisher", - 2507285765516108280, + "expression", + "wtoken-concatenation", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 12178341415896263797, - 93480841238860416, + 12178341415895577065, + 17281225859930936863, 18446744073709551615, 18446744073709551615, - 296, - 299, - 296, - 299, - 71, - 72, + 0, + 3, + 0, + 3, + 0, + 1, true, - "Inc", - "Inc" + "[4]", + "[4]" ], [ - "reference", - "title", - 2507285765516108280, + "sentence", + "", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 695901516261617265, - 14331097264748910677, + 1454337940209217576, + 15847786010983528714, 18446744073709551615, 18446744073709551615, - 66, - 144, - 66, - 144, - 16, - 29, + 0, + 131, + 0, + 128, + 0, + 27, true, - "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks", - "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" + "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier.", + "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier." ], [ - "reference", - "url", - 2507285765516108280, + "sentence", + "", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 3374974501831695503, - 17450904193872703176, + 980690805708056428, + 10837072452839845549, 18446744073709551615, 18446744073709551615, - 309, - 420, - 309, - 420, - 76, - 78, + 138, + 191, + 135, + 188, + 29, + 39, true, - "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks", - "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks" + "From Legacy Documents to XML: A Conversion Framework.", + "From Legacy Documents to XML: A Conversion Framework." ], [ - "reference", - "url", - 2507285765516108280, + "sentence", + "", + 8004985786049140169, "TEXT", - "#/texts/109", + "#/texts/103", 1.0, - 12178341415895634440, - 93706065194188109, + 9562882546299798530, + 8479999919787955978, 18446744073709551615, 18446744073709551615, - 422, - 425, - 422, - 425, - 79, - 80, + 192, + 247, + 189, + 244, + 39, + 49, true, - "pdf", - "pdf" + "Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103.", + "Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103." ], [ - "reference", - "author", - 14905276480471286920, + "term", + "enum-term-mark-4", + 8004985786049140169, "TEXT", - "#/texts/110", + "#/texts/103", 1.0, - 4686361850733567621, - 5253767773577297512, + 17602133858301996024, + 15696295076363719071, 18446744073709551615, 18446744073709551615, - 5, - 20, - 5, - 20, - 1, - 5, + 9, + 118, + 9, + 115, + 3, + 23, true, - "Peter W J Staar", - "Peter W J Staar" + "Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean", + "Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean" ], [ - "reference", - "author", - 14905276480471286920, + "term", + "single-term", + 8004985786049140169, "TEXT", - "#/texts/110", + "#/texts/103", 1.0, - 1571808557594152175, - 1746337992895366641, + 10319721072853010428, + 6489882772390524296, 18446744073709551615, 18446744073709551615, + 9, 22, - 35, + 9, 22, - 35, + 3, + 5, + true, + "Pierre Chanod", + "Pierre Chanod" + ], + [ + "term", + "single-term", + 8004985786049140169, + "TEXT", + "#/texts/103", + 1.0, + 7554933550167443736, + 13411551703313480687, + 18446744073709551615, + 18446744073709551615, + 24, + 41, + 24, + 41, 6, 8, true, - "Michele Dolfi", - "Michele Dolfi" + "Boris Chidlovskii", + "Boris Chidlovskii" ], [ - "reference", - "author", - 14905276480471286920, + "term", + "single-term", + 8004985786049140169, "TEXT", - "#/texts/110", + "#/texts/103", 1.0, - 9737597816447750448, - 2973540942666074124, + 16299981998052668228, + 10120159009512117499, 18446744073709551615, 18446744073709551615, - 37, - 51, - 37, - 51, + 43, + 56, + 43, + 55, 9, 11, true, - "Christoph Auer", - "Christoph Auer" + "Herv\u00e9 Dejean", + "Herv\u00e9 Dejean" ], [ - "reference", - "author", - 14905276480471286920, + "term", + "single-term", + 8004985786049140169, "TEXT", - "#/texts/110", + "#/texts/103", 1.0, - 13732913329338511598, - 166477832047526898, + 12186041413076963653, + 1815357622671572381, 18446744073709551615, 18446744073709551615, + 58, + 72, 57, - 70, - 57, - 70, - 13, - 16, + 71, + 12, + 14, true, - "Costas Bekas.", - "Costas Bekas." + "Olivier Fambon", + "Olivier Fambon" ], [ - "reference", - "citation-number", - 14905276480471286920, + "term", + "single-term", + 8004985786049140169, "TEXT", - "#/texts/110", + "#/texts/103", 1.0, - 389609625697296278, - 16564150102059325413, + 10757542349073996342, + 681372576460736923, 18446744073709551615, 18446744073709551615, - 0, - 4, - 0, - 4, - 0, - 1, + 74, + 91, + 73, + 88, + 15, + 17, true, - "[11]", - "[11]" + "J\u00e9r\u00f4me Fuselier", + "J\u00e9r\u00f4me Fuselier" ], [ - "reference", - "date", - 14905276480471286920, + "term", + "single-term", + 8004985786049140169, "TEXT", - "#/texts/110", + "#/texts/103", 1.0, - 389609625548777054, - 16555452686088781228, + 17756104824925179897, + 12319066590629211102, 18446744073709551615, 18446744073709551615, - 71, - 75, - 71, - 75, - 16, - 17, + 93, + 108, + 90, + 105, + 18, + 20, true, - "2018", - "2018" + "Thierry Jacquin", + "Thierry Jacquin" ], [ - "reference", - "title", - 14905276480471286920, + "term", + "single-term", + 8004985786049140169, "TEXT", - "#/texts/110", + "#/texts/103", 1.0, - 16083247419427271197, - 18033265608713009513, + 5537218577218077560, + 2866017161052533450, 18446744073709551615, 18446744073709551615, - 77, - 133, - 77, - 133, - 18, + 119, + 130, + 116, + 127, + 24, 26, true, - "Corpus Conversion Service poster at the SysML conference", - "Corpus Conversion Service poster at the SysML conference" + "Luc Meunier", + "Luc Meunier" ], [ - "reference", - "url", - 14905276480471286920, + "term", + "single-term", + 8004985786049140169, "TEXT", - "#/texts/110", + "#/texts/103", 1.0, - 18429963590603622561, - 12432928173216692023, + 4381219347563518937, + 4225593426044066727, 18446744073709551615, 18446744073709551615, - 135, - 166, - 135, - 166, - 27, - 31, + 143, + 159, + 140, + 156, + 30, + 32, true, - "http://www.sysml.cc/doc/ 76.pdf", - "http://www.sysml.cc/doc/ 76.pdf" + "Legacy Documents", + "Legacy Documents" ], [ - "numval", - "ival", - 16709517892596982787, - "TABLE", - "#/tables/0", + "term", + "single-term", + 8004985786049140169, + "TEXT", + "#/texts/103", 1.0, - 15441160910541481072, - 14925187714232052101, - 2, - 1, - 0, - 2, - 0, - 2, - 0, - 2, + 15039437164843108785, + 909619298391709716, + 18446744073709551615, + 18446744073709551615, + 170, + 190, + 167, + 187, + 36, + 38, true, - "72", - "72" + "Conversion Framework", + "Conversion Framework" ], [ - "numval", - "ival", - 16709517892596982787, - "TABLE", - "#/tables/0", + "term", + "single-term", + 8004985786049140169, + "TEXT", + "#/texts/103", 1.0, - 17767354399704235156, - 6061612085784771330, - 2, - 2, - 0, - 1, - 0, - 1, - 0, - 1, + 14297842595136370149, + 6710149947699048907, + 18446744073709551615, + 18446744073709551615, + 192, + 218, + 189, + 215, + 39, + 42, true, - "4", - "4" + "Springer Berlin Heidelberg", + "Springer Berlin Heidelberg" ], [ - "numval", - "fval", - 16709517892596982787, - "TABLE", - "#/tables/0", + "term", + "single-term", + 8004985786049140169, + "TEXT", + "#/texts/103", 1.0, - 389609625535995626, - 16087508952769745788, - 2, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 16381206535680833456, + 11649598247391704340, + 18446744073709551615, + 18446744073709551615, + 220, + 226, + 217, + 223, + 43, + 44, true, - "0.97", - "0.97" + "Berlin", + "Berlin" ], [ - "numval", - "fval", - 16709517892596982787, - "TABLE", - "#/tables/0", + "term", + "single-term", + 8004985786049140169, + "TEXT", + "#/texts/103", 1.0, - 389609625535995627, - 16087508952857503563, - 2, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 4638979131570902619, + 4604157803470259425, + 18446744073709551615, + 18446744073709551615, + 228, + 238, + 225, + 235, + 45, + 46, true, - "0.98", - "0.98" + "Heidelberg", + "Heidelberg" + ], + [ + "verb", + "single-verb", + 8004985786049140169, + "TEXT", + "#/texts/103", + 1.0, + 12178341415895541463, + 17281175695706441462, + 18446744073709551615, + 18446744073709551615, + 163, + 166, + 160, + 163, + 33, + 34, + true, + "XML", + "XML" + ], + [ + "conn", + "single-conn", + 8004985786049140169, + "TEXT", + "#/texts/103", + 1.0, + 389609625538216073, + 11746398078428097470, + 18446744073709551615, + 18446744073709551615, + 138, + 142, + 135, + 139, + 29, + 30, + true, + "From", + "From" + ], + [ + "conn", + "single-conn", + 8004985786049140169, + "TEXT", + "#/texts/103", + 1.0, + 15441160910541485865, + 2839021695369005356, + 18446744073709551615, + 18446744073709551615, + 160, + 162, + 157, + 159, + 32, + 33, + true, + "to", + "to" ], [ "numval", - "ival", - 16709517892596982787, - "TABLE", - "#/tables/0", + "year", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 17767354399704235162, - 6061612085904261025, - 3, - 0, - 5, - 6, + 389609625548777059, + 1587769393776818040, + 18446744073709551615, + 18446744073709551615, + 19, + 23, + 19, + 23, + 4, 5, - 6, - 1, - 2, true, - "2", - "2" + "2015", + "2015" ], [ "numval", - "ival", - 16709517892596982787, - "TABLE", - "#/tables/0", + "year", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 17767354399704235153, - 6061612080706226208, - 3, - 1, - 0, - 1, - 0, - 1, - 0, - 1, + 389609625548777059, + 1587769393776763538, + 18446744073709551615, + 18446744073709551615, + 59, + 63, + 59, + 63, + 13, + 14, true, - "9", - "9" + "2015", + "2015" ], [ "numval", - "fval", - 16709517892596982787, - "TABLE", - "#/tables/0", + "year", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 12178341415896431533, - 7910815507560570273, - 3, - 2, - 0, - 3, - 0, - 3, - 0, - 3, + 389609625548777059, + 1587769393776757579, + 18446744073709551615, + 18446744073709551615, + 216, + 220, + 216, + 220, + 53, + 54, true, - "0.1", - "0.1" + "2015", + "2015" ], [ "numval", - "ival", - 16709517892596982787, - "TABLE", - "#/tables/0", + "fval", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 17767354399704235160, - 6061612085871184177, - 3, - 3, - 0, - 1, - 0, - 1, - 0, - 1, + 8104408072666216409, + 14220288417264645869, + 18446744073709551615, + 18446744073709551615, + 203, + 210, + 203, + 210, + 49, + 50, true, - "0", - "0" + "10.1109", + "10.1109" ], [ "numval", - "ival", - 16709517892596982787, - "TABLE", - "#/tables/0", + "irng", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 15441160910541481353, - 14925187695918906548, - 3, - 3, - 4, - 6, - 4, - 6, - 2, - 4, + 10303975503395430788, + 13846363068497305469, + 18446744073709551615, + 18446744073709551615, + 176, + 185, + 176, + 185, + 39, + 40, true, - "99", - "99" + "1440-1448", + "1440-1448" ], [ "numval", "ival", - 16709517892596982787, - "TABLE", - "#/tables/0", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 17767354399704235160, - 6061612085871196320, - 3, - 4, - 0, - 1, - 0, - 1, - 0, - 1, + 15441160910541481979, + 3495651879263029623, + 18446744073709551615, + 18446744073709551615, + 127, + 129, + 127, + 129, + 26, + 27, true, - "0", - "0" + "15", + "15" ], [ "numval", "ival", - 16709517892596982787, - "TABLE", - "#/tables/0", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 15441160910541481352, - 14925187696052464601, - 3, - 4, - 4, - 6, - 4, - 6, - 2, - 4, + 12178341415896420726, + 15205590251298949236, + 18446744073709551615, + 18446744073709551615, + 221, + 224, + 221, + 224, + 55, + 56, true, - "98", - "98" + "169", + "169" ], [ - "term", - "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + "link", + "url", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 389609625541547546, - 16087857384187763916, - 0, - 1, - 0, - 4, - 0, - 4, - 0, - 1, + 8704287819835955947, + 1152182854074722114, + 18446744073709551615, + 18446744073709551615, + 187, + 224, + 187, + 224, + 41, + 56, true, - "Time", - "Time" + "https://doi.org/10.1109/ICCV.2015.169", + "https://doi.org/10.1109/ICCV.2015.169" ], [ - "term", - "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + "link", + "doi", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 14635106751859230946, - 2707820963080644969, - 0, - 1, - 8, - 16, - 8, - 16, - 2, - 3, + 16500190859490903724, + 11881156334101563754, + 18446744073709551615, + 18446744073709551615, + 195, + 224, + 195, + 224, + 45, + 56, true, - "solution", - "solution" + "doi.org/10.1109/ICCV.2015.169", + "doi.org/10.1109/ICCV.2015.169" ], [ - "term", - "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + "parenthesis", + "reference", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 389609625541547546, - 16087857384187759837, - 0, - 2, + 12178341415895577901, + 15205622006266309913, + 18446744073709551615, + 18446744073709551615, 0, - 4, + 3, 0, - 4, + 3, 0, 1, true, - "Time", - "Time" + "[5]", + "[5]" ], [ - "term", - "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + "parenthesis", + "round brackets", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 14635106751859230946, - 2707820963080690456, - 0, - 2, - 8, - 16, - 8, - 16, - 2, - 3, + 16380808314424790428, + 13641327031679638352, + 18446744073709551615, + 18446744073709551615, + 113, + 119, + 113, + 119, + 20, + 23, true, - "solution", - "solution" + "(ICCV)", + "(ICCV)" ], [ - "term", - "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + "parenthesis", + "round brackets", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 2026722887841362187, - 16381659398929700384, - 0, - 3, - 0, - 11, - 0, - 11, - 0, - 1, + 7096417115544771815, + 11395153180835677355, + 18446744073709551615, + 18446744073709551615, + 120, + 130, + 120, + 130, + 23, + 28, true, - "Performance", - "Performance" + "(ICCV '15)", + "(ICCV '15)" ], [ - "term", - "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + "expression", + "word-concatenation", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 2026722887841362187, - 16381659398929704944, - 0, - 4, + 329104162326555074, + 11875090144383732350, + 18446744073709551615, + 18446744073709551615, + 30, + 35, + 30, + 35, + 7, + 8, + true, + "R-CNN", + "R-CNN" + ], + [ + "expression", + "wtoken-concatenation", + 12744546813104546377, + "TEXT", + "#/texts/104", + 1.0, + 12178341415895577901, + 15205622006266309913, + 18446744073709551615, + 18446744073709551615, 0, - 11, + 3, 0, - 11, + 3, 0, 1, true, - "Performance", - "Performance" + "[5]", + "[5]" ], [ - "term", - "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + "sentence", + "", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 1656308328846415489, - 13536633304648648095, - 1, - 2, + 9476456823999413175, + 1770001191116537879, + 18446744073709551615, + 18446744073709551615, 0, - 10, + 18, 0, - 10, + 18, 0, - 1, + 4, true, - "Prediction", - "Prediction" + "[5] Ross Girshick.", + "[5] Ross Girshick." ], [ - "term", - "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + "sentence", + "", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 16381206531053803330, - 16833791661508729184, - 2, - 0, - 0, - 6, - 0, + 2498161724771347283, + 9048804449047555840, + 18446744073709551615, + 18446744073709551615, + 25, + 36, + 25, + 36, 6, - 0, - 1, + 9, + true, + "Fast R-CNN.", + "Fast R-CNN." + ], + [ + "sentence", + "", + 12744546813104546377, + "TEXT", + "#/texts/104", + 1.0, + 15441257360238259822, + 5601527666409867545, + 18446744073709551615, + 18446744073709551615, + 37, + 131, + 37, + 131, + 9, + 29, + true, + "In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15).", + "In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15)." + ], + [ + "sentence", + "", + 12744546813104546377, + "TEXT", + "#/texts/104", + 1.0, + 1794451012634991315, + 7573806955644456863, + 18446744073709551615, + 18446744073709551615, + 132, + 186, + 132, + 186, + 29, + 41, true, - "Faster", - "Faster" + "IEEE Computer Society, Washington, DC, USA, 1440-1448.", + "IEEE Computer Society, Washington, DC, USA, 1440-1448." ], [ "term", "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 389609625547525555, - 16087961378783525633, - 2, - 0, - 7, - 11, - 7, - 11, - 2, + 13123599834782083842, + 8538907007420179436, + 18446744073709551615, + 18446744073709551615, + 4, + 17, + 4, + 17, + 1, 3, true, - "RCNN", - "RCNN" + "Ross Girshick", + "Ross Girshick" ], [ "term", "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 329104161533766742, - 6816206597211363397, - 2, - 1, - 3, - 8, - 3, + 15491004285883184028, + 17483261521377705764, + 18446744073709551615, + 18446744073709551615, + 25, + 35, + 25, + 35, + 6, 8, - 2, - 3, true, - "hours", - "hours" + "Fast R-CNN", + "Fast R-CNN" ], [ "term", "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 12178341415895638619, - 7911352193610305315, - 2, - 2, - 2, - 5, - 2, - 5, - 1, - 2, + 10113513805742010945, + 11458139500661842431, + 18446744073709551615, + 18446744073709551615, + 64, + 93, + 64, + 93, + 14, + 17, true, - "sec", - "sec" + "IEEE International Conference", + "IEEE International Conference" ], [ "term", "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 329104161538395681, - 6804090298571621973, - 3, - 0, - 0, - 5, - 0, - 5, - 0, - 1, + 10222924410753703457, + 248762853253947982, + 18446744073709551615, + 18446744073709551615, + 97, + 112, + 97, + 112, + 18, + 20, true, - "YOLOv", - "YOLOv" + "Computer Vision", + "Computer Vision" ], [ "term", "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 329104161533766742, - 6816206597185260967, - 3, - 1, - 2, - 7, - 2, - 7, - 1, - 2, + 12763303431451614333, + 16935168498405799510, + 18446744073709551615, + 18446744073709551615, + 132, + 153, + 132, + 153, + 29, + 32, true, - "hours", - "hours" + "IEEE Computer Society", + "IEEE Computer Society" ], [ "term", "single-term", - 16709517892596982787, - "TABLE", - "#/tables/0", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 12178341415895638619, - 7911352193599591381, - 3, - 2, - 4, - 7, - 4, - 7, - 3, - 4, + 17329186159823478547, + 16920991383284353179, + 18446744073709551615, + 18446744073709551615, + 40, + 51, + 40, + 51, + 10, + 11, true, - "sec", - "sec" + "Proceedings", + "Proceedings" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "term", + "single-term", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 15441160910541480975, - 14063371777824517040, - 2, - 2, - 0, - 2, - 0, - 2, - 0, - 2, + 389609625537760670, + 1654267914364558446, + 18446744073709551615, + 18446744073709551615, + 114, + 118, + 114, + 118, + 21, + 22, true, - "75", - "75" + "ICCV", + "ICCV" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "term", + "single-term", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 17767354399704235160, - 15803300100226782560, - 2, - 3, - 0, - 1, - 0, - 1, - 0, - 1, + 389609625537760670, + 1654267914364557852, + 18446744073709551615, + 18446744073709551615, + 121, + 125, + 121, + 125, + 24, + 25, true, - "0", - "0" + "ICCV", + "ICCV" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "term", + "single-term", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 17767354399704235160, - 15803300100226770099, - 2, - 4, - 0, - 1, - 0, - 1, - 0, - 1, + 5589693159453375122, + 13837519084974782204, + 18446744073709551615, + 18446744073709551615, + 155, + 165, + 155, + 165, + 33, + 34, true, - "0", - "0" + "Washington", + "Washington" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "term", + "single-term", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 17767354399704235160, - 15803300100226790530, - 2, - 5, - 0, - 1, - 0, - 1, - 0, - 1, + 15441160910541480769, + 3495651894226244878, + 18446744073709551615, + 18446744073709551615, + 167, + 169, + 167, + 169, + 35, + 36, true, - "0", - "0" + "DC", + "DC" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "term", + "single-term", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 17767354399704235160, - 15803300100227143682, - 2, - 6, - 0, - 1, - 0, - 1, - 0, - 1, + 12178341415895650394, + 15205628192038338337, + 18446744073709551615, + 18446744073709551615, + 171, + 174, + 171, + 174, + 37, + 38, true, - "0", - "0" + "USA", + "USA" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "conn", + "single-conn", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 17767354399704235160, - 15803300100226706291, - 2, - 7, - 0, - 1, - 0, - 1, - 0, - 1, + 15441160910541480354, + 3495651908653608597, + 18446744073709551615, + 18446744073709551615, + 37, + 39, + 37, + 39, + 9, + 10, true, - "0", - "0" + "In", + "In" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "conn", + "single-conn", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 17767354399704235161, - 15803300100445307688, - 3, - 1, - 0, - 1, - 0, - 1, - 0, - 1, + 16381206565712212855, + 2695983015266329611, + 18446744073709551615, + 18446744073709551615, + 52, + 58, + 52, + 58, + 11, + 13, true, - "1", - "1" + "of the", + "of the" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "conn", + "single-conn", + 12744546813104546377, + "TEXT", + "#/texts/104", 1.0, - 12178341415896199541, - 5837267533537259043, - 3, - 2, - 0, - 3, - 0, - 3, - 0, - 3, + 15441160910541485678, + 3495646783295715187, + 18446744073709551615, + 18446744073709551615, + 94, + 96, + 94, + 96, + 17, + 18, true, - "670", - "670" + "on", + "on" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 16061746189176848219, + "TEXT", + "#/texts/105", 1.0, - 17767354399704235160, - 15803300100227552359, - 3, - 3, - 0, - 1, - 0, - 1, - 0, + 141995704861070506, + 4358412458884164235, + 18446744073709551615, + 18446744073709551615, + 4, + 20, + 4, + 20, 1, + 5, true, - "0", - "0" + "Ross B. Girshick", + "Ross B. Girshick" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 16061746189176848219, + "TEXT", + "#/texts/105", 1.0, - 17767354399704235160, - 15803300100227564948, - 3, - 4, - 0, - 1, - 0, - 1, - 0, - 1, + 16700235966000105766, + 16857612526578801697, + 18446744073709551615, + 18446744073709551615, + 22, + 34, + 22, + 34, + 6, + 8, true, - "0", - "0" + "Jeff Donahue", + "Jeff Donahue" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 16061746189176848219, + "TEXT", + "#/texts/105", 1.0, - 17767354399704235160, - 15803300100227560517, - 3, - 5, - 0, - 1, - 0, - 1, - 0, - 1, + 3125822382074464058, + 13386372949081827875, + 18446744073709551615, + 18446744073709551615, + 36, + 50, + 36, + 50, + 9, + 11, true, - "0", - "0" + "Trevor Darrell", + "Trevor Darrell" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 16061746189176848219, + "TEXT", + "#/texts/105", 1.0, - 17767354399704235160, - 15803300100226887469, - 3, - 6, - 0, - 1, - 0, - 1, - 0, - 1, + 10076860098015848351, + 1698280748488935181, + 18446744073709551615, + 18446744073709551615, + 56, + 71, + 56, + 71, + 13, + 16, true, - "0", - "0" + "Jitendra Malik.", + "Jitendra Malik." ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "citation-number", + 16061746189176848219, + "TEXT", + "#/texts/105", 1.0, - 17767354399704235160, - 15803300100224577771, - 4, - 1, + 12178341415895577964, + 1023751500620290990, + 18446744073709551615, + 18446744073709551615, 0, - 1, + 3, 0, - 1, + 3, 0, 1, true, - "0", - "0" + "[6]", + "[6]" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "date", + 16061746189176848219, + "TEXT", + "#/texts/105", 1.0, - 17767354399704235160, - 15803300100224140347, - 4, - 2, - 0, - 1, - 0, - 1, - 0, - 1, + 389609625548777061, + 894814354396885943, + 18446744073709551615, + 18446744073709551615, + 72, + 76, + 72, + 76, + 16, + 17, true, - "0", - "0" + "2013", + "2013" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "date", + 16061746189176848219, + "TEXT", + "#/texts/105", 1.0, - 12178341415896434935, - 5837266946220083063, - 4, - 3, - 0, - 3, - 0, - 3, - 0, - 3, + 389609625548777061, + 894814354396890826, + 18446744073709551615, + 18446744073709551615, + 180, + 184, + 180, + 184, + 32, + 33, true, - "325", - "325" + "2013", + "2013" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "journal", + 16061746189176848219, + "TEXT", + "#/texts/105", 1.0, - 17767354399704235160, - 15803300100224164889, - 4, - 4, - 0, - 1, - 0, - 1, - 0, - 1, + 389609625536419383, + 889446752040326567, + 18446744073709551615, + 18446744073709551615, + 160, + 164, + 160, + 164, + 29, + 30, true, - "0", - "0" + "CoRR", + "CoRR" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "title", + 16061746189176848219, + "TEXT", + "#/texts/105", 1.0, - 17767354399704235160, - 15803300100224136680, + 4208693923929480551, + 3754197794849426338, + 18446744073709551615, + 18446744073709551615, + 78, + 158, + 78, + 158, + 18, + 28, + true, + "Rich feature hierarchies for accurate object detection and semantic segmentation", + "Rich feature hierarchies for accurate object detection and semantic segmentation" + ], + [ + "reference", + "author", + 11872392946390819176, + "TEXT", + "#/texts/106", + 1.0, + 8106351942713029604, + 15468997146309510455, + 18446744073709551615, + 18446744073709551615, 4, - 5, - 0, - 1, - 0, - 1, - 0, + 11, + 4, + 11, 1, + 3, true, - "0", - "0" + "Wei Liu", + "Wei Liu" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 17767354399704235160, - 15803300100224550808, + 7132768279271695, + 1832821379686674159, + 18446744073709551615, + 18446744073709551615, + 13, + 30, + 13, + 30, 4, 6, - 0, - 1, - 0, - 1, - 0, - 1, true, - "0", - "0" + "Dragomir Anguelov", + "Dragomir Anguelov" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 17767354399704235161, - 15803300100440618586, - 5, - 1, - 0, - 1, - 0, - 1, - 0, - 1, + 12871845148221275510, + 11451573001119547147, + 18446744073709551615, + 18446744073709551615, + 32, + 45, + 32, + 45, + 7, + 9, true, - "1", - "1" + "Dumitru Erhan", + "Dumitru Erhan" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 15441160910541481861, - 14063371834761578936, - 5, - 2, - 0, - 2, - 0, - 2, - 0, - 2, + 6963214204149412896, + 11905902671968880924, + 18446744073709551615, + 18446744073709551615, + 47, + 64, + 47, + 64, + 10, + 12, true, - "17", - "17" + "Christian Szegedy", + "Christian Szegedy" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 17767354399704235160, - 15803300100334898942, - 5, - 3, - 0, - 1, - 0, - 1, - 0, - 1, + 1399468129531522089, + 15637271748350955016, + 18446744073709551615, + 18446744073709551615, + 66, + 76, + 66, + 76, + 13, + 15, true, - "0", - "0" + "Scott Reed", + "Scott Reed" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 329104147816412516, - 1837047046804924097, - 5, - 4, - 0, - 5, - 0, - 5, - 0, - 5, + 12712965187511148158, + 5061563798042056469, + 18446744073709551615, + 18446744073709551615, + 78, + 91, + 78, + 91, + 16, + 20, true, - "56460", - "56460" + "Cheng-Yang Fu", + "Cheng-Yang Fu" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 15441160910541481978, - 14063371734014592858, - 5, - 5, - 0, - 2, - 0, - 2, - 0, - 2, + 3733048493609069913, + 12058083979397468329, + 18446744073709551615, + 18446744073709551615, + 97, + 115, + 97, + 115, + 22, + 27, true, - "14", - "14" + "Alexander C. Berg.", + "Alexander C. Berg." ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "citation-number", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 17767354399704235160, - 15803300100334911438, - 5, - 6, - 0, - 1, - 0, - 1, - 0, - 1, - true, - "0", - "0" - ], - [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", - 1.0, - 17767354399704235160, - 15803300100335666817, - 6, - 1, + 12178341415895577775, + 16834182135958034128, + 18446744073709551615, + 18446744073709551615, 0, - 1, + 3, 0, - 1, + 3, 0, 1, true, - "0", - "0" + "[7]", + "[7]" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "date", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 17767354399704235160, - 15803300100335695504, - 6, - 2, - 0, - 1, - 0, - 1, - 0, - 1, + 389609625548777056, + 12418382060406794776, + 18446744073709551615, + 18446744073709551615, + 116, + 120, + 116, + 120, + 27, + 28, true, - "0", - "0" + "2016", + "2016" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "doi", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 17767354399704235160, - 15803300100335691427, - 6, - 3, - 0, - 1, - 0, - 1, - 0, - 1, + 3534146179424153776, + 1525705277889903310, + 18446744073709551615, + 18446744073709551615, + 206, + 224, + 206, + 224, + 44, + 45, true, - "0", - "0" + "https://doi.org/10", + "https://doi.org/10" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "doi", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 17767354399704235156, - 15803300100423374160, - 6, - 4, - 0, - 1, - 0, - 1, - 0, - 1, + 3493950482346635177, + 14172820134834639105, + 18446744073709551615, + 18446744073709551615, + 226, + 250, + 226, + 250, + 46, + 54, true, - "4", - "4" + "1007/978-3-319-46448-0_2", + "1007/978-3-319-46448-0_2" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "location", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 389609625655502523, - 1616926330272763134, - 6, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 389609625536506042, + 12420143175742824125, + 18446744073709551615, + 18446744073709551615, + 193, + 197, + 193, + 197, + 40, + 41, true, - "4223", - "4223" + "Cham", + "Cham" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "title", + 11872392946390819176, + "TEXT", + "#/texts/106", 1.0, - 15441160910541481788, - 14063371729666955983, - 6, - 6, - 0, - 2, - 0, - 2, - 0, - 2, + 10201684882899222639, + 16463858842282873959, + 18446744073709551615, + 18446744073709551615, + 122, + 156, + 122, + 156, + 29, + 35, true, - "26", - "26" + "SSD: Single Shot MultiBox Detector", + "SSD: Single Shot MultiBox Detector" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 2956849475535726296, + "TEXT", + "#/texts/107", 1.0, - 17767354399704235160, - 15803300100335410790, - 7, - 1, - 0, - 1, - 0, - 1, - 0, + 5088659084289352829, + 5811844525036759114, + 18446744073709551615, + 18446744073709551615, + 4, + 17, + 4, + 17, 1, + 3, true, - "0", - "0" + "Joseph Redmon", + "Joseph Redmon" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 2956849475535726296, + "TEXT", + "#/texts/107", 1.0, - 17767354399704235160, - 15803300100335431255, + 417695209021750783, + 13441950925666715191, + 18446744073709551615, + 18446744073709551615, + 19, + 40, + 19, + 40, + 4, 7, - 2, - 0, - 1, - 0, - 1, - 0, - 1, true, - "0", - "0" + "Santosh Kumar Divvala", + "Santosh Kumar Divvala" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 2956849475535726296, + "TEXT", + "#/texts/107", 1.0, - 17767354399704235160, - 15803300100335435332, - 7, - 3, - 0, - 1, - 0, - 1, - 0, - 1, + 141995704861070506, + 13286696794844996383, + 18446744073709551615, + 18446744073709551615, + 42, + 58, + 42, + 58, + 8, + 12, true, - "0", - "0" + "Ross B. Girshick", + "Ross B. Girshick" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 2956849475535726296, + "TEXT", + "#/texts/107", 1.0, - 17767354399704235160, - 15803300100335423029, - 7, - 4, - 0, - 1, - 0, - 1, - 0, - 1, + 16947174234018208722, + 13965552924856577071, + 18446744073709551615, + 18446744073709551615, + 64, + 76, + 64, + 76, + 14, + 17, true, - "0", - "0" + "Ali Farhadi.", + "Ali Farhadi." ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "citation-number", + 2956849475535726296, + "TEXT", + "#/texts/107", 1.0, - 17767354399704235161, - 15803300100417827613, - 7, - 5, + 12178341415895577838, + 11018125289094672461, + 18446744073709551615, + 18446744073709551615, 0, - 1, + 3, 0, - 1, + 3, 0, 1, true, - "1", - "1" + "[8]", + "[8]" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "container-title", + 2956849475535726296, + "TEXT", + "#/texts/107", 1.0, - 389609625549028785, - 1615012629921730407, - 7, - 6, - 0, - 4, - 0, - 4, - 0, - 4, + 17631274803144515959, + 18105892991402137032, + 18446744073709551615, + 18446744073709551615, + 140, + 203, + 140, + 203, + 32, + 41, true, - "3418", - "3418" + "2016 IEEE Conference on Computer Vision and Pattern Recognition", + "2016 IEEE Conference on Computer Vision and Pattern Recognition" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "container-title", + 2956849475535726296, + "TEXT", + "#/texts/107", 1.0, - 12178341415896426714, - 5837506952496953864, - 8, - 1, - 0, - 3, - 0, - 3, - 0, - 3, + 389609625526699487, + 17849764824838617245, + 18446744073709551615, + 18446744073709551615, + 205, + 209, + 205, + 209, + 42, + 43, true, - "100", - "100" + "CVPR", + "CVPR" ], [ - "numval", - "fval", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "date", + 2956849475535726296, + "TEXT", + "#/texts/107", 1.0, - 329104147618004574, - 1850939892712171199, - 8, - 2, - 0, - 5, - 0, - 5, - 0, - 5, + 389609625548777056, + 17837801987031958568, + 18446744073709551615, + 18446744073709551615, + 77, + 81, + 77, + 81, + 17, + 18, true, - "99.85", - "99.85" + "2016", + "2016" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "date", + 2956849475535726296, + "TEXT", + "#/texts/107", 1.0, - 12178341415896426714, - 5837506952496995114, - 8, - 3, - 0, - 3, - 0, - 3, - 0, - 3, + 389609625548777056, + 17837801987031982734, + 18446744073709551615, + 18446744073709551615, + 212, + 216, + 212, + 216, + 45, + 46, true, - "100", - "100" + "2016", + "2016" ], [ - "numval", - "fval", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "title", + 2956849475535726296, + "TEXT", + "#/texts/107", 1.0, - 329104147617972634, - 1850962284269103299, - 8, - 4, - 0, - 5, - 0, - 5, - 0, - 5, + 5895818558987270699, + 2974553673873283962, + 18446744073709551615, + 18446744073709551615, + 83, + 138, + 83, + 138, + 19, + 31, true, - "99.94", - "99.94" + "You Only Look Once: Unified, Real-Time Object Detection", + "You Only Look Once: Unified, Real-Time Object Detection" ], [ - "numval", - "fval", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 6623297047995432604, + "TEXT", + "#/texts/108", 1.0, - 329104147617973201, - 1850962300609404744, - 8, - 5, - 0, - 5, - 0, - 5, - 0, - 5, + 5088659084289352829, + 16235259739729085297, + 18446744073709551615, + 18446744073709551615, + 4, + 17, + 4, + 17, + 1, + 3, true, - "99.24", - "99.24" + "Joseph Redmon", + "Joseph Redmon" ], [ - "numval", - "fval", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 6623297047995432604, + "TEXT", + "#/texts/108", 1.0, - 329104147617972639, - 1850962292247387325, - 8, - 6, - 0, - 5, - 0, - 5, - 0, - 5, + 16947174234018208722, + 7021580680610188634, + 18446744073709551615, + 18446744073709551615, + 22, + 34, + 22, + 34, + 4, + 7, true, - "99.97", - "99.97" + "Ali Farhadi.", + "Ali Farhadi." ], [ - "numval", - "fval", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "citation-number", + 6623297047995432604, + "TEXT", + "#/texts/108", 1.0, - 329104147618821186, - 1850927305919343478, - 9, - 1, + 12178341415895577640, + 5338477872773862060, + 18446744073709551615, + 18446744073709551615, 0, - 5, + 3, 0, - 5, + 3, 0, - 5, + 1, true, - "97.40", - "97.40" + "[9]", + "[9]" ], [ - "numval", - "fval", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "date", + 6623297047995432604, + "TEXT", + "#/texts/108", 1.0, - 329104147618821120, - 1850927276933057753, - 9, - 2, - 0, - 5, - 0, - 5, - 0, - 5, + 389609625548777056, + 2625243571990787508, + 18446744073709551615, + 18446744073709551615, + 35, + 39, + 35, + 39, + 7, + 8, true, - "97.52", - "97.52" + "2016", + "2016" ], [ - "numval", - "ival", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "date", + 6623297047995432604, + "TEXT", + "#/texts/108", 1.0, - 12178341415896426714, - 5837506952496145978, - 9, - 3, - 0, - 3, - 0, - 3, - 0, - 3, + 389609625548777056, + 2625243571990783197, + 18446744073709551615, + 18446744073709551615, + 110, + 114, + 110, + 114, + 21, + 22, true, - "100", - "100" + "2016", + "2016" ], [ - "numval", - "fval", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 2507285765516108280, + "TEXT", + "#/texts/109", 1.0, - 329104147617972625, - 1850962284440201056, - 9, - 4, - 0, - 5, - 0, + 9337887504118347047, + 4966377796769374289, + 18446744073709551615, + 18446744073709551615, 5, - 0, + 17, 5, + 17, + 1, + 3, true, - "99.99", - "99.99" + "Shaoqing Ren", + "Shaoqing Ren" ], [ - "numval", - "fval", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 2507285765516108280, + "TEXT", + "#/texts/109", 1.0, - 329104147617972438, - 1850969319560412727, - 9, - 5, - 0, - 5, - 0, - 5, - 0, - 5, + 7339447509685488310, + 1490181006860316744, + 18446744073709551615, + 18446744073709551615, + 19, + 29, + 19, + 29, + 4, + 6, true, - "99.64", - "99.64" + "Kaiming He", + "Kaiming He" ], [ - "numval", - "fval", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 2507285765516108280, + "TEXT", + "#/texts/109", 1.0, - 329104147617973201, - 1850962300608540590, + 13123599834782083842, + 7292467665049010344, + 18446744073709551615, + 18446744073709551615, + 31, + 44, + 31, + 44, + 7, 9, - 6, - 0, - 5, - 0, - 5, - 0, - 5, true, - "99.24", - "99.24" + "Ross Girshick", + "Ross Girshick" ], [ - "term", - "single-term", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 2507285765516108280, + "TEXT", + "#/texts/109", 1.0, - 329104161624445793, - 421775788727950862, - 0, - 2, - 10, - 15, - 10, - 15, - 1, - 2, + 2904781337729160811, + 16221483782846728585, + 18446744073709551615, + 18446744073709551615, + 50, + 59, + 50, + 59, + 11, + 14, true, - "label", - "label" + "Jian Sun.", + "Jian Sun." ], [ - "term", - "single-term", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "citation-number", + 2507285765516108280, + "TEXT", + "#/texts/109", 1.0, - 329104161624445793, - 421775788727954943, + 389609625697296215, + 1913545593953328211, + 18446744073709551615, + 18446744073709551615, 0, - 3, - 10, - 15, - 10, - 15, - 1, - 2, - true, - "label", - "label" - ], - [ - "term", - "single-term", - 16041588621504517180, - "TABLE", - "#/tables/1", - 1.0, - 329104161624445793, - 421775788728023184, + 4, 0, 4, - 10, - 15, - 10, - 15, - 1, - 2, - true, - "label", - "label" - ], - [ - "term", - "single-term", - 16041588621504517180, - "TABLE", - "#/tables/1", - 1.0, - 329104161624445793, - 421775788727994369, 0, - 5, - 10, - 15, - 10, - 15, 1, - 2, true, - "label", - "label" + "[10]", + "[10]" ], [ - "term", - "single-term", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "container-title", + 2507285765516108280, + "TEXT", + "#/texts/109", 1.0, - 329104161624445793, - 421775788727998642, - 0, - 6, - 10, - 15, - 10, - 15, - 1, - 2, + 17791264228691503041, + 2574823334558986016, + 18446744073709551615, + 18446744073709551615, + 146, + 201, + 146, + 201, + 30, + 38, true, - "label", - "label" + "In Advances in Neural Information Processing Systems 28", + "In Advances in Neural Information Processing Systems 28" ], [ - "term", - "single-term", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "date", + 2507285765516108280, + "TEXT", + "#/texts/109", 1.0, - 329104161624445793, - 421775788728004387, - 0, - 7, - 10, - 15, - 10, + 389609625548777059, + 1924763351573441882, + 18446744073709551615, + 18446744073709551615, + 60, + 64, + 60, + 64, + 14, 15, - 1, - 2, true, - "label", - "label" + "2015", + "2015" ], [ - "term", - "single-term", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "title", + 2507285765516108280, + "TEXT", + "#/texts/109", 1.0, - 16381206482755721438, - 6650922822359027250, - 1, - 1, - 0, - 6, - 0, - 6, - 0, - 2, + 695901516261617265, + 14331097264748910677, + 18446744073709551615, + 18446744073709551615, + 66, + 144, + 66, + 144, + 16, + 29, true, - "T itle", - "T itle" + "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks", + "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" ], [ - "term", - "single-term", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "url", + 2507285765516108280, + "TEXT", + "#/texts/109", 1.0, - 16381206562428603067, - 4879747397715854867, - 1, - 3, - 0, - 6, - 0, - 6, - 0, - 1, + 3374974501831695503, + 17450904193872703176, + 18446744073709551615, + 18446744073709551615, + 309, + 420, + 309, + 420, + 76, + 78, true, - "Author", - "Author" + "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks", + "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks" ], [ - "term", - "single-term", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "url", + 2507285765516108280, + "TEXT", + "#/texts/109", 1.0, - 14652314692921799233, - 10657560030989347793, - 1, - 4, - 0, - 8, - 0, - 8, - 0, - 1, + 12178341415895634440, + 93706065194188109, + 18446744073709551615, + 18446744073709551615, + 422, + 425, + 422, + 425, + 79, + 80, true, - "Subtitle", - "Subtitle" + "pdf", + "pdf" ], [ - "term", - "single-term", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 14905276480471286920, + "TEXT", + "#/texts/110", 1.0, - 15441160910541487879, - 14063371787734206200, - 1, + 4686361850733567621, + 5253767773577297512, + 18446744073709551615, + 18446744073709551615, 5, - 0, - 2, - 0, - 2, - 0, + 20, + 5, + 20, 1, + 5, true, - "Te", - "Te" + "Peter W J Staar", + "Peter W J Staar" ], [ - "term", - "single-term", - 16041588621504517180, - "TABLE", - "#/tables/1", + "reference", + "author", + 14905276480471286920, + "TEXT", + "#/texts/110", 1.0, - 8106352219904586209, - 11031918618165802042, - 1, + 1571808557594152175, + 1746337992895366641, + 18446744073709551615, + 18446744073709551615, + 22, + 35, + 22, + 35, 6, + 8, + true, + "Michele Dolfi", + "Michele Dolfi" + ], + [ + "reference", + "author", + 14905276480471286920, + "TEXT", + "#/texts/110", + 1.0, + 9737597816447750448, + 2973540942666074124, + 18446744073709551615, + 18446744073709551615, + 37, + 51, + 37, + 51, + 9, + 11, + true, + "Christoph Auer", + "Christoph Auer" + ], + [ + "reference", + "author", + 14905276480471286920, + "TEXT", + "#/texts/110", + 1.0, + 13732913329338511598, + 166477832047526898, + 18446744073709551615, + 18446744073709551615, + 57, + 70, + 57, + 70, + 13, + 16, + true, + "Costas Bekas.", + "Costas Bekas." + ], + [ + "reference", + "citation-number", + 14905276480471286920, + "TEXT", + "#/texts/110", + 1.0, + 389609625697296278, + 16564150102059325413, + 18446744073709551615, + 18446744073709551615, 0, - 7, + 4, 0, - 7, + 4, 0, 1, true, - "Picture", - "Picture" + "[11]", + "[11]" ], [ - "term", - "single-term", - 16041588621504517180, + "reference", + "date", + 14905276480471286920, + "TEXT", + "#/texts/110", + 1.0, + 389609625548777054, + 16555452686088781228, + 18446744073709551615, + 18446744073709551615, + 71, + 75, + 71, + 75, + 16, + 17, + true, + "2018", + "2018" + ], + [ + "reference", + "title", + 14905276480471286920, + "TEXT", + "#/texts/110", + 1.0, + 16083247419427271197, + 18033265608713009513, + 18446744073709551615, + 18446744073709551615, + 77, + 133, + 77, + 133, + 18, + 26, + true, + "Corpus Conversion Service poster at the SysML conference", + "Corpus Conversion Service poster at the SysML conference" + ], + [ + "reference", + "url", + 14905276480471286920, + "TEXT", + "#/texts/110", + 1.0, + 18429963590603622561, + 12432928173216692023, + 18446744073709551615, + 18446744073709551615, + 135, + 166, + 135, + 166, + 27, + 31, + true, + "http://www.sysml.cc/doc/ 76.pdf", + "http://www.sysml.cc/doc/ 76.pdf" + ], + [ + "numval", + "ival", + 16709517892596982787, "TABLE", - "#/tables/1", + "#/tables/0", 1.0, - 4682222921140874465, - 7249830264234759527, + 15441160910541481072, + 14925187714232052101, 2, + 1, 0, + 2, 0, - 10, - 0, - 10, + 2, 0, 2, true, - "true label", - "true label" + "72", + "72" ], [ - "term", - "single-term", - 16041588621504517180, + "numval", + "ival", + 16709517892596982787, "TABLE", - "#/tables/1", + "#/tables/0", 1.0, - 329104161841334670, - 340356054356328763, + 17767354399704235156, + 6061612085784771330, + 2, 2, - 1, 0, - 5, + 1, 0, - 5, + 1, 0, 1, true, - "Title", - "Title" + "4", + "4" ], [ - "term", - "single-term", - 16041588621504517180, + "numval", + "fval", + 16709517892596982787, "TABLE", - "#/tables/1", + "#/tables/0", 1.0, - 15894660811414869651, - 783966399713762072, + 389609625535995626, + 16087508952769745788, + 2, 3, 0, + 4, 0, - 17, - 0, - 17, + 4, 0, - 3, + 4, true, - "true label Author", - "true label Author" + "0.97", + "0.97" ], [ - "term", - "single-term", - 16041588621504517180, + "numval", + "fval", + 16709517892596982787, "TABLE", - "#/tables/1", + "#/tables/0", 1.0, - 13831724478157455165, - 14474441641223495436, + 389609625535995627, + 16087508952857503563, + 2, 4, 0, + 4, 0, - 19, - 0, - 19, + 4, 0, - 3, + 4, true, - "true label Subtitle", - "true label Subtitle" + "0.98", + "0.98" ], [ - "term", - "single-term", - 16041588621504517180, + "numval", + "ival", + 16709517892596982787, "TABLE", - "#/tables/1", + "#/tables/0", 1.0, - 1941183530690029532, - 8884273177240528491, - 5, - 0, - 0, - 15, - 0, - 15, - 0, + 17767354399704235162, + 6061612085904261025, 3, + 0, + 5, + 6, + 5, + 6, + 1, + 2, true, - "true label Text", - "true label Text" + "2", + "2" ], [ - "term", - "single-term", - 16041588621504517180, + "numval", + "ival", + 16709517892596982787, "TABLE", - "#/tables/1", + "#/tables/0", 1.0, - 3368831387223592374, - 12898366621225531538, - 6, - 0, + 17767354399704235153, + 6061612080706226208, + 3, + 1, 0, - 18, + 1, 0, - 18, + 1, 0, - 3, + 1, true, - "true label Picture", - "true label Picture" + "9", + "9" ], [ - "term", - "single-term", - 16041588621504517180, + "numval", + "fval", + 16709517892596982787, "TABLE", - "#/tables/1", + "#/tables/0", 1.0, - 6024510248149615245, - 9009738277270760030, - 7, - 0, + 12178341415896431533, + 7910815507560570273, + 3, + 2, 0, - 16, + 3, 0, - 16, + 3, 0, 3, true, - "true label Table", - "true label Table" + "0.1", + "0.1" ], [ - "term", - "single-term", - 16041588621504517180, + "numval", + "ival", + 16709517892596982787, "TABLE", - "#/tables/1", + "#/tables/0", 1.0, - 15894660815285786750, - 1728790590792959237, - 8, - 0, + 17767354399704235160, + 6061612085871184177, + 3, + 3, 0, - 17, + 1, 0, - 17, + 1, 0, - 3, + 1, true, - "true label Recall", - "true label Recall" + "0", + "0" ], [ - "term", - "single-term", - 16041588621504517180, + "numval", + "ival", + 16709517892596982787, "TABLE", - "#/tables/1", + "#/tables/0", 1.0, - 16647754341621779036, - 9808370296852837060, - 9, - 0, - 0, - 20, - 0, - 20, - 0, + 15441160910541481353, + 14925187695918906548, + 3, 3, + 4, + 6, + 4, + 6, + 2, + 4, true, - "true label Precision", - "true label Precision" + "99", + "99" ], [ "numval", "ival", - 14817357053216629605, + 16709517892596982787, "TABLE", - "#/tables/2", + "#/tables/0", 1.0, - 15441160910541481352, - 14633884986579423126, - 1, - 1, + 17767354399704235160, + 6061612085871196320, + 3, + 4, 0, - 2, + 1, 0, - 2, + 1, 0, - 2, + 1, true, - "98", - "98" + "0", + "0" ], [ "numval", "ival", - 14817357053216629605, + 16709517892596982787, "TABLE", - "#/tables/2", + "#/tables/0", 1.0, - 15441160910541481358, - 14633884986289176499, - 1, - 1, - 5, - 7, - 5, - 7, + 15441160910541481352, + 14925187696052464601, 3, - 5, + 4, + 4, + 6, + 4, + 6, + 2, + 4, true, - "96", - "96" + "98", + "98" ], [ "numval", "ival", - 14817357053216629605, + 16041588621504517180, "TABLE", - "#/tables/2", + "#/tables/1", 1.0, - 15441160910541481353, - 14633884986629840445, - 1, + 15441160910541480975, + 14063371777824517040, + 2, 2, 0, 2, @@ -83927,3271 +86161,6918 @@ 0, 2, true, - "99", - "99" + "75", + "75" ], [ "numval", "ival", - 14817357053216629605, + 16041588621504517180, "TABLE", - "#/tables/2", + "#/tables/1", 1.0, - 15441160910541481394, - 14633884986969604250, - 1, + 17767354399704235160, + 15803300100226782560, 2, - 5, - 7, - 5, - 7, 3, - 5, + 0, + 1, + 0, + 1, + 0, + 1, true, - "83", - "83" + "0", + "0" ], [ "numval", "ival", - 14817357053216629605, + 16041588621504517180, "TABLE", - "#/tables/2", + "#/tables/1", 1.0, - 15441160910541481353, - 14633884986621746969, + 17767354399704235160, + 15803300100226770099, 2, - 1, + 4, 0, - 2, + 1, 0, - 2, + 1, 0, - 2, + 1, true, - "99", - "99" + "0", + "0" ], [ "numval", "ival", - 14817357053216629605, + 16041588621504517180, "TABLE", - "#/tables/2", + "#/tables/1", 1.0, - 15441160910541486270, - 14633895233084857259, + 17767354399704235160, + 15803300100226790530, 2, - 1, - 5, - 7, - 5, - 7, - 3, 5, - true, - "46", - "46" + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" ], [ "numval", "ival", - 14817357053216629605, + 16041588621504517180, "TABLE", - "#/tables/2", + "#/tables/1", 1.0, - 15441160910541481353, - 14633884986621702026, + 17767354399704235160, + 15803300100227143682, + 2, + 6, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100226706291, + 2, + 7, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235161, + 15803300100445307688, + 3, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 12178341415896199541, + 5837267533537259043, + 3, + 2, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "670", + "670" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100227552359, + 3, + 3, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100227564948, + 3, + 4, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100227560517, + 3, + 5, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100226887469, + 3, + 6, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100224577771, + 4, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100224140347, + 4, + 2, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 12178341415896434935, + 5837266946220083063, + 4, + 3, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "325", + "325" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100224164889, + 4, + 4, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100224136680, + 4, + 5, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100224550808, + 4, + 6, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235161, + 15803300100440618586, + 5, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 15441160910541481861, + 14063371834761578936, + 5, + 2, + 0, 2, + 0, + 2, + 0, + 2, + true, + "17", + "17" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100334898942, + 5, + 3, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 329104147816412516, + 1837047046804924097, + 5, + 4, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "56460", + "56460" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 15441160910541481978, + 14063371734014592858, + 5, + 5, + 0, 2, 0, 2, 0, 2, + true, + "14", + "14" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100334911438, + 5, + 6, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100335666817, + 6, + 1, + 0, + 1, 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100335695504, + 6, 2, + 0, + 1, + 0, + 1, + 0, + 1, true, - "99", - "99" + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100335691427, + 6, + 3, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235156, + 15803300100423374160, + 6, + 4, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "4", + "4" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 389609625655502523, + 1616926330272763134, + 6, + 5, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "4223", + "4223" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 15441160910541481788, + 14063371729666955983, + 6, + 6, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "26", + "26" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100335410790, + 7, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100335431255, + 7, + 2, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100335435332, + 7, + 3, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235160, + 15803300100335423029, + 7, + 4, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 17767354399704235161, + 15803300100417827613, + 7, + 5, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 389609625549028785, + 1615012629921730407, + 7, + 6, + 0, + 4, + 0, + 4, + 0, + 4, + true, + "3418", + "3418" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 12178341415896426714, + 5837506952496953864, + 8, + 1, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "100", + "100" + ], + [ + "numval", + "fval", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 329104147618004574, + 1850939892712171199, + 8, + 2, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "99.85", + "99.85" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 12178341415896426714, + 5837506952496995114, + 8, + 3, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "100", + "100" + ], + [ + "numval", + "fval", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 329104147617972634, + 1850962284269103299, + 8, + 4, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "99.94", + "99.94" + ], + [ + "numval", + "fval", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 329104147617973201, + 1850962300609404744, + 8, + 5, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "99.24", + "99.24" + ], + [ + "numval", + "fval", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 329104147617972639, + 1850962292247387325, + 8, + 6, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "99.97", + "99.97" + ], + [ + "numval", + "fval", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 329104147618821186, + 1850927305919343478, + 9, + 1, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "97.40", + "97.40" + ], + [ + "numval", + "fval", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 329104147618821120, + 1850927276933057753, + 9, + 2, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "97.52", + "97.52" + ], + [ + "numval", + "ival", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 12178341415896426714, + 5837506952496145978, + 9, + 3, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "100", + "100" + ], + [ + "numval", + "fval", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 329104147617972625, + 1850962284440201056, + 9, + 4, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "99.99", + "99.99" + ], + [ + "numval", + "fval", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 329104147617972438, + 1850969319560412727, + 9, + 5, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "99.64", + "99.64" + ], + [ + "numval", + "fval", + 16041588621504517180, + "TABLE", + "#/tables/1", + 1.0, + 329104147617973201, + 1850962300608540590, + 9, + 6, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "99.24", + "99.24" + ], + [ + "numval", + "ival", + 14817357053216629605, + "TABLE", + "#/tables/2", + 1.0, + 15441160910541481352, + 14633884986579423126, + 1, + 1, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "98", + "98" + ], + [ + "numval", + "ival", + 14817357053216629605, + "TABLE", + "#/tables/2", + 1.0, + 15441160910541481358, + 14633884986289176499, + 1, + 1, + 5, + 7, + 5, + 7, + 3, + 5, + true, + "96", + "96" + ], + [ + "numval", + "ival", + 14817357053216629605, + "TABLE", + "#/tables/2", + 1.0, + 15441160910541481353, + 14633884986629840445, + 1, + 2, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "99", + "99" + ], + [ + "numval", + "ival", + 14817357053216629605, + "TABLE", + "#/tables/2", + 1.0, + 15441160910541481394, + 14633884986969604250, + 1, + 2, + 5, + 7, + 5, + 7, + 3, + 5, + true, + "83", + "83" + ], + [ + "numval", + "ival", + 14817357053216629605, + "TABLE", + "#/tables/2", + 1.0, + 15441160910541481353, + 14633884986621746969, + 2, + 1, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "99", + "99" + ], + [ + "numval", + "ival", + 14817357053216629605, + "TABLE", + "#/tables/2", + 1.0, + 15441160910541486270, + 14633895233084857259, + 2, + 1, + 5, + 7, + 5, + 7, + 3, + 5, + true, + "46", + "46" + ], + [ + "numval", + "ival", + 14817357053216629605, + "TABLE", + "#/tables/2", + 1.0, + 15441160910541481353, + 14633884986621702026, + 2, + 2, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "99", + "99" + ], + [ + "numval", + "ival", + 14817357053216629605, + "TABLE", + "#/tables/2", + 1.0, + 15441160910541486209, + 14633895297101973839, + 2, + 2, + 5, + 7, + 5, + 7, + 3, + 5, + true, + "58", + "58" + ], + [ + "numval", + "ival", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 17767354399704235161, + 13228032993797423919, + 18446744073709551615, + 18446744073709551615, + 7, + 8, + 7, + 8, + 1, + 2, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 17767354399704235157, + 13228032993803529186, + 18446744073709551615, + 18446744073709551615, + 105, + 106, + 105, + 106, + 19, + 20, + true, + "5", + "5" + ], + [ + "numval", + "ival", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 17767354399704235161, + 13228032993797530125, + 18446744073709551615, + 18446744073709551615, + 120, + 121, + 120, + 121, + 23, + 24, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 17767354399704235162, + 13228032993848454727, + 18446744073709551615, + 18446744073709551615, + 181, + 182, + 181, + 182, + 36, + 37, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 17767354399704235163, + 13228032993697401650, + 18446744073709551615, + 18446744073709551615, + 258, + 259, + 258, + 259, + 51, + 52, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 17767354399704235156, + 13228032991959390894, + 18446744073709551615, + 18446744073709551615, + 333, + 334, + 333, + 334, + 67, + 68, + true, + "4", + "4" + ], + [ + "numval", + "ival", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 17767354399704235157, + 13228032993803604970, + 18446744073709551615, + 18446744073709551615, + 457, + 458, + 457, + 458, + 89, + 90, + true, + "5", + "5" + ], + [ + "parenthesis", + "reference", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 12178341415896395122, + 15471976562902371808, + 18446744073709551615, + 18446744073709551615, + 119, + 122, + 119, + 122, + 22, + 25, + true, + "(1)", + "(1)" + ], + [ + "parenthesis", + "reference", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 12178341415896395187, + 15471976561751523231, + 18446744073709551615, + 18446744073709551615, + 180, + 183, + 180, + 183, + 35, + 38, + true, + "(2)", + "(2)" + ], + [ + "parenthesis", + "reference", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 12178341415896394992, + 15471989829567529905, + 18446744073709551615, + 18446744073709551615, + 257, + 260, + 257, + 260, + 50, + 53, + true, + "(3)", + "(3)" + ], + [ + "parenthesis", + "reference", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 12178341415896395057, + 15471989927377316313, + 18446744073709551615, + 18446744073709551615, + 332, + 335, + 332, + 335, + 66, + 69, + true, + "(4)", + "(4)" + ], + [ + "parenthesis", + "reference", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 12178341415896395383, + 15471976563062610688, + 18446744073709551615, + 18446744073709551615, + 456, + 459, + 456, + 459, + 88, + 91, + true, + "(5)", + "(5)" + ], + [ + "parenthesis", + "round brackets", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 11911205317521683928, + 15134175519852772608, + 18446744073709551615, + 18446744073709551615, + 514, + 526, + 514, + 526, + 99, + 106, + true, + "(e. g. JSON)", + "(e. g. JSON)" + ], + [ + "parenthesis", + "round brackets", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 8106340651774642079, + 3024242714742348397, + 18446744073709551615, + 18446744073709551615, + 757, + 764, + 757, + 764, + 147, + 150, + true, + "(green)", + "(green)" + ], + [ + "parenthesis", + "round brackets", + 16535999405521191333, + "TEXT", + "#/figures/0/captions/0", + 1.0, + 14654063138065257290, + 9093442344855846630, + 18446744073709551615, + 18446744073709551615, + 811, + 819, + 811, + 819, + 158, + 161, + true, + "(orange)", + "(orange)" + ], + [ + "numval", + "ival", + 9115121388992506886, + "TEXT", + "#/figures/1/captions/0", + 1.0, + 17767354399704235163, + 17568367315278556853, + 18446744073709551615, + 18446744073709551615, + 7, + 8, + 7, + 8, + 1, + 2, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 9115121388992506886, + "TEXT", + "#/figures/1/captions/0", + 1.0, + 15441160910541481983, + 7680851430180773188, + 18446744073709551615, + 18446744073709551615, + 93, + 95, + 93, + 95, + 19, + 20, + true, + "11", + "11" + ], + [ + "parenthesis", + "reference", + 9115121388992506886, + "TEXT", + "#/figures/1/captions/0", + 1.0, + 389609625697296278, + 800054087129621790, + 18446744073709551615, + 18446744073709551615, + 92, + 96, + 92, + 96, + 18, + 21, + true, + "[11]", + "[11]" + ], + [ + "numval", + "ival", + 14775249782836392461, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 17767354399704235162, + 4069058057296791001, + 18446744073709551615, + 18446744073709551615, + 7, + 8, + 7, + 8, + 1, + 2, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 14775249782836392461, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 15441160910541481983, + 3058496575073634767, + 18446744073709551615, + 18446744073709551615, + 84, + 86, + 84, + 86, + 18, + 19, + true, + "11", + "11" + ], + [ + "numval", + "ival", + 14775249782836392461, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 17767354399704235161, + 4069058058050517746, + 18446744073709551615, + 18446744073709551615, + 221, + 222, + 221, + 222, + 51, + 52, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 14775249782836392461, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 17767354399704235162, + 4069058057296774344, + 18446744073709551615, + 18446744073709551615, + 242, + 243, + 242, + 243, + 58, + 59, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 14775249782836392461, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 17767354399704235163, + 4069058057175817631, + 18446744073709551615, + 18446744073709551615, + 271, + 272, + 271, + 272, + 66, + 67, + true, + "3", + "3" + ], + [ + "parenthesis", + "reference", + 14775249782836392461, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 12178341415896395122, + 6612944927672955769, + 18446744073709551615, + 18446744073709551615, + 220, + 223, + 220, + 223, + 50, + 53, + true, + "(1)", + "(1)" + ], + [ + "parenthesis", + "reference", + 14775249782836392461, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 12178341415896395187, + 6612944887316523872, + 18446744073709551615, + 18446744073709551615, + 241, + 244, + 241, + 244, + 57, + 60, + true, + "(2)", + "(2)" + ], + [ + "parenthesis", + "reference", + 14775249782836392461, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 12178341415896394992, + 6612946277324983566, + 18446744073709551615, + 18446744073709551615, + 270, + 273, + 270, + 273, + 65, + 68, + true, + "(3)", + "(3)" + ], + [ + "parenthesis", + "reference", + 14775249782836392461, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 389609625697296278, + 16009962889508799152, + 18446744073709551615, + 18446744073709551615, + 83, + 87, + 83, + 87, + 17, + 20, + true, + "[11]", + "[11]" + ], + [ + "parenthesis", + "round brackets", + 14775249782836392461, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 4893178966255234284, + 433949853421102113, + 18446744073709551615, + 18446744073709551615, + 175, + 185, + 175, + 185, + 37, + 41, + true, + "(or cells)", + "(or cells)" + ], + [ + "parenthesis", + "round brackets", + 14775249782836392461, + "TEXT", + "#/figures/2/captions/0", + 1.0, + 557909325422029114, + 528772335313425350, + 18446744073709551615, + 18446744073709551615, + 369, + 388, + 369, + 388, + 84, + 90, + true, + "(such as in tables)", + "(such as in tables)" + ], + [ + "numval", + "ival", + 7479698582664857938, + "TEXT", + "#/figures/3/captions/0", + 1.0, + 17767354399704235156, + 7996590972891274382, + 18446744073709551615, + 18446744073709551615, + 7, + 8, + 7, + 8, + 1, + 2, + true, + "4", + "4" + ], + [ + "conn", + "single-conn", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 3512299892331381400, + 9603465650093366657, + 18446744073709551615, + 18446744073709551615, + 847, + 856, + 847, + 856, + 154, + 156, + true, + "towards a", + "towards a" + ], + [ + "term", + "single-term", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 13157956405326233364, + 1973865905648942248, + 18446744073709551615, + 18446744073709551615, + 857, + 885, + 857, + 885, + 156, + 159, + true, + "knowledge discovery platform", + "knowledge discovery platform" + ], + [ + "conn", + "single-conn", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 12178341415895625940, + 5663899155610829905, + 18446744073709551615, + 18446744073709551615, + 886, + 889, + 886, + 889, + 159, + 160, + true, + "for", + "for" + ], + [ + "term", + "single-term", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 6167933651658664291, + 4335834381973654488, + 18446744073709551615, + 18446744073709551615, + 890, + 899, + 890, + 899, + 160, + 161, + true, + "documents", + "documents" + ], + [ + "verb", + "compound-verb", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 5518720680045185536, + 10887625114734223201, + 18446744073709551615, + 18446744073709551615, + 904, + 914, + 904, + 914, + 163, + 165, + true, + "have opted", + "have opted" + ], + [ + "conn", + "single-conn", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 8106397727991264470, + 13733498763290197426, + 18446744073709551615, + 18446744073709551615, + 915, + 922, + 915, + 922, + 165, + 167, + true, + "for the", + "for the" + ], + [ + "term", + "single-term", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 2940970869648856259, + 4641698687139622359, + 18446744073709551615, + 18446744073709551615, + 923, + 938, + 923, + 938, + 167, + 169, + true, + "second approach", + "second approach" + ], + [ + "conn", + "single-conn", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 15441160910541486538, + 16301782677078975107, + 18446744073709551615, + 18446744073709551615, + 939, + 941, + 939, + 941, + 169, + 170, + true, + "in", + "in" + ], + [ + "term", + "single-term", + 7935233310532930917, + "TEXT", + "#/texts/16", + 1.0, + 14635106751859230946, + 4735627980056120373, + 18446744073709551615, + 18446744073709551615, + 946, + 954, + 946, + 954, + 171, + 172, + true, + "solution", + "solution" + ] + ], + "headers": [ + "type", + "subtype", + "subj_hash", + "subj_name", + "subj_path", + "conf", + "hash", + "ihash", + "coor_i", + "coor_j", + "char_i", + "char_j", + "ctok_i", + "ctok_j", + "wtok_i", + "wtok_j", + "wtok-match", + "name", + "original" + ] + }, + "meta": [ + { + "$ref": "#/footnotes/0" + }, + { + "$ref": "#/footnotes/1" + }, + { + "$ref": "#/footnotes/2" + }, + { + "$ref": "#/footnotes/3" + }, + { + "$ref": "#/footnotes/4" + }, + { + "$ref": "#/footnotes/5" + }, + { + "$ref": "#/figures/0/captions/0" + }, + { + "$ref": "#/footnotes/6" + }, + { + "$ref": "#/footnotes/7" + }, + { + "$ref": "#/footnotes/8" + }, + { + "$ref": "#/footnotes/9" + }, + { + "$ref": "#/footnotes/10" + }, + { + "$ref": "#/figures/2/captions/0" + }, + { + "$ref": "#/figures/1/captions/0" + }, + { + "$ref": "#/footnotes/11" + }, + { + "$ref": "#/footnotes/12" + }, + { + "$ref": "#/footnotes/13" + }, + { + "$ref": "#/figures/3/captions/0" + }, + { + "$ref": "#/figures/4/captions/0" + }, + { + "$ref": "#/footnotes/14" + }, + { + "$ref": "#/footnotes/15" + }, + { + "$ref": "#/footnotes/16" + }, + { + "$ref": "#/footnotes/17" + }, + { + "$ref": "#/footnotes/18" + }, + { + "$ref": "#/footnotes/19" + }, + { + "$ref": "#/figures/6/captions/0" + }, + { + "$ref": "#/footnotes/20" + }, + { + "$ref": "#/figures/7/captions/0" + }, + { + "$ref": "#/footnotes/21" + }, + { + "$ref": "#/footnotes/22" + }, + { + "$ref": "#/footnotes/23" + } + ], + "model-application": { + "message": "success", + "success": true + }, + "other": [], + "page-dimensions": [ + { + "height": 792.0, + "page": 1, + "width": 612.0 + }, + { + "height": 792.0, + "page": 2, + "width": 612.0 + }, + { + "height": 792.0, + "page": 3, + "width": 612.0 + }, + { + "height": 792.0, + "page": 4, + "width": 612.0 + }, + { + "height": 792.0, + "page": 5, + "width": 612.0 + }, + { + "height": 792.0, + "page": 6, + "width": 612.0 + }, + { + "height": 792.0, + "page": 7, + "width": 612.0 + }, + { + "height": 792.0, + "page": 8, + "width": 612.0 + }, + { + "height": 792.0, + "page": 9, + "width": 612.0 + } + ], + "page-elements": [ + { + "bbox": [ + 18.34, + 232.0, + 36.34, + 586.4 + ], + "iref": "#/texts/0", + "name": "text", + "orig-order": 19, + "page": 1, + "span": [ + 0, + 38 + ], + "sref": "#/page-elements/0", + "text-order": 0, + "type": "paragraph" + }, + { + "bbox": [ + 61.47, + 672.09, + 552.8, + 708.43 + ], + "iref": "#/texts/1", + "name": "title", + "orig-order": 0, + "page": 1, + "span": [ + 0, + 84 + ], + "sref": "#/page-elements/1", + "text-order": 1, + "type": "title" + }, + { + "bbox": [ + 158.55, + 646.95, + 454.45, + 658.0 + ], + "iref": "#/texts/2", + "name": "text", + "orig-order": 1, + "page": 1, + "span": [ + 0, + 60 + ], + "sref": "#/page-elements/2", + "text-order": 2, + "type": "paragraph" + }, + { + "bbox": [ + 179.65, + 635.43, + 433.14, + 644.7 + ], + "iref": "#/texts/3", + "name": "text", + "orig-order": 2, + "page": 1, + "span": [ + 0, + 30 + ], + "sref": "#/page-elements/3", + "text-order": 3, + "type": "paragraph" + }, + { + "bbox": [ + 277.59, + 623.47, + 335.41, + 632.38 + ], + "iref": "#/texts/4", + "name": "text", + "orig-order": 3, + "page": 1, + "span": [ + 0, + 12 + ], + "sref": "#/page-elements/4", + "text-order": 4, + "type": "paragraph" + }, + { + "bbox": [ + 255.33, + 611.52, + 357.64, + 621.19 + ], + "iref": "#/texts/5", + "name": "text", + "orig-order": 4, + "page": 1, + "span": [ + 0, + 24 + ], + "sref": "#/page-elements/5", + "text-order": 5, + "type": "paragraph" + }, + { + "bbox": [ + 53.51, + 592.31, + 112.67, + 602.28 + ], + "iref": "#/texts/6", + "name": "subtitle-level-1", + "orig-order": 5, + "page": 1, + "span": [ + 0, + 8 + ], + "sref": "#/page-elements/6", + "text-order": 6, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 317.73, + 592.25, + 421.26, + 602.36 + ], + "iref": "#/texts/7", + "name": "subtitle-level-1", + "orig-order": 13, + "page": 1, + "span": [ + 0, + 14 + ], + "sref": "#/page-elements/7", + "text-order": 7, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 53.47, + 326.91, + 295.66, + 586.98 + ], + "iref": "#/texts/8", + "name": "text", + "orig-order": 6, + "page": 1, + "span": [ + 0, + 1554 + ], + "sref": "#/page-elements/8", + "text-order": 8, + "type": "paragraph" + }, + { + "bbox": [ + 53.51, + 294.88, + 138.15, + 302.34 + ], + "iref": "#/texts/9", + "name": "subtitle-level-1", + "orig-order": 7, + "page": 1, + "span": [ + 0, + 21 + ], + "sref": "#/page-elements/9", + "text-order": 9, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 53.2, + 235.05, + 295.44, + 292.11 + ], + "iref": "#/texts/10", + "name": "text", + "orig-order": 8, + "page": 1, + "span": [ + 0, + 366 + ], + "sref": "#/page-elements/10", + "text-order": 10, + "type": "paragraph" + }, + { + "bbox": [ + 53.8, + 121.27, + 294.28, + 176.02 + ], + "iref": "#/footnotes/0", + "name": "footnote", + "orig-order": 9, + "page": 1, + "span": [ + 0, + 585 + ], + "sref": "#/page-elements/11", + "text-order": 11, + "type": "footnote" + }, + { + "bbox": [ + 53.57, + 112.36, + 215.34, + 118.82 + ], + "iref": "#/footnotes/1", + "name": "footnote", + "orig-order": 10, + "page": 1, + "span": [ + 0, + 53 + ], + "sref": "#/page-elements/12", + "text-order": 12, + "type": "footnote" + }, + { + "bbox": [ + 53.27, + 94.72, + 286.81, + 110.13 + ], + "iref": "#/footnotes/2", + "name": "footnote", + "orig-order": 11, + "page": 1, + "span": [ + 0, + 124 + ], + "sref": "#/page-elements/13", + "text-order": 13, + "type": "footnote" + }, + { + "bbox": [ + 52.78, + 87.54, + 173.61, + 94.19 + ], + "iref": "#/footnotes/3", + "name": "footnote", + "orig-order": 12, + "page": 1, + "span": [ + 0, + 39 + ], + "sref": "#/page-elements/14", + "text-order": 14, + "type": "footnote" + }, + { + "bbox": [ + 317.63, + 337.25, + 559.69, + 586.99 + ], + "iref": "#/texts/11", + "name": "text", + "orig-order": 14, + "page": 1, + "span": [ + 0, + 1532 + ], + "sref": "#/page-elements/15", + "text-order": 15, + "type": "paragraph" + }, + { + "bbox": [ + 317.95, + 183.76, + 559.78, + 334.59 + ], + "iref": "#/texts/12", + "name": "text", + "orig-order": 15, + "page": 1, + "span": [ + 0, + 891 + ], + "sref": "#/page-elements/16", + "text-order": 16, + "type": "paragraph" + }, + { + "bbox": [ + 317.95, + 150.97, + 559.45, + 181.17 + ], + "iref": "#/texts/13", + "name": "text", + "orig-order": 16, + "page": 1, + "span": [ + 0, + 200 + ], + "sref": "#/page-elements/17", + "text-order": 17, + "type": "paragraph" + }, + { + "bbox": [ + 317.54, + 100.92, + 559.15, + 123.96 + ], + "iref": "#/footnotes/4", + "name": "footnote", + "orig-order": 17, + "page": 1, + "span": [ + 0, + 185 + ], + "sref": "#/page-elements/18", + "text-order": 18, + "type": "footnote" + }, + { + "bbox": [ + 317.55, + 84.35, + 559.42, + 99.16 + ], + "iref": "#/footnotes/5", + "name": "footnote", + "orig-order": 18, + "page": 1, + "span": [ + 0, + 130 + ], + "sref": "#/page-elements/19", + "text-order": 19, + "type": "footnote" + }, + { + "bbox": [ + 57.06, + 581.52, + 566.22, + 706.0 + ], + "iref": "#/figures/0", + "name": "picture", + "orig-order": 20, + "page": 2, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/20", + "text-order": 20, + "type": "figure" + }, + { + "bbox": [ + 53.5, + 488.93, + 560.56, + 562.76 + ], + "iref": "#/figures/0/captions/0", + "name": "caption", + "orig-order": 21, + "page": 2, + "span": [ + 0, + 820 + ], + "sref": "#/page-elements/21", + "text-order": 21, + "type": "caption" + }, + { + "bbox": [ + 53.47, + 394.94, + 295.54, + 468.71 + ], + "iref": "#/texts/14", + "name": "text", + "orig-order": 22, + "page": 2, + "span": [ + 0, + 409 + ], + "sref": "#/page-elements/22", + "text-order": 22, + "type": "paragraph" + }, + { + "bbox": [ + 53.58, + 370.01, + 173.48, + 380.56 + ], + "iref": "#/texts/15", + "name": "subtitle-level-1", + "orig-order": 23, + "page": 2, + "span": [ + 0, + 18 + ], + "sref": "#/page-elements/23", + "text-order": 23, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 53.47, + 203.97, + 295.7, + 365.41 + ], + "iref": "#/texts/16", + "name": "text", + "orig-order": 24, + "page": 2, + "span": [ + 0, + 955 + ], + "sref": "#/page-elements/24", + "text-order": 24, + "type": "paragraph" + }, + { + "bbox": [ + 53.8, + 148.85, + 295.54, + 201.03 + ], + "iref": "#/texts/17", + "name": "text", + "orig-order": 25, + "page": 2, + "span": [ + 0, + 337 + ], + "sref": "#/page-elements/25", + "text-order": 25, + "type": "paragraph" + }, + { + "bbox": [ + 53.53, + 119.32, + 137.15, + 125.85 + ], + "iref": "#/footnotes/6", + "name": "footnote", + "orig-order": 26, + "page": 2, + "span": [ + 0, + 32 + ], + "sref": "#/page-elements/26", + "text-order": 26, + "type": "footnote" + }, + { + "bbox": [ + 53.36, + 110.38, + 128.94, + 116.81 + ], + "iref": "#/footnotes/7", + "name": "footnote", + "orig-order": 27, + "page": 2, + "span": [ + 0, + 31 + ], + "sref": "#/page-elements/27", + "text-order": 27, + "type": "footnote" + }, + { + "bbox": [ + 53.8, + 101.63, + 125.09, + 108.06 + ], + "iref": "#/footnotes/8", + "name": "footnote", + "orig-order": 28, + "page": 2, + "span": [ + 0, + 28 + ], + "sref": "#/page-elements/28", + "text-order": 28, + "type": "footnote" + }, + { + "bbox": [ + 53.8, + 93.08, + 128.45, + 99.42 + ], + "iref": "#/footnotes/9", + "name": "footnote", + "orig-order": 29, + "page": 2, + "span": [ + 0, + 29 + ], + "sref": "#/page-elements/29", + "text-order": 29, + "type": "footnote" + }, + { + "bbox": [ + 53.66, + 84.44, + 246.72, + 90.72 + ], + "iref": "#/footnotes/10", + "name": "footnote", + "orig-order": 30, + "page": 2, + "span": [ + 0, + 68 + ], + "sref": "#/page-elements/30", + "text-order": 30, + "type": "footnote" + }, + { + "bbox": [ + 317.7, + 416.85, + 560.56, + 468.71 + ], + "iref": "#/texts/18", + "name": "text", + "orig-order": 31, + "page": 2, + "span": [ + 0, + 325 + ], + "sref": "#/page-elements/31", + "text-order": 31, + "type": "paragraph" + }, + { + "bbox": [ + 317.95, + 392.14, + 440.26, + 402.9 + ], + "iref": "#/texts/19", + "name": "subtitle-level-1", + "orig-order": 32, + "page": 2, + "span": [ + 0, + 17 + ], + "sref": "#/page-elements/32", + "text-order": 32, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 317.95, + 357.82, + 559.08, + 387.75 + ], + "iref": "#/texts/20", + "name": "text", + "orig-order": 33, + "page": 2, + "span": [ + 0, + 174 + ], + "sref": "#/page-elements/33", + "text-order": 33, + "type": "paragraph" + }, + { + "bbox": [ + 317.63, + 248.02, + 559.86, + 354.87 + ], + "iref": "#/texts/21", + "name": "text", + "orig-order": 34, + "page": 2, + "span": [ + 0, + 594 + ], + "sref": "#/page-elements/34", + "text-order": 34, + "type": "paragraph" + }, + { + "bbox": [ + 317.95, + 83.84, + 559.73, + 245.28 + ], + "iref": "#/texts/22", + "name": "text", + "orig-order": 35, + "page": 2, + "span": [ + 0, + 983 + ], + "sref": "#/page-elements/35", + "text-order": 35, + "type": "paragraph" + }, + { + "bbox": [ + 56.01, + 558.75, + 290.99, + 709.73 + ], + "iref": "#/figures/1", + "name": "picture", + "orig-order": 49, + "page": 3, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/36", + "text-order": 36, + "type": "figure" + }, + { + "bbox": [ + 321.39, + 558.68, + 554.25, + 709.93 + ], + "iref": "#/figures/2", + "name": "picture", + "orig-order": 50, + "page": 3, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/37", + "text-order": 37, + "type": "figure" + }, + { + "bbox": [ + 53.8, + 472.75, + 295.53, + 546.56 + ], + "iref": "#/figures/2/captions/0", + "name": "caption", + "orig-order": 36, + "page": 3, + "span": [ + 0, + 389 + ], + "sref": "#/page-elements/38", + "text-order": 38, + "type": "caption" + }, + { + "bbox": [ + 53.8, + 294.83, + 295.11, + 445.89 + ], + "iref": "#/texts/23", + "name": "text", + "orig-order": 37, + "page": 3, + "span": [ + 0, + 916 + ], + "sref": "#/page-elements/39", + "text-order": 39, + "type": "paragraph" + }, + { + "bbox": [ + 53.8, + 272.24, + 137.17, + 282.39 + ], + "iref": "#/texts/24", + "name": "subtitle-level-1", + "orig-order": 38, + "page": 3, + "span": [ + 0, + 14 + ], + "sref": "#/page-elements/40", + "text-order": 40, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 53.63, + 214.84, + 295.61, + 266.9 + ], + "iref": "#/texts/25", + "name": "text", + "orig-order": 39, + "page": 3, + "span": [ + 0, + 280 + ], + "sref": "#/page-elements/41", + "text-order": 41, + "type": "paragraph" + }, + { + "bbox": [ + 53.5, + 82.78, + 295.53, + 212.11 + ], + "iref": "#/texts/26", + "name": "text", + "orig-order": 40, + "page": 3, + "span": [ + 0, + 799 ], - [ - "numval", - "ival", - 14817357053216629605, - "TABLE", - "#/tables/2", - 1.0, - 15441160910541486209, - 14633895297101973839, - 2, - 2, - 5, - 7, - 5, - 7, - 3, - 5, - true, - "58", - "58" - ] - ], - "headers": [ - "type", - "subtype", - "subj_hash", - "subj_name", - "subj_path", - "conf", - "hash", - "ihash", - "coor_i", - "coor_j", - "char_i", - "char_j", - "ctok_i", - "ctok_j", - "wtok_i", - "wtok_j", - "wtok-match", - "name", - "original" - ] - }, - "meta": [ + "sref": "#/page-elements/42", + "text-order": 42, + "type": "paragraph" + }, { - "$ref": "#/footnotes/0" + "bbox": [ + 317.95, + 494.07, + 559.71, + 546.47 + ], + "iref": "#/figures/1/captions/0", + "name": "caption", + "orig-order": 41, + "page": 3, + "span": [ + 0, + 272 + ], + "sref": "#/page-elements/43", + "text-order": 43, + "type": "caption" }, { - "$ref": "#/footnotes/1" + "bbox": [ + 317.27, + 451.83, + 558.41, + 470.9 + ], + "iref": "#/texts/27", + "name": "text", + "orig-order": 42, + "page": 3, + "span": [ + 0, + 93 + ], + "sref": "#/page-elements/44", + "text-order": 44, + "type": "paragraph" }, { - "$ref": "#/footnotes/2" + "bbox": [ + 317.95, + 429.5, + 445.89, + 439.36 + ], + "iref": "#/texts/28", + "name": "subtitle-level-1", + "orig-order": 43, + "page": 3, + "span": [ + 0, + 24 + ], + "sref": "#/page-elements/45", + "text-order": 45, + "type": "subtitle-level-1" }, { - "$ref": "#/footnotes/3" + "bbox": [ + 317.63, + 306.61, + 559.02, + 424.21 + ], + "iref": "#/texts/29", + "name": "text", + "orig-order": 44, + "page": 3, + "span": [ + 0, + 669 + ], + "sref": "#/page-elements/46", + "text-order": 46, + "type": "paragraph" }, { - "$ref": "#/footnotes/4" + "bbox": [ + 317.95, + 152.94, + 559.03, + 303.66 + ], + "iref": "#/texts/30", + "name": "text", + "orig-order": 45, + "page": 3, + "span": [ + 0, + 900 + ], + "sref": "#/page-elements/47", + "text-order": 47, + "type": "paragraph" }, { - "$ref": "#/footnotes/5" + "bbox": [ + 317.54, + 119.96, + 560.23, + 150.07 + ], + "iref": "#/texts/31", + "name": "text", + "orig-order": 46, + "page": 3, + "span": [ + 0, + 199 + ], + "sref": "#/page-elements/48", + "text-order": 48, + "type": "paragraph" }, { - "$ref": "#/figures/0/captions/0" + "bbox": [ + 317.85, + 91.71, + 558.2, + 106.65 + ], + "iref": "#/footnotes/11", + "name": "footnote", + "orig-order": 47, + "page": 3, + "span": [ + 0, + 102 + ], + "sref": "#/page-elements/49", + "text-order": 49, + "type": "footnote" }, { - "$ref": "#/footnotes/6" + "bbox": [ + 317.95, + 83.37, + 397.4, + 89.81 + ], + "iref": "#/footnotes/12", + "name": "footnote", + "orig-order": 48, + "page": 3, + "span": [ + 0, + 34 + ], + "sref": "#/page-elements/50", + "text-order": 50, + "type": "footnote" }, { - "$ref": "#/footnotes/7" + "bbox": [ + 53.8, + 608.74, + 295.54, + 704.43 + ], + "iref": "#/texts/32", + "name": "text", + "orig-order": 51, + "page": 4, + "span": [ + 0, + 542 + ], + "sref": "#/page-elements/51", + "text-order": 51, + "type": "paragraph" }, { - "$ref": "#/footnotes/8" + "bbox": [ + 53.8, + 574.22, + 231.57, + 596.98 + ], + "iref": "#/texts/33", + "name": "subtitle-level-1", + "orig-order": 52, + "page": 4, + "span": [ + 0, + 51 + ], + "sref": "#/page-elements/52", + "text-order": 52, + "type": "subtitle-level-1" }, { - "$ref": "#/footnotes/9" + "bbox": [ + 53.8, + 473.11, + 295.53, + 568.88 + ], + "iref": "#/texts/34", + "name": "text", + "orig-order": 53, + "page": 4, + "span": [ + 0, + 557 + ], + "sref": "#/page-elements/53", + "text-order": 53, + "type": "paragraph" }, { - "$ref": "#/footnotes/10" + "bbox": [ + 53.25, + 319.61, + 295.61, + 470.25 + ], + "iref": "#/texts/35", + "name": "text", + "orig-order": 54, + "page": 4, + "span": [ + 0, + 919 + ], + "sref": "#/page-elements/54", + "text-order": 54, + "type": "paragraph" }, { - "$ref": "#/figures/2/captions/0" + "bbox": [ + 53.47, + 154.74, + 296.04, + 316.66 + ], + "iref": "#/texts/36", + "name": "text", + "orig-order": 55, + "page": 4, + "span": [ + 0, + 1011 + ], + "sref": "#/page-elements/55", + "text-order": 55, + "type": "paragraph" }, { - "$ref": "#/figures/1/captions/0" + "bbox": [ + 53.8, + 121.91, + 295.53, + 152.28 + ], + "iref": "#/texts/37", + "name": "text", + "orig-order": 56, + "page": 4, + "span": [ + 0, + 195 + ], + "sref": "#/page-elements/56", + "text-order": 56, + "type": "paragraph" }, { - "$ref": "#/footnotes/11" + "bbox": [ + 53.39, + 83.17, + 294.92, + 113.59 + ], + "iref": "#/footnotes/13", + "name": "footnote", + "orig-order": 57, + "page": 4, + "span": [ + 0, + 290 + ], + "sref": "#/page-elements/57", + "text-order": 57, + "type": "footnote" }, { - "$ref": "#/footnotes/12" + "bbox": [ + 326.25, + 539.86, + 548.16, + 703.53 + ], + "iref": "#/figures/3", + "name": "picture", + "orig-order": 58, + "page": 4, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/58", + "text-order": 58, + "type": "figure" }, { - "$ref": "#/footnotes/13" + "bbox": [ + 317.63, + 415.02, + 560.18, + 522.07 + ], + "iref": "#/figures/3/captions/0", + "name": "caption", + "orig-order": 59, + "page": 4, + "span": [ + 0, + 576 + ], + "sref": "#/page-elements/59", + "text-order": 59, + "type": "caption" }, { - "$ref": "#/figures/3/captions/0" + "bbox": [ + 317.95, + 304.0, + 559.15, + 388.98 + ], + "iref": "#/texts/38", + "name": "text", + "orig-order": 60, + "page": 4, + "span": [ + 0, + 539 + ], + "sref": "#/page-elements/60", + "text-order": 60, + "type": "paragraph" }, { - "$ref": "#/figures/4/captions/0" + "bbox": [ + 317.95, + 268.24, + 522.75, + 291.0 + ], + "iref": "#/texts/39", + "name": "subtitle-level-1", + "orig-order": 61, + "page": 4, + "span": [ + 0, + 55 + ], + "sref": "#/page-elements/61", + "text-order": 61, + "type": "subtitle-level-1" }, { - "$ref": "#/footnotes/14" + "bbox": [ + 317.94, + 166.98, + 559.77, + 263.01 + ], + "iref": "#/texts/40", + "name": "text", + "orig-order": 62, + "page": 4, + "span": [ + 0, + 605 + ], + "sref": "#/page-elements/62", + "text-order": 62, + "type": "paragraph" }, { - "$ref": "#/footnotes/15" + "bbox": [ + 317.95, + 83.13, + 560.15, + 157.56 + ], + "iref": "#/texts/41", + "name": "text", + "orig-order": 63, + "page": 4, + "span": [ + 0, + 466 + ], + "sref": "#/page-elements/63", + "text-order": 63, + "type": "paragraph" }, { - "$ref": "#/footnotes/16" + "bbox": [ + 55.4, + 459.44, + 294.02, + 709.2 + ], + "iref": "#/figures/4", + "name": "picture", + "orig-order": 79, + "page": 5, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/64", + "text-order": 64, + "type": "figure" }, { - "$ref": "#/footnotes/17" + "bbox": [ + 53.77, + 404.84, + 296.92, + 446.17 + ], + "iref": "#/figures/4/captions/0", + "name": "caption", + "orig-order": 64, + "page": 5, + "span": [ + 0, + 228 + ], + "sref": "#/page-elements/65", + "text-order": 65, + "type": "caption" }, { - "$ref": "#/footnotes/18" + "bbox": [ + 53.8, + 353.97, + 295.17, + 383.9 + ], + "iref": "#/texts/42", + "name": "text", + "orig-order": 65, + "page": 5, + "span": [ + 0, + 199 + ], + "sref": "#/page-elements/66", + "text-order": 66, + "type": "paragraph" }, { - "$ref": "#/footnotes/19" + "bbox": [ + 53.8, + 332.05, + 294.43, + 351.07 + ], + "iref": "#/texts/43", + "name": "text", + "orig-order": 66, + "page": 5, + "span": [ + 0, + 105 + ], + "sref": "#/page-elements/67", + "text-order": 67, + "type": "paragraph" }, { - "$ref": "#/figures/6/captions/0" + "bbox": [ + 117.81, + 304.81, + 294.53, + 327.96 + ], + "iref": "#/texts/44", + "name": "formula", + "orig-order": 67, + "page": 5, + "span": [ + 0, + 73 + ], + "sref": "#/page-elements/68", + "text-order": 68, + "type": "equation" }, { - "$ref": "#/footnotes/20" + "bbox": [ + 53.47, + 280.88, + 294.51, + 300.03 + ], + "iref": "#/texts/45", + "name": "text", + "orig-order": 68, + "page": 5, + "span": [ + 0, + 124 + ], + "sref": "#/page-elements/69", + "text-order": 69, + "type": "paragraph" }, { - "$ref": "#/figures/7/captions/0" + "bbox": [ + 53.8, + 154.58, + 295.14, + 272.49 + ], + "iref": "#/texts/46", + "name": "text", + "orig-order": 69, + "page": 5, + "span": [ + 0, + 715 + ], + "sref": "#/page-elements/70", + "text-order": 70, + "type": "paragraph" }, { - "$ref": "#/footnotes/21" + "bbox": [ + 53.8, + 121.7, + 295.16, + 151.63 + ], + "iref": "#/texts/47", + "name": "text", + "orig-order": 70, + "page": 5, + "span": [ + 0, + 172 + ], + "sref": "#/page-elements/71", + "text-order": 71, + "type": "paragraph" }, { - "$ref": "#/footnotes/22" + "bbox": [ + 53.8, + 99.62, + 294.57, + 118.86 + ], + "iref": "#/texts/48", + "name": "text", + "orig-order": 71, + "page": 5, + "span": [ + 0, + 125 + ], + "sref": "#/page-elements/72", + "text-order": 72, + "type": "paragraph" }, { - "$ref": "#/footnotes/23" - } - ], - "model-application": { - "message": "success", - "success": true - }, - "other": [], - "page-dimensions": [ + "bbox": [ + 53.39, + 83.28, + 294.56, + 89.64 + ], + "iref": "#/footnotes/14", + "name": "footnote", + "orig-order": 72, + "page": 5, + "span": [ + 0, + 93 + ], + "sref": "#/page-elements/73", + "text-order": 73, + "type": "footnote" + }, { - "height": 792.0, - "page": 1, - "width": 612.0 + "bbox": [ + 316.99, + 622.02, + 560.1, + 706.83 + ], + "iref": "#/tables/0/captions/0", + "name": "text", + "orig-order": 73, + "page": 5, + "span": [ + 0, + 461 + ], + "sref": "#/page-elements/74", + "text-order": 74, + "type": "paragraph" }, { - "height": 792.0, - "page": 2, - "width": 612.0 + "bbox": [ + 334.48, + 554.59, + 541.17, + 609.5 + ], + "iref": "#/tables/0", + "name": "table", + "orig-order": 74, + "page": 5, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/75", + "text-order": 75, + "type": "table" }, { - "height": 792.0, - "page": 3, - "width": 612.0 + "bbox": [ + 317.38, + 468.09, + 559.94, + 520.12 + ], + "iref": "#/texts/49", + "name": "text", + "orig-order": 75, + "page": 5, + "span": [ + 0, + 337 + ], + "sref": "#/page-elements/76", + "text-order": 76, + "type": "paragraph" }, { - "height": 792.0, - "page": 4, - "width": 612.0 + "bbox": [ + 317.63, + 303.89, + 561.69, + 465.33 + ], + "iref": "#/texts/50", + "name": "text", + "orig-order": 76, + "page": 5, + "span": [ + 0, + 955 + ], + "sref": "#/page-elements/77", + "text-order": 77, + "type": "paragraph" }, { - "height": 792.0, + "bbox": [ + 317.63, + 149.81, + 560.16, + 300.94 + ], + "iref": "#/texts/51", + "name": "text", + "orig-order": 77, "page": 5, - "width": 612.0 + "span": [ + 0, + 913 + ], + "sref": "#/page-elements/78", + "text-order": 78, + "type": "paragraph" }, { - "height": 792.0, - "page": 6, - "width": 612.0 + "bbox": [ + 317.63, + 84.71, + 559.69, + 147.52 + ], + "iref": "#/texts/52", + "name": "text", + "orig-order": 78, + "page": 5, + "span": [ + 0, + 398 + ], + "sref": "#/page-elements/79", + "text-order": 79, + "type": "paragraph" }, { - "height": 792.0, - "page": 7, - "width": 612.0 + "bbox": [ + 53.5, + 654.89, + 295.75, + 706.83 + ], + "iref": "#/texts/53", + "name": "text", + "orig-order": 80, + "page": 6, + "span": [ + 0, + 310 + ], + "sref": "#/page-elements/80", + "text-order": 80, + "type": "paragraph" }, { - "height": 792.0, - "page": 8, - "width": 612.0 + "bbox": [ + 54.41, + 497.83, + 294.07, + 642.21 + ], + "iref": "#/tables/1", + "name": "table", + "orig-order": 81, + "page": 6, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/81", + "text-order": 81, + "type": "table" }, { - "height": 792.0, - "page": 9, - "width": 612.0 - } - ], - "page-elements": [ + "bbox": [ + 53.47, + 321.07, + 295.62, + 471.56 + ], + "iref": "#/texts/54", + "name": "text", + "orig-order": 82, + "page": 6, + "span": [ + 0, + 867 + ], + "sref": "#/page-elements/82", + "text-order": 82, + "type": "paragraph" + }, { "bbox": [ - 18.340225219726562, - 231.99996948242188, - 36.339778900146484, - 586.4000244140625 + 53.8, + 236.99, + 295.53, + 311.13 ], - "iref": "#/texts/0", + "iref": "#/texts/55", "name": "text", - "orig-order": 19, - "page": 1, + "orig-order": 83, + "page": 6, "span": [ 0, - 38 + 460 ], - "sref": "#/page-elements/0", - "text-order": 0, + "sref": "#/page-elements/83", + "text-order": 83, "type": "paragraph" }, { "bbox": [ - 61.47460174560547, - 672.0942993164062, - 552.7999877929688, - 708.4287719726562 + 53.8, + 127.09, + 295.61, + 234.11 ], - "iref": "#/texts/1", - "name": "title", - "orig-order": 0, - "page": 1, + "iref": "#/texts/56", + "name": "text", + "orig-order": 84, + "page": 6, "span": [ 0, - 84 + 635 ], - "sref": "#/page-elements/1", - "text-order": 1, - "type": "title" + "sref": "#/page-elements/84", + "text-order": 84, + "type": "paragraph" }, { "bbox": [ - 158.54901123046875, - 646.95166015625, - 454.4521484375, - 657.9959716796875 + 53.8, + 83.63, + 295.54, + 124.52 ], - "iref": "#/texts/2", + "iref": "#/texts/57", "name": "text", - "orig-order": 1, - "page": 1, + "orig-order": 85, + "page": 6, "span": [ 0, - 60 + 256 ], - "sref": "#/page-elements/2", - "text-order": 2, + "sref": "#/page-elements/85", + "text-order": 85, "type": "paragraph" }, { "bbox": [ - 179.6484832763672, - 635.4270629882812, - 433.13836669921875, - 644.6961059570312 + 317.5, + 643.87, + 560.35, + 706.83 ], - "iref": "#/texts/3", + "iref": "#/tables/1/captions/0", "name": "text", - "orig-order": 2, - "page": 1, + "orig-order": 86, + "page": 6, "span": [ 0, - 30 + 356 ], - "sref": "#/page-elements/3", - "text-order": 3, + "sref": "#/page-elements/86", + "text-order": 86, "type": "paragraph" }, { "bbox": [ - 277.5870056152344, - 623.4720458984375, - 335.40997314453125, - 632.3786010742188 + 369.79, + 587.85, + 506.93, + 631.52 ], - "iref": "#/texts/4", + "iref": "#/tables/2", + "name": "table", + "orig-order": 87, + "page": 6, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/87", + "text-order": 87, + "type": "table" + }, + { + "bbox": [ + 317.95, + 505.95, + 559.69, + 568.76 + ], + "iref": "#/texts/58", "name": "text", - "orig-order": 3, - "page": 1, + "orig-order": 88, + "page": 6, "span": [ 0, - 12 + 346 ], - "sref": "#/page-elements/4", - "text-order": 4, + "sref": "#/page-elements/88", + "text-order": 88, "type": "paragraph" }, { "bbox": [ - 255.3256378173828, - 611.5160522460938, - 357.6419982910156, - 621.1870727539062 + 317.92, + 384.98, + 559.32, + 503.0 ], - "iref": "#/texts/5", + "iref": "#/texts/59", "name": "text", - "orig-order": 4, - "page": 1, + "orig-order": 89, + "page": 6, "span": [ 0, - 24 + 689 ], - "sref": "#/page-elements/5", - "text-order": 5, + "sref": "#/page-elements/89", + "text-order": 89, "type": "paragraph" }, { "bbox": [ - 53.50812911987305, - 592.31494140625, - 112.67424011230469, - 602.275634765625 + 317.88, + 351.97, + 559.69, + 382.45 ], - "iref": "#/texts/6", - "name": "subtitle-level-1", - "orig-order": 5, - "page": 1, + "iref": "#/texts/60", + "name": "text", + "orig-order": 90, + "page": 6, "span": [ 0, - 8 + 198 ], - "sref": "#/page-elements/6", - "text-order": 6, - "type": "subtitle-level-1" + "sref": "#/page-elements/90", + "text-order": 90, + "type": "paragraph" }, { "bbox": [ - 317.7327880859375, - 592.2473754882812, - 421.26416015625, - 602.3604125976562 + 317.91, + 253.73, + 558.79, + 349.58 ], - "iref": "#/texts/7", - "name": "subtitle-level-1", - "orig-order": 13, - "page": 1, + "iref": "#/texts/61", + "name": "text", + "orig-order": 91, + "page": 6, "span": [ 0, - 14 + 558 ], - "sref": "#/page-elements/7", - "text-order": 7, - "type": "subtitle-level-1" + "sref": "#/page-elements/91", + "text-order": 91, + "type": "paragraph" }, { "bbox": [ - 53.474998474121094, - 326.9052734375, - 295.66064453125, - 586.9752197265625 + 317.63, + 165.81, + 558.55, + 250.78 ], - "iref": "#/texts/8", + "iref": "#/texts/62", "name": "text", - "orig-order": 6, - "page": 1, + "orig-order": 92, + "page": 6, "span": [ 0, - 1554 + 531 ], - "sref": "#/page-elements/8", - "text-order": 8, + "sref": "#/page-elements/92", + "text-order": 92, "type": "paragraph" }, { "bbox": [ - 53.51100158691406, - 294.8792724609375, - 138.14549255371094, - 302.33953857421875 + 317.95, + 144.36, + 388.19, + 154.56 ], - "iref": "#/texts/9", + "iref": "#/texts/63", "name": "subtitle-level-1", - "orig-order": 7, - "page": 1, + "orig-order": 93, + "page": 6, "span": [ 0, - 21 + 12 ], - "sref": "#/page-elements/9", - "text-order": 9, + "sref": "#/page-elements/93", + "text-order": 93, "type": "subtitle-level-1" }, { "bbox": [ - 53.20000076293945, - 235.04745483398438, - 295.4400329589844, - 292.11370849609375 + 317.95, + 97.82, + 558.7, + 139.16 ], - "iref": "#/texts/10", + "iref": "#/texts/64", "name": "text", - "orig-order": 8, - "page": 1, + "orig-order": 94, + "page": 6, "span": [ 0, - 366 + 277 ], - "sref": "#/page-elements/10", - "text-order": 10, + "sref": "#/page-elements/94", + "text-order": 94, "type": "paragraph" }, { "bbox": [ - 53.79800033569336, - 121.27276611328125, - 294.28240966796875, - 176.01959228515625 + 317.54, + 83.17, + 398.95, + 89.63 ], - "iref": "#/footnotes/0", + "iref": "#/footnotes/15", "name": "footnote", - "orig-order": 9, - "page": 1, + "orig-order": 95, + "page": 6, "span": [ 0, - 585 + 35 ], - "sref": "#/page-elements/11", - "text-order": 11, + "sref": "#/page-elements/95", + "text-order": 95, "type": "footnote" }, { "bbox": [ - 53.56800079345703, - 112.3555908203125, - 215.3354034423828, - 118.82350158691406 + 53.8, + 687.81, + 296.07, + 706.83 ], - "iref": "#/footnotes/1", - "name": "footnote", - "orig-order": 10, - "page": 1, + "iref": "#/texts/65", + "name": "text", + "orig-order": 96, + "page": 7, "span": [ 0, - 53 + 104 ], - "sref": "#/page-elements/12", - "text-order": 12, - "type": "footnote" + "sref": "#/page-elements/96", + "text-order": 96, + "type": "paragraph" }, { "bbox": [ - 53.268001556396484, - 94.71673583984375, - 286.8135986328125, - 110.1262435913086 + 52.97, + 452.91, + 291.52, + 672.65 ], - "iref": "#/footnotes/2", - "name": "footnote", - "orig-order": 11, - "page": 1, + "iref": "#/texts/66", + "name": "text", + "orig-order": 97, + "page": 7, "span": [ 0, - 124 + 723 ], - "sref": "#/page-elements/13", - "text-order": 13, - "type": "footnote" + "sref": "#/page-elements/97", + "text-order": 97, + "type": "paragraph" }, { "bbox": [ - 52.780723571777344, - 87.53521728515625, - 173.61199951171875, - 94.18523406982422 + 53.8, + 388.87, + 295.07, + 430.4 ], - "iref": "#/footnotes/3", - "name": "footnote", - "orig-order": 12, - "page": 1, + "iref": "#/texts/67", + "name": "text", + "orig-order": 98, + "page": 7, "span": [ 0, - 39 + 226 ], - "sref": "#/page-elements/14", - "text-order": 14, - "type": "footnote" + "sref": "#/page-elements/98", + "text-order": 98, + "type": "paragraph" }, { "bbox": [ - 317.6319885253906, - 337.24517822265625, - 559.6874389648438, - 586.986328125 + 53.47, + 301.73, + 295.79, + 386.56 ], - "iref": "#/texts/11", + "iref": "#/texts/68", "name": "text", - "orig-order": 14, - "page": 1, + "orig-order": 99, + "page": 7, "span": [ 0, - 1532 + 530 ], - "sref": "#/page-elements/15", - "text-order": 15, + "sref": "#/page-elements/99", + "text-order": 99, "type": "paragraph" }, { "bbox": [ - 317.9549865722656, - 183.756591796875, - 559.7752075195312, - 334.59222412109375 + 53.8, + 265.37, + 287.75, + 288.26 ], - "iref": "#/texts/12", - "name": "text", - "orig-order": 15, - "page": 1, + "iref": "#/texts/69", + "name": "subtitle-level-1", + "orig-order": 100, + "page": 7, "span": [ 0, - 891 + 61 ], - "sref": "#/page-elements/16", - "text-order": 16, - "type": "paragraph" + "sref": "#/page-elements/100", + "text-order": 100, + "type": "subtitle-level-1" }, { "bbox": [ - 317.9549865722656, - 150.97491455078125, - 559.4527587890625, - 181.16822814941406 + 53.47, + 130.9, + 295.88, + 260.06 ], - "iref": "#/texts/13", + "iref": "#/texts/70", "name": "text", - "orig-order": 16, - "page": 1, + "orig-order": 101, + "page": 7, "span": [ 0, - 200 + 777 ], - "sref": "#/page-elements/17", - "text-order": 17, + "sref": "#/page-elements/101", + "text-order": 101, "type": "paragraph" }, { "bbox": [ - 317.54400634765625, - 100.9158935546875, - 559.1497192382812, - 123.9642333984375 + 53.8, + 107.44, + 150.55, + 117.82 ], - "iref": "#/footnotes/4", - "name": "footnote", - "orig-order": 17, - "page": 1, + "iref": "#/texts/71", + "name": "subtitle-level-1", + "orig-order": 102, + "page": 7, "span": [ 0, - 185 + 19 ], - "sref": "#/page-elements/18", - "text-order": 18, - "type": "footnote" + "sref": "#/page-elements/102", + "text-order": 102, + "type": "subtitle-level-1" }, { "bbox": [ - 317.54779052734375, - 84.349853515625, - 559.419189453125, - 99.1622314453125 + 53.8, + 83.7, + 295.59, + 102.8 ], - "iref": "#/footnotes/5", - "name": "footnote", - "orig-order": 18, - "page": 1, + "iref": "#/texts/72", + "name": "text", + "orig-order": 103, + "page": 7, "span": [ 0, - 130 + 127 ], - "sref": "#/page-elements/19", - "text-order": 19, - "type": "footnote" + "sref": "#/page-elements/103", + "text-order": 103, + "type": "paragraph" }, { "bbox": [ - 57.056358337402344, - 581.521484375, - 566.21923828125, - 705.9985961914062 + 319.47, + 591.07, + 563.42, + 707.4 ], - "iref": "#/figures/0", + "iref": "#/figures/5", "name": "picture", - "orig-order": 20, - "page": 2, + "orig-order": 104, + "page": 7, "span": [ 0, 0 ], - "sref": "#/page-elements/20", - "text-order": 20, + "sref": "#/page-elements/104", + "text-order": 104, "type": "figure" }, { "bbox": [ - 53.502044677734375, - 488.92645263671875, - 560.5620727539062, - 562.7618408203125 + 317.95, + 491.02, + 561.24, + 575.87 ], - "iref": "#/figures/0/captions/0", - "name": "caption", - "orig-order": 21, - "page": 2, + "iref": "#/figures/5/captions/0", + "name": "text", + "orig-order": 105, + "page": 7, "span": [ 0, - 820 + 462 ], - "sref": "#/page-elements/21", - "text-order": 21, - "type": "caption" + "sref": "#/page-elements/105", + "text-order": 105, + "type": "paragraph" }, { "bbox": [ - 53.474998474121094, - 394.9362487792969, - 295.5370178222656, - 468.70623779296875 + 317.95, + 444.97, + 558.5, + 464.15 ], - "iref": "#/texts/14", + "iref": "#/texts/73", "name": "text", - "orig-order": 22, - "page": 2, + "orig-order": 106, + "page": 7, "span": [ 0, - 409 + 97 ], - "sref": "#/page-elements/22", - "text-order": 22, + "sref": "#/page-elements/106", + "text-order": 106, "type": "paragraph" }, { "bbox": [ - 53.575965881347656, - 370.0102844238281, - 173.47894287109375, - 380.5594177246094 + 327.92, + 375.72, + 560.43, + 438.8 ], - "iref": "#/texts/15", - "name": "subtitle-level-1", - "orig-order": 23, - "page": 2, + "iref": "#/texts/74", + "name": "list-item", + "orig-order": 107, + "page": 7, "span": [ 0, - 18 + 307 ], - "sref": "#/page-elements/23", - "text-order": 23, - "type": "subtitle-level-1" + "sref": "#/page-elements/107", + "text-order": 107, + "type": "paragraph" }, { "bbox": [ - 53.474998474121094, - 203.9712677001953, - 295.7048645019531, - 365.4122314453125 + 326.9, + 244.05, + 561.55, + 372.78 ], - "iref": "#/texts/16", - "name": "text", - "orig-order": 24, - "page": 2, + "iref": "#/texts/75", + "name": "list-item", + "orig-order": 108, + "page": 7, "span": [ 0, - 955 + 702 ], - "sref": "#/page-elements/24", - "text-order": 24, + "sref": "#/page-elements/108", + "text-order": 108, "type": "paragraph" }, { "bbox": [ - 53.79800033569336, - 148.84925842285156, - 295.53668212890625, - 201.0292205810547 + 327.49, + 133.85, + 560.6, + 241.11 ], - "iref": "#/texts/17", - "name": "text", - "orig-order": 25, - "page": 2, + "iref": "#/texts/76", + "name": "list-item", + "orig-order": 109, + "page": 7, "span": [ 0, - 337 + 613 ], - "sref": "#/page-elements/25", - "text-order": 25, + "sref": "#/page-elements/109", + "text-order": 109, "type": "paragraph" }, { "bbox": [ - 53.52906036376953, - 119.31765747070312, - 137.14767456054688, - 125.85260009765625 + 317.54, + 110.22, + 398.99, + 117.17 ], - "iref": "#/footnotes/6", + "iref": "#/footnotes/16", "name": "footnote", - "orig-order": 26, - "page": 2, + "orig-order": 110, + "page": 7, "span": [ 0, 32 ], - "sref": "#/page-elements/26", - "text-order": 26, + "sref": "#/page-elements/110", + "text-order": 110, "type": "footnote" }, { "bbox": [ - 53.36406707763672, - 110.3837890625, - 128.93763732910156, - 116.80622863769531 + 317.54, + 101.19, + 400.17, + 108.29 ], - "iref": "#/footnotes/7", + "iref": "#/footnotes/17", "name": "footnote", - "orig-order": 27, - "page": 2, + "orig-order": 111, + "page": 7, "span": [ 0, - 31 + 32 ], - "sref": "#/page-elements/27", - "text-order": 27, + "sref": "#/page-elements/111", + "text-order": 111, "type": "footnote" }, { "bbox": [ - 53.797996520996094, - 101.62908935546875, - 125.09330749511719, - 108.06022644042969 + 317.54, + 92.26, + 382.04, + 99.16 ], - "iref": "#/footnotes/8", + "iref": "#/footnotes/18", "name": "footnote", - "orig-order": 28, - "page": 2, + "orig-order": 112, + "page": 7, "span": [ 0, 28 ], - "sref": "#/page-elements/28", - "text-order": 28, + "sref": "#/page-elements/112", + "text-order": 112, "type": "footnote" }, { "bbox": [ - 53.797996520996094, - 93.07965087890625, - 128.44528198242188, - 99.42169189453125 + 317.54, + 83.07, + 407.59, + 90.22 ], - "iref": "#/footnotes/9", + "iref": "#/footnotes/19", "name": "footnote", - "orig-order": 29, - "page": 2, + "orig-order": 113, + "page": 7, "span": [ 0, - 29 + 36 ], - "sref": "#/page-elements/29", - "text-order": 29, + "sref": "#/page-elements/113", + "text-order": 113, "type": "footnote" }, { "bbox": [ - 53.66099548339844, - 84.4400634765625, - 246.72222900390625, - 90.71622467041016 + 58.86, + 545.36, + 300.35, + 702.74 ], - "iref": "#/footnotes/10", - "name": "footnote", - "orig-order": 30, - "page": 2, + "iref": "#/figures/6", + "name": "picture", + "orig-order": 114, + "page": 8, "span": [ 0, - 68 + 0 ], - "sref": "#/page-elements/30", - "text-order": 30, - "type": "footnote" + "sref": "#/page-elements/114", + "text-order": 114, + "type": "figure" }, { "bbox": [ - 317.69781494140625, - 416.852783203125, - 560.5628051757812, - 468.70623779296875 + 53.8, + 474.98, + 297.01, + 526.87 ], - "iref": "#/texts/18", - "name": "text", - "orig-order": 31, - "page": 2, + "iref": "#/figures/6/captions/0", + "name": "caption", + "orig-order": 115, + "page": 8, "span": [ 0, - 325 + 281 ], - "sref": "#/page-elements/31", - "text-order": 31, - "type": "paragraph" + "sref": "#/page-elements/115", + "text-order": 115, + "type": "caption" }, { "bbox": [ - 317.9549865722656, - 392.1359558105469, - 440.25689697265625, - 402.8952941894531 + 78.21, + 422.14, + 295.53, + 452.21 ], - "iref": "#/texts/19", - "name": "subtitle-level-1", - "orig-order": 32, - "page": 2, + "iref": "#/texts/77", + "name": "text", + "orig-order": 116, + "page": 8, "span": [ 0, - 17 + 125 ], - "sref": "#/page-elements/32", - "text-order": 32, - "type": "subtitle-level-1" + "sref": "#/page-elements/116", + "text-order": 116, + "type": "paragraph" }, { "bbox": [ - 317.9549865722656, - 357.8152770996094, - 559.0849609375, - 387.74822998046875 + 63.44, + 300.48, + 295.36, + 419.33 ], - "iref": "#/texts/20", - "name": "text", - "orig-order": 33, - "page": 2, + "iref": "#/texts/78", + "name": "list-item", + "orig-order": 117, + "page": 8, "span": [ 0, - 174 + 633 ], - "sref": "#/page-elements/33", - "text-order": 33, + "sref": "#/page-elements/117", + "text-order": 117, "type": "paragraph" }, { "bbox": [ - 317.6319885253906, - 248.01971435546875, - 559.85888671875, - 354.8722229003906 + 53.8, + 199.98, + 295.31, + 296.09 ], - "iref": "#/texts/21", + "iref": "#/texts/79", "name": "text", - "orig-order": 34, - "page": 2, + "orig-order": 118, + "page": 8, "span": [ 0, - 594 + 565 ], - "sref": "#/page-elements/34", - "text-order": 34, + "sref": "#/page-elements/118", + "text-order": 118, "type": "paragraph" }, { "bbox": [ - 317.9549865722656, - 83.84225463867188, - 559.7321166992188, - 245.28321838378906 + 53.8, + 101.78, + 295.53, + 197.46 ], - "iref": "#/texts/22", + "iref": "#/texts/80", "name": "text", - "orig-order": 35, - "page": 2, + "orig-order": 119, + "page": 8, "span": [ 0, - 983 + 605 ], - "sref": "#/page-elements/35", - "text-order": 35, + "sref": "#/page-elements/119", + "text-order": 119, "type": "paragraph" }, { "bbox": [ - 56.01094436645508, - 558.7518920898438, - 290.9949645996094, - 709.7254028320312 + 53.39, + 83.28, + 137.42, + 89.79 ], - "iref": "#/figures/1", - "name": "picture", - "orig-order": 49, - "page": 3, + "iref": "#/footnotes/20", + "name": "footnote", + "orig-order": 120, + "page": 8, "span": [ 0, - 0 + 31 ], - "sref": "#/page-elements/36", - "text-order": 36, - "type": "figure" + "sref": "#/page-elements/120", + "text-order": 120, + "type": "footnote" }, { "bbox": [ - 321.3935546875, - 558.6827392578125, - 554.2520751953125, - 709.9332885742188 + 321.94, + 587.77, + 563.51, + 702.51 ], - "iref": "#/figures/2", + "iref": "#/figures/7", "name": "picture", - "orig-order": 50, - "page": 3, + "orig-order": 121, + "page": 8, "span": [ 0, 0 ], - "sref": "#/page-elements/37", - "text-order": 37, + "sref": "#/page-elements/121", + "text-order": 121, "type": "figure" }, { "bbox": [ - 53.79798889160156, - 472.7510986328125, - 295.5321960449219, - 546.5648193359375 + 317.95, + 538.82, + 558.4, + 568.87 ], - "iref": "#/figures/2/captions/0", + "iref": "#/figures/7/captions/0", "name": "caption", - "orig-order": 36, - "page": 3, + "orig-order": 122, + "page": 8, "span": [ 0, - 389 + 149 ], - "sref": "#/page-elements/38", - "text-order": 38, + "sref": "#/page-elements/122", + "text-order": 122, "type": "caption" }, { "bbox": [ - 53.79800033569336, - 294.82501220703125, - 295.1091003417969, - 445.8912353515625 + 317.95, + 490.72, + 558.58, + 509.75 ], - "iref": "#/texts/23", + "iref": "#/texts/81", "name": "text", - "orig-order": 37, - "page": 3, + "orig-order": 123, + "page": 8, "span": [ 0, - 916 + 87 ], - "sref": "#/page-elements/39", - "text-order": 39, + "sref": "#/page-elements/123", + "text-order": 123, "type": "paragraph" }, { "bbox": [ - 53.79800033569336, - 272.23712158203125, - 137.17259216308594, - 282.3864440917969 + 317.95, + 466.66, + 398.98, + 476.84 ], - "iref": "#/texts/24", + "iref": "#/texts/82", "name": "subtitle-level-1", - "orig-order": 38, - "page": 3, + "orig-order": 124, + "page": 8, "span": [ 0, 14 ], - "sref": "#/page-elements/40", - "text-order": 40, + "sref": "#/page-elements/124", + "text-order": 124, "type": "subtitle-level-1" }, { "bbox": [ - 53.62799835205078, - 214.8350830078125, - 295.6110534667969, - 266.9012145996094 + 317.95, + 408.9, + 559.32, + 463.36 ], - "iref": "#/texts/25", + "iref": "#/texts/83", "name": "text", - "orig-order": 39, - "page": 3, + "orig-order": 125, + "page": 8, "span": [ 0, - 280 + 302 ], - "sref": "#/page-elements/41", - "text-order": 41, + "sref": "#/page-elements/125", + "text-order": 125, "type": "paragraph" }, { "bbox": [ - 53.50199890136719, - 82.777099609375, - 295.5345458984375, - 212.1072235107422 + 317.95, + 332.04, + 559.69, + 406.3 ], - "iref": "#/texts/26", + "iref": "#/texts/84", "name": "text", - "orig-order": 40, - "page": 3, + "orig-order": 126, + "page": 8, "span": [ 0, - 799 + 445 ], - "sref": "#/page-elements/42", - "text-order": 42, + "sref": "#/page-elements/126", + "text-order": 126, "type": "paragraph" }, { "bbox": [ - 317.9456481933594, - 494.0677185058594, - 559.70654296875, - 546.4738159179688 - ], - "iref": "#/figures/1/captions/0", - "name": "caption", - "orig-order": 41, - "page": 3, - "span": [ - 0, - 272 - ], - "sref": "#/page-elements/43", - "text-order": 43, - "type": "caption" - }, - { - "bbox": [ - 317.2680969238281, - 451.83447265625, - 558.4124755859375, - 470.8982238769531 + 317.92, + 277.73, + 559.69, + 329.58 ], - "iref": "#/texts/27", + "iref": "#/texts/85", "name": "text", - "orig-order": 42, - "page": 3, + "orig-order": 127, + "page": 8, "span": [ 0, - 93 + 307 ], - "sref": "#/page-elements/44", - "text-order": 44, + "sref": "#/page-elements/127", + "text-order": 127, "type": "paragraph" }, { "bbox": [ - 317.9549865722656, - 429.5016174316406, - 445.8941345214844, - 439.35626220703125 - ], - "iref": "#/texts/28", - "name": "subtitle-level-1", - "orig-order": 43, - "page": 3, - "span": [ - 0, - 24 - ], - "sref": "#/page-elements/45", - "text-order": 45, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 317.6319885253906, - 306.6052551269531, - 559.0227661132812, - 424.2102355957031 + 317.64, + 200.84, + 559.69, + 274.79 ], - "iref": "#/texts/29", + "iref": "#/texts/86", "name": "text", - "orig-order": 44, - "page": 3, + "orig-order": 128, + "page": 8, "span": [ 0, - 669 + 438 ], - "sref": "#/page-elements/46", - "text-order": 46, + "sref": "#/page-elements/128", + "text-order": 128, "type": "paragraph" }, { "bbox": [ - 317.9549865722656, - 152.94097900390625, - 559.0300903320312, - 303.6622314453125 + 317.95, + 177.28, + 438.01, + 187.38 ], - "iref": "#/texts/30", - "name": "text", - "orig-order": 45, - "page": 3, + "iref": "#/texts/87", + "name": "subtitle-level-1", + "orig-order": 129, + "page": 8, "span": [ 0, - 900 + 22 ], - "sref": "#/page-elements/47", - "text-order": 47, - "type": "paragraph" + "sref": "#/page-elements/129", + "text-order": 129, + "type": "subtitle-level-1" }, { "bbox": [ - 317.542236328125, - 119.9617919921875, - 560.2256469726562, - 150.07322692871094 + 317.73, + 119.97, + 558.45, + 171.94 ], - "iref": "#/texts/31", + "iref": "#/texts/88", "name": "text", - "orig-order": 46, - "page": 3, + "orig-order": 130, + "page": 8, "span": [ 0, - 199 + 320 ], - "sref": "#/page-elements/48", - "text-order": 48, + "sref": "#/page-elements/130", + "text-order": 130, "type": "paragraph" }, { "bbox": [ - 317.8511962890625, - 91.70623779296875, - 558.1990356445312, - 106.6500244140625 + 317.54, + 93.01, + 382.23, + 99.58 ], - "iref": "#/footnotes/11", + "iref": "#/footnotes/21", "name": "footnote", - "orig-order": 47, - "page": 3, + "orig-order": 131, + "page": 8, "span": [ 0, - 102 + 29 ], - "sref": "#/page-elements/49", - "text-order": 49, + "sref": "#/page-elements/131", + "text-order": 131, "type": "footnote" }, { "bbox": [ - 317.9549865722656, - 83.3656005859375, - 397.3962707519531, - 89.81023406982422 + 317.54, + 84.26, + 382.03, + 90.77 ], - "iref": "#/footnotes/12", + "iref": "#/footnotes/22", "name": "footnote", - "orig-order": 48, - "page": 3, + "orig-order": 132, + "page": 8, "span": [ 0, - 34 + 27 ], - "sref": "#/page-elements/50", - "text-order": 50, + "sref": "#/page-elements/132", + "text-order": 132, "type": "footnote" }, { "bbox": [ - 53.79800033569336, - 608.7432250976562, - 295.53790283203125, - 704.4302368164062 + 53.8, + 619.7, + 295.09, + 706.54 ], - "iref": "#/texts/32", + "iref": "#/texts/89", "name": "text", - "orig-order": 51, - "page": 4, + "orig-order": 133, + "page": 9, "span": [ 0, - 542 + 504 ], - "sref": "#/page-elements/51", - "text-order": 51, + "sref": "#/page-elements/133", + "text-order": 133, "type": "paragraph" }, { "bbox": [ - 53.79800033569336, - 574.2219848632812, - 231.56687927246094, - 596.9802856445312 + 53.47, + 421.44, + 295.7, + 616.76 ], - "iref": "#/texts/33", - "name": "subtitle-level-1", - "orig-order": 52, - "page": 4, + "iref": "#/texts/90", + "name": "text", + "orig-order": 134, + "page": 9, "span": [ 0, - 51 + 1164 ], - "sref": "#/page-elements/52", - "text-order": 52, - "type": "subtitle-level-1" + "sref": "#/page-elements/134", + "text-order": 134, + "type": "paragraph" }, { "bbox": [ - 53.79800033569336, - 473.106689453125, - 295.5303955078125, - 568.8822021484375 + 53.72, + 396.84, + 144.17, + 407.84 ], - "iref": "#/texts/34", - "name": "text", - "orig-order": 53, - "page": 4, + "iref": "#/texts/91", + "name": "subtitle-level-1", + "orig-order": 135, + "page": 9, "span": [ 0, - 557 + 12 ], - "sref": "#/page-elements/53", - "text-order": 53, - "type": "paragraph" + "sref": "#/page-elements/135", + "text-order": 135, + "type": "subtitle-level-1" }, { "bbox": [ - 53.250999450683594, - 319.60626220703125, - 295.61322021484375, - 470.2522277832031 + 53.38, + 340.67, + 295.54, + 392.52 ], - "iref": "#/texts/35", + "iref": "#/texts/92", "name": "text", - "orig-order": 54, - "page": 4, + "orig-order": 136, + "page": 9, "span": [ 0, - 919 + 276 ], - "sref": "#/page-elements/54", - "text-order": 54, + "sref": "#/page-elements/136", + "text-order": 136, "type": "paragraph" }, { "bbox": [ - 53.474998474121094, - 154.744140625, - 296.03668212890625, - 316.6632385253906 + 53.8, + 263.88, + 295.62, + 337.73 ], - "iref": "#/texts/36", + "iref": "#/texts/93", "name": "text", - "orig-order": 55, - "page": 4, + "orig-order": 137, + "page": 9, "span": [ 0, - 1011 + 468 ], - "sref": "#/page-elements/55", - "text-order": 55, + "sref": "#/page-elements/137", + "text-order": 137, "type": "paragraph" }, { "bbox": [ - 53.79800033569336, - 121.91156005859375, - 295.533203125, - 152.2802276611328 + 53.8, + 131.82, + 295.61, + 261.01 ], - "iref": "#/texts/37", + "iref": "#/texts/94", "name": "text", - "orig-order": 56, - "page": 4, + "orig-order": 138, + "page": 9, "span": [ 0, - 195 + 808 ], - "sref": "#/page-elements/56", - "text-order": 56, + "sref": "#/page-elements/138", + "text-order": 138, "type": "paragraph" }, { "bbox": [ - 53.387001037597656, - 83.17366027832031, - 294.92218017578125, - 113.5859375 + 53.8, + 84.16, + 295.29, + 106.99 ], - "iref": "#/footnotes/13", + "iref": "#/footnotes/23", "name": "footnote", - "orig-order": 57, - "page": 4, + "orig-order": 139, + "page": 9, "span": [ 0, - 290 + 237 ], - "sref": "#/page-elements/57", - "text-order": 57, + "sref": "#/page-elements/139", + "text-order": 139, "type": "footnote" }, { "bbox": [ - 326.25421142578125, - 539.8611450195312, - 548.1567993164062, - 703.5318603515625 + 317.69, + 684.98, + 559.69, + 704.43 ], - "iref": "#/figures/3", - "name": "picture", - "orig-order": 58, - "page": 4, + "iref": "#/texts/95", + "name": "text", + "orig-order": 140, + "page": 9, "span": [ 0, - 0 + 119 ], - "sref": "#/page-elements/58", - "text-order": 58, - "type": "figure" + "sref": "#/page-elements/140", + "text-order": 140, + "type": "paragraph" }, { "bbox": [ - 317.6319885253906, - 415.019287109375, - 560.175537109375, - 522.0748291015625 + 317.95, + 663.34, + 438.23, + 673.75 ], - "iref": "#/figures/3/captions/0", - "name": "caption", - "orig-order": 59, - "page": 4, + "iref": "#/texts/96", + "name": "subtitle-level-1", + "orig-order": 141, + "page": 9, "span": [ 0, - 576 + 15 ], - "sref": "#/page-elements/59", - "text-order": 59, - "type": "caption" + "sref": "#/page-elements/141", + "text-order": 141, + "type": "subtitle-level-1" }, { "bbox": [ - 317.9549865722656, - 304.00390625, - 559.1529541015625, - 388.9792175292969 + 317.68, + 639.63, + 564.39, + 658.7 ], - "iref": "#/texts/38", + "iref": "#/texts/97", "name": "text", - "orig-order": 60, - "page": 4, + "orig-order": 142, + "page": 9, "span": [ 0, - 539 + 127 ], - "sref": "#/page-elements/60", - "text-order": 60, + "sref": "#/page-elements/142", + "text-order": 142, "type": "paragraph" }, { "bbox": [ - 317.9549865722656, - 268.2449951171875, - 522.75146484375, - 291.0042724609375 - ], - "iref": "#/texts/39", - "name": "subtitle-level-1", - "orig-order": 61, - "page": 4, - "span": [ - 0, - 55 - ], - "sref": "#/page-elements/61", - "text-order": 61, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 317.9437561035156, - 166.98492431640625, - 559.7679443359375, - 263.0128173828125 + 317.45, + 584.83, + 571.96, + 636.8 ], - "iref": "#/texts/40", + "iref": "#/texts/98", "name": "text", - "orig-order": 62, - "page": 4, + "orig-order": 143, + "page": 9, "span": [ 0, - 605 + 269 ], - "sref": "#/page-elements/62", - "text-order": 62, + "sref": "#/page-elements/143", + "text-order": 143, "type": "paragraph" }, { "bbox": [ - 317.9549865722656, - 83.1304931640625, - 560.1549682617188, - 157.56007385253906 + 317.95, + 563.32, + 391.3, + 573.29 ], - "iref": "#/texts/41", - "name": "text", - "orig-order": 63, - "page": 4, + "iref": "#/texts/99", + "name": "subtitle-level-1", + "orig-order": 144, + "page": 9, "span": [ 0, - 466 + 10 ], - "sref": "#/page-elements/63", - "text-order": 63, - "type": "paragraph" + "sref": "#/page-elements/144", + "text-order": 144, + "type": "subtitle-level-1" }, { "bbox": [ - 55.4039421081543, - 459.4396667480469, - 294.0187072753906, - 709.196533203125 + 320.02, + 529.34, + 560.26, + 559.62 ], - "iref": "#/figures/4", - "name": "picture", - "orig-order": 79, - "page": 5, + "iref": "#/texts/100", + "name": "list-item", + "orig-order": 145, + "page": 9, "span": [ 0, - 0 + 280 ], - "sref": "#/page-elements/64", - "text-order": 64, - "type": "figure" + "sref": "#/page-elements/145", + "text-order": 145, + "type": "paragraph" }, { "bbox": [ - 53.76737594604492, - 404.8351745605469, - 296.919189453125, - 446.1678161621094 + 321.42, + 513.5, + 559.07, + 527.74 ], - "iref": "#/figures/4/captions/0", - "name": "caption", - "orig-order": 64, - "page": 5, + "iref": "#/texts/101", + "name": "list-item", + "orig-order": 146, + "page": 9, "span": [ 0, - 228 + 122 ], - "sref": "#/page-elements/65", - "text-order": 65, - "type": "caption" + "sref": "#/page-elements/146", + "text-order": 146, + "type": "paragraph" }, { "bbox": [ - 53.79800033569336, - 353.96826171875, - 295.1701354980469, - 383.9022216796875 + 321.33, + 489.45, + 559.14, + 511.8 ], - "iref": "#/texts/42", - "name": "text", - "orig-order": 65, - "page": 5, + "iref": "#/texts/102", + "name": "list-item", + "orig-order": 147, + "page": 9, "span": [ 0, - 199 + 164 ], - "sref": "#/page-elements/66", - "text-order": 66, + "sref": "#/page-elements/147", + "text-order": 147, "type": "paragraph" }, { "bbox": [ - 53.79800033569336, - 332.0502624511719, - 294.4319152832031, - 351.070068359375 + 321.15, + 457.44, + 559.23, + 487.89 ], - "iref": "#/texts/43", - "name": "text", - "orig-order": 66, - "page": 5, + "iref": "#/texts/103", + "name": "list-item", + "orig-order": 148, + "page": 9, "span": [ 0, - 105 + 282 ], - "sref": "#/page-elements/67", - "text-order": 67, + "sref": "#/page-elements/148", + "text-order": 148, "type": "paragraph" }, { "bbox": [ - 117.81383514404297, - 304.81182861328125, - 294.531494140625, - 327.9595642089844 + 321.21, + 433.31, + 559.07, + 456.01 ], - "iref": "#/texts/44", - "name": "formula", - "orig-order": 67, - "page": 5, + "iref": "#/texts/104", + "name": "list-item", + "orig-order": 149, + "page": 9, "span": [ 0, - 73 + 224 ], - "sref": "#/page-elements/68", - "text-order": 68, - "type": "equation" + "sref": "#/page-elements/149", + "text-order": 149, + "type": "paragraph" }, { "bbox": [ - 53.474998474121094, - 280.8752746582031, - 294.5132751464844, - 300.0321044921875 + 321.44, + 409.4, + 558.46, + 432.1 ], - "iref": "#/texts/45", - "name": "text", - "orig-order": 68, - "page": 5, + "iref": "#/texts/105", + "name": "list-item", + "orig-order": 150, + "page": 9, "span": [ 0, - 124 + 233 ], - "sref": "#/page-elements/69", - "text-order": 69, + "sref": "#/page-elements/150", + "text-order": 150, "type": "paragraph" }, { "bbox": [ - 53.79800033569336, - 154.57626342773438, - 295.1409912109375, - 272.4860534667969 + 320.99, + 378.04, + 559.8, + 408.19 ], - "iref": "#/texts/46", - "name": "text", - "orig-order": 69, - "page": 5, + "iref": "#/texts/106", + "name": "list-item", + "orig-order": 151, + "page": 9, "span": [ 0, - 715 + 250 ], - "sref": "#/page-elements/70", - "text-order": 70, + "sref": "#/page-elements/151", + "text-order": 151, "type": "paragraph" }, { "bbox": [ - 53.79800033569336, - 121.69925689697266, - 295.1619567871094, - 151.6332244873047 + 321.16, + 346.16, + 560.1, + 376.45 ], - "iref": "#/texts/47", - "name": "text", - "orig-order": 70, - "page": 5, + "iref": "#/texts/107", + "name": "list-item", + "orig-order": 152, + "page": 9, "span": [ 0, - 172 + 227 ], - "sref": "#/page-elements/71", - "text-order": 71, + "sref": "#/page-elements/152", + "text-order": 152, "type": "paragraph" }, { "bbox": [ - 53.79800033569336, - 99.61725616455078, - 294.5709533691406, - 118.8629150390625 + 320.2, + 330.22, + 558.26, + 344.58 ], - "iref": "#/texts/48", - "name": "text", - "orig-order": 71, - "page": 5, + "iref": "#/texts/108", + "name": "list-item", + "orig-order": 153, + "page": 9, "span": [ 0, - 125 + 116 ], - "sref": "#/page-elements/72", - "text-order": 72, + "sref": "#/page-elements/153", + "text-order": 153, "type": "paragraph" }, { "bbox": [ - 53.387001037597656, - 83.28266143798828, - 294.561279296875, - 89.6395263671875 + 317.96, + 274.36, + 572.77, + 328.48 ], - "iref": "#/footnotes/14", - "name": "footnote", - "orig-order": 72, - "page": 5, + "iref": "#/texts/109", + "name": "list-item", + "orig-order": 154, + "page": 9, "span": [ 0, - 93 + 425 ], - "sref": "#/page-elements/73", - "text-order": 73, - "type": "footnote" + "sref": "#/page-elements/154", + "text-order": 154, + "type": "paragraph" }, { "bbox": [ - 316.9908142089844, - 622.02099609375, - 560.0983276367188, - 706.829833984375 + 317.07, + 250.1, + 560.98, + 272.69 ], - "iref": "#/tables/0/captions/0", - "name": "text", - "orig-order": 73, - "page": 5, + "iref": "#/texts/110", + "name": "list-item", + "orig-order": 155, + "page": 9, "span": [ 0, - 461 + 166 + ], + "sref": "#/page-elements/155", + "text-order": 155, + "type": "paragraph" + } + ], + "page-footers": [], + "page-headers": [], + "properties": { + "data": [ + [ + "language", + 2942883588400666364, + "DOCUMENT", + "#", + "en", + 1.0 + ], + [ + "language", + 7377574370756688828, + "TEXT", + "#/texts/0", + "en", + 0.78 + ], + [ + "semantic", + 7377574370756688828, + "TEXT", + "#/texts/0", + "text", + 0.94 + ], + [ + "language", + 10227328696767902037, + "TEXT", + "#/texts/1", + "en", + 0.7 + ], + [ + "semantic", + 10227328696767902037, + "TEXT", + "#/texts/1", + "header", + 0.71 + ], + [ + "language", + 8770494724746327817, + "TEXT", + "#/texts/2", + "en", + 0.26 + ], + [ + "semantic", + 8770494724746327817, + "TEXT", + "#/texts/2", + "meta-data", + 0.8 + ], + [ + "language", + 18258237174351515285, + "TEXT", + "#/texts/3", + "zh", + 0.09 + ], + [ + "semantic", + 18258237174351515285, + "TEXT", + "#/texts/3", + "text", + 0.79 + ], + [ + "language", + 5704354110496947297, + "TEXT", + "#/texts/4", + "en", + 0.53 + ], + [ + "semantic", + 5704354110496947297, + "TEXT", + "#/texts/4", + "meta-data", + 1.0 + ], + [ + "language", + 11056873211244709904, + "TEXT", + "#/texts/5", + "en", + 0.49 + ], + [ + "semantic", + 11056873211244709904, + "TEXT", + "#/texts/5", + "meta-data", + 1.0 + ], + [ + "language", + 11788868678004267702, + "TEXT", + "#/texts/6", + "en", + 0.65 + ], + [ + "semantic", + 11788868678004267702, + "TEXT", + "#/texts/6", + "meta-data", + 1.0 + ], + [ + "language", + 3624246356859711021, + "TEXT", + "#/texts/7", + "en", + 0.55 + ], + [ + "semantic", + 3624246356859711021, + "TEXT", + "#/texts/7", + "header", + 1.0 + ], + [ + "language", + 17999848460847860039, + "TEXT", + "#/texts/8", + "en", + 0.92 + ], + [ + "semantic", + 17999848460847860039, + "TEXT", + "#/texts/8", + "text", + 0.97 + ], + [ + "language", + 14387482728083328702, + "TEXT", + "#/texts/9", + "en", + 0.21 + ], + [ + "semantic", + 14387482728083328702, + "TEXT", + "#/texts/9", + "header", + 0.99 + ], + [ + "language", + 11222145795862225841, + "TEXT", + "#/texts/10", + "en", + 0.49 + ], + [ + "semantic", + 11222145795862225841, + "TEXT", + "#/texts/10", + "text", + 0.86 + ], + [ + "language", + 16923207262044929933, + "TEXT", + "#/texts/11", + "en", + 0.94 + ], + [ + "semantic", + 16923207262044929933, + "TEXT", + "#/texts/11", + "text", + 0.97 + ], + [ + "language", + 3749305213430885773, + "TEXT", + "#/texts/12", + "en", + 0.95 + ], + [ + "semantic", + 3749305213430885773, + "TEXT", + "#/texts/12", + "text", + 0.98 + ], + [ + "language", + 3409470577915009676, + "TEXT", + "#/texts/13", + "en", + 0.95 + ], + [ + "semantic", + 3409470577915009676, + "TEXT", + "#/texts/13", + "text", + 0.99 + ], + [ + "language", + 17187299362680072378, + "TEXT", + "#/texts/14", + "en", + 0.92 + ], + [ + "semantic", + 17187299362680072378, + "TEXT", + "#/texts/14", + "text", + 1.0 + ], + [ + "language", + 697648145931166262, + "TEXT", + "#/texts/15", + "en", + 0.48 + ], + [ + "semantic", + 697648145931166262, + "TEXT", + "#/texts/15", + "header", + 1.0 + ], + [ + "language", + 7935233310532930917, + "TEXT", + "#/texts/16", + "en", + 0.92 + ], + [ + "semantic", + 7935233310532930917, + "TEXT", + "#/texts/16", + "text", + 0.98 + ], + [ + "language", + 2762070725424637531, + "TEXT", + "#/texts/17", + "en", + 0.97 + ], + [ + "semantic", + 2762070725424637531, + "TEXT", + "#/texts/17", + "text", + 0.98 + ], + [ + "language", + 7536915191196259776, + "TEXT", + "#/texts/18", + "en", + 0.99 + ], + [ + "semantic", + 7536915191196259776, + "TEXT", + "#/texts/18", + "text", + 0.95 + ], + [ + "language", + 11495493007651807568, + "TEXT", + "#/texts/19", + "en", + 0.31 + ], + [ + "semantic", + 11495493007651807568, + "TEXT", + "#/texts/19", + "header", + 1.0 + ], + [ + "language", + 7650015170039242996, + "TEXT", + "#/texts/20", + "en", + 0.94 + ], + [ + "semantic", + 7650015170039242996, + "TEXT", + "#/texts/20", + "text", + 1.0 + ], + [ + "language", + 14959508657858158650, + "TEXT", + "#/texts/21", + "en", + 0.96 + ], + [ + "semantic", + 14959508657858158650, + "TEXT", + "#/texts/21", + "text", + 0.97 + ], + [ + "language", + 10379300903412882972, + "TEXT", + "#/texts/22", + "en", + 0.94 + ], + [ + "semantic", + 10379300903412882972, + "TEXT", + "#/texts/22", + "text", + 0.95 + ], + [ + "language", + 4994395008195818594, + "TEXT", + "#/texts/23", + "en", + 0.96 + ], + [ + "semantic", + 4994395008195818594, + "TEXT", + "#/texts/23", + "text", + 0.97 + ], + [ + "language", + 4203835122307823579, + "TEXT", + "#/texts/24", + "en", + 0.24 + ], + [ + "semantic", + 4203835122307823579, + "TEXT", + "#/texts/24", + "header", + 1.0 + ], + [ + "language", + 13520362244078084911, + "TEXT", + "#/texts/25", + "en", + 0.75 + ], + [ + "semantic", + 13520362244078084911, + "TEXT", + "#/texts/25", + "text", + 0.98 + ], + [ + "language", + 1749622367305947670, + "TEXT", + "#/texts/26", + "en", + 0.9 + ], + [ + "semantic", + 1749622367305947670, + "TEXT", + "#/texts/26", + "text", + 0.99 + ], + [ + "language", + 11083736481641202939, + "TEXT", + "#/texts/27", + "en", + 0.92 + ], + [ + "semantic", + 11083736481641202939, + "TEXT", + "#/texts/27", + "text", + 0.96 ], - "sref": "#/page-elements/74", - "text-order": 74, - "type": "paragraph" - }, - { - "bbox": [ - 334.4774475097656, - 554.5862426757812, - 541.1703491210938, - 609.4986572265625 + [ + "language", + 15403141463083979171, + "TEXT", + "#/texts/28", + "en", + 0.68 ], - "iref": "#/tables/0", - "name": "table", - "orig-order": 74, - "page": 5, - "span": [ - 0, - 0 + [ + "semantic", + 15403141463083979171, + "TEXT", + "#/texts/28", + "header", + 1.0 ], - "sref": "#/page-elements/75", - "text-order": 75, - "type": "table" - }, - { - "bbox": [ - 317.37548828125, - 468.0936279296875, - 559.939453125, - 520.1222534179688 + [ + "language", + 12234429517419341922, + "TEXT", + "#/texts/29", + "en", + 0.93 ], - "iref": "#/texts/49", - "name": "text", - "orig-order": 75, - "page": 5, - "span": [ - 0, - 337 + [ + "semantic", + 12234429517419341922, + "TEXT", + "#/texts/29", + "text", + 1.0 ], - "sref": "#/page-elements/76", - "text-order": 76, - "type": "paragraph" - }, - { - "bbox": [ - 317.6319885253906, - 303.8862609863281, - 561.6922607421875, - 465.32720947265625 + [ + "language", + 16957857111665886816, + "TEXT", + "#/texts/30", + "en", + 0.94 ], - "iref": "#/texts/50", - "name": "text", - "orig-order": 76, - "page": 5, - "span": [ - 0, - 955 + [ + "semantic", + 16957857111665886816, + "TEXT", + "#/texts/30", + "text", + 1.0 ], - "sref": "#/page-elements/77", - "text-order": 77, - "type": "paragraph" - }, - { - "bbox": [ - 317.6319885253906, - 149.8055419921875, - 560.1611328125, - 300.9432373046875 + [ + "language", + 10390915169360946497, + "TEXT", + "#/texts/31", + "en", + 0.85 ], - "iref": "#/texts/51", - "name": "text", - "orig-order": 77, - "page": 5, - "span": [ - 0, - 913 + [ + "semantic", + 10390915169360946497, + "TEXT", + "#/texts/31", + "text", + 0.99 ], - "sref": "#/page-elements/78", - "text-order": 78, - "type": "paragraph" - }, - { - "bbox": [ - 317.6319885253906, - 84.708251953125, - 559.6876831054688, - 147.51922607421875 + [ + "language", + 15254383206256494278, + "TEXT", + "#/texts/32", + "en", + 0.94 ], - "iref": "#/texts/52", - "name": "text", - "orig-order": 78, - "page": 5, - "span": [ - 0, - 398 + [ + "semantic", + 15254383206256494278, + "TEXT", + "#/texts/32", + "text", + 0.99 ], - "sref": "#/page-elements/79", - "text-order": 79, - "type": "paragraph" - }, - { - "bbox": [ - 53.50199890136719, - 654.8878173828125, - 295.74688720703125, - 706.829833984375 + [ + "language", + 17759618186065566858, + "TEXT", + "#/texts/33", + "en", + 0.83 ], - "iref": "#/texts/53", - "name": "text", - "orig-order": 80, - "page": 6, - "span": [ - 0, - 310 + [ + "semantic", + 17759618186065566858, + "TEXT", + "#/texts/33", + "header", + 1.0 ], - "sref": "#/page-elements/80", - "text-order": 80, - "type": "paragraph" - }, - { - "bbox": [ - 54.41073989868164, - 497.82928466796875, - 294.0743103027344, - 642.206787109375 + [ + "language", + 11638821473906997927, + "TEXT", + "#/texts/34", + "en", + 0.97 ], - "iref": "#/tables/1", - "name": "table", - "orig-order": 81, - "page": 6, - "span": [ - 0, - 0 + [ + "semantic", + 11638821473906997927, + "TEXT", + "#/texts/34", + "text", + 0.98 ], - "sref": "#/page-elements/81", - "text-order": 81, - "type": "table" - }, - { - "bbox": [ - 53.474998474121094, - 321.0742492675781, - 295.6167907714844, - 471.55621337890625 + [ + "language", + 13020065077657899116, + "TEXT", + "#/texts/35", + "en", + 0.89 ], - "iref": "#/texts/54", - "name": "text", - "orig-order": 82, - "page": 6, - "span": [ - 0, - 867 + [ + "semantic", + 13020065077657899116, + "TEXT", + "#/texts/35", + "text", + 0.99 ], - "sref": "#/page-elements/82", - "text-order": 82, - "type": "paragraph" - }, - { - "bbox": [ - 53.79800033569336, - 236.98883056640625, - 295.53466796875, - 311.1290588378906 + [ + "language", + 10103841011442966464, + "TEXT", + "#/texts/36", + "en", + 0.94 ], - "iref": "#/texts/55", - "name": "text", - "orig-order": 83, - "page": 6, - "span": [ - 0, - 460 + [ + "semantic", + 10103841011442966464, + "TEXT", + "#/texts/36", + "text", + 1.0 ], - "sref": "#/page-elements/83", - "text-order": 83, - "type": "paragraph" - }, - { - "bbox": [ - 53.79800033569336, - 127.0894775390625, - 295.61102294921875, - 234.11122131347656 + [ + "language", + 10982401368140758581, + "TEXT", + "#/texts/37", + "en", + 0.96 ], - "iref": "#/texts/56", - "name": "text", - "orig-order": 84, - "page": 6, - "span": [ - 0, - 635 + [ + "semantic", + 10982401368140758581, + "TEXT", + "#/texts/37", + "text", + 1.0 ], - "sref": "#/page-elements/84", - "text-order": 84, - "type": "paragraph" - }, - { - "bbox": [ - 53.79800033569336, - 83.63025665283203, - 295.5378723144531, - 124.522216796875 + [ + "language", + 887751753527930563, + "TEXT", + "#/texts/38", + "en", + 0.95 ], - "iref": "#/texts/57", - "name": "text", - "orig-order": 85, - "page": 6, - "span": [ - 0, - 256 + [ + "semantic", + 887751753527930563, + "TEXT", + "#/texts/38", + "text", + 1.0 ], - "sref": "#/page-elements/85", - "text-order": 85, - "type": "paragraph" - }, - { - "bbox": [ - 317.49591064453125, - 643.8729248046875, - 560.350341796875, - 706.829833984375 + [ + "language", + 4695688617288377564, + "TEXT", + "#/texts/39", + "en", + 0.8 ], - "iref": "#/tables/1/captions/0", - "name": "text", - "orig-order": 86, - "page": 6, - "span": [ - 0, - 356 + [ + "semantic", + 4695688617288377564, + "TEXT", + "#/texts/39", + "header", + 1.0 ], - "sref": "#/page-elements/86", - "text-order": 86, - "type": "paragraph" - }, - { - "bbox": [ - 369.7939453125, - 587.8507080078125, - 506.9258117675781, - 631.5213012695312 + [ + "language", + 3275001812318455279, + "TEXT", + "#/texts/40", + "en", + 0.94 ], - "iref": "#/tables/2", - "name": "table", - "orig-order": 87, - "page": 6, - "span": [ - 0, - 0 + [ + "semantic", + 3275001812318455279, + "TEXT", + "#/texts/40", + "text", + 1.0 ], - "sref": "#/page-elements/87", - "text-order": 87, - "type": "table" - }, - { - "bbox": [ - 317.9549865722656, - 505.94525146484375, - 559.6949462890625, - 568.7562255859375 + [ + "language", + 15354930767839681193, + "TEXT", + "#/texts/41", + "en", + 0.9 ], - "iref": "#/texts/58", - "name": "text", - "orig-order": 88, - "page": 6, - "span": [ - 0, - 346 + [ + "semantic", + 15354930767839681193, + "TEXT", + "#/texts/41", + "text", + 1.0 ], - "sref": "#/page-elements/88", - "text-order": 88, - "type": "paragraph" - }, - { - "bbox": [ - 317.9183349609375, - 384.9764404296875, - 559.3198852539062, - 503.0022277832031 + [ + "language", + 6337233386759158728, + "TEXT", + "#/texts/42", + "en", + 0.9 ], - "iref": "#/texts/59", - "name": "text", - "orig-order": 89, - "page": 6, - "span": [ - 0, - 689 + [ + "semantic", + 6337233386759158728, + "TEXT", + "#/texts/42", + "text", + 1.0 ], - "sref": "#/page-elements/89", - "text-order": 89, - "type": "paragraph" - }, - { - "bbox": [ - 317.87957763671875, - 351.9661865234375, - 559.6873168945312, - 382.4542236328125 + [ + "language", + 2249972239307071508, + "TEXT", + "#/texts/43", + "en", + 0.82 ], - "iref": "#/texts/60", - "name": "text", - "orig-order": 90, - "page": 6, - "span": [ - 0, - 198 + [ + "semantic", + 2249972239307071508, + "TEXT", + "#/texts/43", + "text", + 1.0 ], - "sref": "#/page-elements/90", - "text-order": 90, - "type": "paragraph" - }, - { - "bbox": [ - 317.9089660644531, - 253.72625732421875, - 558.7941284179688, - 349.57720947265625 + [ + "language", + 12383805870947794174, + "TEXT", + "#/texts/44", + "en", + 0.27 ], - "iref": "#/texts/61", - "name": "text", - "orig-order": 91, - "page": 6, - "span": [ - 0, - 558 + [ + "semantic", + 12383805870947794174, + "TEXT", + "#/texts/44", + "text", + 1.0 ], - "sref": "#/page-elements/91", - "text-order": 91, - "type": "paragraph" - }, - { - "bbox": [ - 317.6319885253906, - 165.80517578125, - 558.5486450195312, - 250.78321838378906 + [ + "language", + 7053654953998543393, + "TEXT", + "#/texts/45", + "en", + 0.58 ], - "iref": "#/texts/62", - "name": "text", - "orig-order": 92, - "page": 6, - "span": [ - 0, - 531 + [ + "semantic", + 7053654953998543393, + "TEXT", + "#/texts/45", + "text", + 1.0 ], - "sref": "#/page-elements/92", - "text-order": 92, - "type": "paragraph" - }, - { - "bbox": [ - 317.9549865722656, - 144.355712890625, - 388.1922607421875, - 154.5562744140625 + [ + "language", + 15921044595687116426, + "TEXT", + "#/texts/46", + "en", + 0.95 ], - "iref": "#/texts/63", - "name": "subtitle-level-1", - "orig-order": 93, - "page": 6, - "span": [ - 0, - 12 + [ + "semantic", + 15921044595687116426, + "TEXT", + "#/texts/46", + "text", + 0.99 ], - "sref": "#/page-elements/93", - "text-order": 93, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 317.9549865722656, - 97.82269287109375, - 558.6990966796875, - 139.1552276611328 + [ + "language", + 12234068400463628788, + "TEXT", + "#/texts/47", + "en", + 0.97 ], - "iref": "#/texts/64", - "name": "text", - "orig-order": 94, - "page": 6, - "span": [ - 0, - 277 + [ + "semantic", + 12234068400463628788, + "TEXT", + "#/texts/47", + "text", + 1.0 ], - "sref": "#/page-elements/94", - "text-order": 94, - "type": "paragraph" - }, - { - "bbox": [ - 317.54400634765625, - 83.16966247558594, - 398.95098876953125, - 89.63201904296875 + [ + "language", + 4628466594790006384, + "TEXT", + "#/texts/48", + "en", + 0.92 + ], + [ + "semantic", + 4628466594790006384, + "TEXT", + "#/texts/48", + "text", + 1.0 ], - "iref": "#/footnotes/15", - "name": "footnote", - "orig-order": 95, - "page": 6, - "span": [ - 0, - 35 + [ + "language", + 9651706913678711778, + "TEXT", + "#/texts/49", + "en", + 0.92 ], - "sref": "#/page-elements/95", - "text-order": 95, - "type": "footnote" - }, - { - "bbox": [ - 53.79800033569336, - 687.81005859375, - 296.0726318359375, - 706.829833984375 + [ + "semantic", + 9651706913678711778, + "TEXT", + "#/texts/49", + "text", + 0.99 ], - "iref": "#/texts/65", - "name": "text", - "orig-order": 96, - "page": 7, - "span": [ - 0, - 104 + [ + "language", + 1363251178266051349, + "TEXT", + "#/texts/50", + "en", + 0.91 ], - "sref": "#/page-elements/96", - "text-order": 96, - "type": "paragraph" - }, - { - "bbox": [ - 52.97157669067383, - 452.9112548828125, - 291.5167236328125, - 672.6514282226562 + [ + "semantic", + 1363251178266051349, + "TEXT", + "#/texts/50", + "text", + 0.98 ], - "iref": "#/texts/66", - "name": "text", - "orig-order": 97, - "page": 7, - "span": [ - 0, - 723 + [ + "language", + 18259197018396996238, + "TEXT", + "#/texts/51", + "en", + 0.96 ], - "sref": "#/page-elements/97", - "text-order": 97, - "type": "paragraph" - }, - { - "bbox": [ - 53.79800033569336, - 388.8714294433594, - 295.0650634765625, - 430.3962097167969 + [ + "semantic", + 18259197018396996238, + "TEXT", + "#/texts/51", + "text", + 1.0 ], - "iref": "#/texts/67", - "name": "text", - "orig-order": 98, - "page": 7, - "span": [ - 0, - 226 + [ + "language", + 14663676516964431047, + "TEXT", + "#/texts/52", + "en", + 0.95 ], - "sref": "#/page-elements/98", - "text-order": 98, - "type": "paragraph" - }, - { - "bbox": [ - 53.474998474121094, - 301.7305908203125, - 295.7899169921875, - 386.56121826171875 + [ + "semantic", + 14663676516964431047, + "TEXT", + "#/texts/52", + "text", + 0.99 ], - "iref": "#/texts/68", - "name": "text", - "orig-order": 99, - "page": 7, - "span": [ - 0, - 530 + [ + "language", + 4577067829072175096, + "TEXT", + "#/texts/53", + "en", + 0.86 ], - "sref": "#/page-elements/99", - "text-order": 99, - "type": "paragraph" - }, - { - "bbox": [ - 53.79800033569336, - 265.370849609375, - 287.7526550292969, - 288.25628662109375 + [ + "semantic", + 4577067829072175096, + "TEXT", + "#/texts/53", + "text", + 0.99 ], - "iref": "#/texts/69", - "name": "subtitle-level-1", - "orig-order": 100, - "page": 7, - "span": [ - 0, - 61 + [ + "language", + 2569392033451362672, + "TEXT", + "#/texts/54", + "en", + 0.92 ], - "sref": "#/page-elements/100", - "text-order": 100, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 53.474998474121094, - 130.9022216796875, - 295.88214111328125, - 260.0602111816406 + [ + "semantic", + 2569392033451362672, + "TEXT", + "#/texts/54", + "text", + 1.0 ], - "iref": "#/texts/70", - "name": "text", - "orig-order": 101, - "page": 7, - "span": [ - 0, - 777 + [ + "language", + 14539041145469267811, + "TEXT", + "#/texts/55", + "en", + 0.92 ], - "sref": "#/page-elements/101", - "text-order": 101, - "type": "paragraph" - }, - { - "bbox": [ - 53.79800033569336, - 107.44329833984375, - 150.55332946777344, - 117.82127380371094 + [ + "semantic", + 14539041145469267811, + "TEXT", + "#/texts/55", + "text", + 0.99 ], - "iref": "#/texts/71", - "name": "subtitle-level-1", - "orig-order": 102, - "page": 7, - "span": [ - 0, - 19 + [ + "language", + 8607014065143641201, + "TEXT", + "#/texts/56", + "en", + 0.95 ], - "sref": "#/page-elements/102", - "text-order": 102, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 53.79800033569336, - 83.70025634765625, - 295.5948791503906, - 102.80303955078125 + [ + "semantic", + 8607014065143641201, + "TEXT", + "#/texts/56", + "text", + 0.99 ], - "iref": "#/texts/72", - "name": "text", - "orig-order": 103, - "page": 7, - "span": [ - 0, - 127 + [ + "language", + 1994904537764312371, + "TEXT", + "#/texts/57", + "en", + 0.95 ], - "sref": "#/page-elements/103", - "text-order": 103, - "type": "paragraph" - }, - { - "bbox": [ - 319.4678649902344, - 591.0667114257812, - 563.418212890625, - 707.4041137695312 + [ + "semantic", + 1994904537764312371, + "TEXT", + "#/texts/57", + "text", + 1.0 ], - "iref": "#/figures/5", - "name": "picture", - "orig-order": 104, - "page": 7, - "span": [ - 0, - 0 + [ + "language", + 7742256726079628058, + "TEXT", + "#/texts/58", + "en", + 0.92 ], - "sref": "#/page-elements/104", - "text-order": 104, - "type": "figure" - }, - { - "bbox": [ - 317.9549865722656, - 491.0215148925781, - 561.2398681640625, - 575.8748168945312 + [ + "semantic", + 7742256726079628058, + "TEXT", + "#/texts/58", + "text", + 1.0 ], - "iref": "#/figures/5/captions/0", - "name": "text", - "orig-order": 105, - "page": 7, - "span": [ - 0, - 462 + [ + "language", + 8810233123818174294, + "TEXT", + "#/texts/59", + "en", + 0.96 ], - "sref": "#/page-elements/105", - "text-order": 105, - "type": "paragraph" - }, - { - "bbox": [ - 317.9549865722656, - 444.9665222167969, - 558.4959106445312, - 464.1462097167969 + [ + "semantic", + 8810233123818174294, + "TEXT", + "#/texts/59", + "text", + 1.0 ], - "iref": "#/texts/73", - "name": "text", - "orig-order": 106, - "page": 7, - "span": [ - 0, - 97 + [ + "language", + 16446711449286912460, + "TEXT", + "#/texts/60", + "en", + 0.94 ], - "sref": "#/page-elements/106", - "text-order": 106, - "type": "paragraph" - }, - { - "bbox": [ - 327.9239807128906, - 375.72027587890625, - 560.4287719726562, - 438.79620361328125 + [ + "semantic", + 16446711449286912460, + "TEXT", + "#/texts/60", + "text", + 0.98 ], - "iref": "#/texts/74", - "name": "list-item", - "orig-order": 107, - "page": 7, - "span": [ - 0, - 307 + [ + "language", + 9558434107504657973, + "TEXT", + "#/texts/61", + "en", + 0.91 ], - "sref": "#/page-elements/107", - "text-order": 107, - "type": "paragraph" - }, - { - "bbox": [ - 326.89788818359375, - 244.04925537109375, - 561.5510864257812, - 372.7772216796875 + [ + "semantic", + 9558434107504657973, + "TEXT", + "#/texts/61", + "text", + 1.0 ], - "iref": "#/texts/75", - "name": "list-item", - "orig-order": 108, - "page": 7, - "span": [ - 0, - 702 + [ + "language", + 18349896906192842040, + "TEXT", + "#/texts/62", + "en", + 0.94 ], - "sref": "#/page-elements/108", - "text-order": 108, - "type": "paragraph" - }, - { - "bbox": [ - 327.4911804199219, - 133.84515380859375, - 560.5987548828125, - 241.10621643066406 + [ + "semantic", + 18349896906192842040, + "TEXT", + "#/texts/62", + "text", + 0.99 ], - "iref": "#/texts/76", - "name": "list-item", - "orig-order": 109, - "page": 7, - "span": [ - 0, - 613 + [ + "language", + 10082834006373808153, + "TEXT", + "#/texts/63", + "en", + 0.82 ], - "sref": "#/page-elements/109", - "text-order": 109, - "type": "paragraph" - }, - { - "bbox": [ - 317.54400634765625, - 110.22366333007812, - 398.9919738769531, - 117.16583251953125 + [ + "semantic", + 10082834006373808153, + "TEXT", + "#/texts/63", + "header", + 1.0 ], - "iref": "#/footnotes/16", - "name": "footnote", - "orig-order": 110, - "page": 7, - "span": [ - 0, - 32 + [ + "language", + 15253541252152665681, + "TEXT", + "#/texts/64", + "en", + 0.89 ], - "sref": "#/page-elements/110", - "text-order": 110, - "type": "footnote" - }, - { - "bbox": [ - 317.54400634765625, - 101.18707275390625, - 400.1710205078125, - 108.2861328125 + [ + "semantic", + 15253541252152665681, + "TEXT", + "#/texts/64", + "text", + 0.99 ], - "iref": "#/footnotes/17", - "name": "footnote", - "orig-order": 111, - "page": 7, - "span": [ - 0, - 32 + [ + "language", + 3904142170608486950, + "TEXT", + "#/texts/65", + "en", + 0.78 ], - "sref": "#/page-elements/111", - "text-order": 111, - "type": "footnote" - }, - { - "bbox": [ - 317.54400634765625, - 92.2611083984375, - 382.0435791015625, - 99.16375732421875 + [ + "semantic", + 3904142170608486950, + "TEXT", + "#/texts/65", + "text", + 0.52 ], - "iref": "#/footnotes/18", - "name": "footnote", - "orig-order": 112, - "page": 7, - "span": [ - 0, - 28 + [ + "language", + 6410818076508661508, + "TEXT", + "#/texts/66", + "en", + 0.35 ], - "sref": "#/page-elements/112", - "text-order": 112, - "type": "footnote" - }, - { - "bbox": [ - 317.54400634765625, - 83.07232666015625, - 407.5936279296875, - 90.22023010253906 + [ + "semantic", + 6410818076508661508, + "TEXT", + "#/texts/66", + "text", + 0.99 ], - "iref": "#/footnotes/19", - "name": "footnote", - "orig-order": 113, - "page": 7, - "span": [ - 0, - 36 + [ + "language", + 12813875992986832439, + "TEXT", + "#/texts/67", + "en", + 0.98 ], - "sref": "#/page-elements/113", - "text-order": 113, - "type": "footnote" - }, - { - "bbox": [ - 58.86375045776367, - 545.35546875, - 300.35174560546875, - 702.7379760742188 + [ + "semantic", + 12813875992986832439, + "TEXT", + "#/texts/67", + "text", + 1.0 ], - "iref": "#/figures/6", - "name": "picture", - "orig-order": 114, - "page": 8, - "span": [ - 0, - 0 + [ + "language", + 11030869010407626539, + "TEXT", + "#/texts/68", + "en", + 0.95 ], - "sref": "#/page-elements/114", - "text-order": 114, - "type": "figure" - }, - { - "bbox": [ - 53.79800033569336, - 474.97509765625, - 297.0106506347656, - 526.871826171875 + [ + "semantic", + 11030869010407626539, + "TEXT", + "#/texts/68", + "text", + 0.99 ], - "iref": "#/figures/6/captions/0", - "name": "caption", - "orig-order": 115, - "page": 8, - "span": [ - 0, - 281 + [ + "language", + 2142320548375900929, + "TEXT", + "#/texts/69", + "en", + 0.33 ], - "sref": "#/page-elements/115", - "text-order": 115, - "type": "caption" - }, - { - "bbox": [ - 78.20700073242188, - 422.1390686035156, - 295.529052734375, - 452.20721435546875 + [ + "semantic", + 2142320548375900929, + "TEXT", + "#/texts/69", + "header", + 0.87 ], - "iref": "#/texts/77", - "name": "text", - "orig-order": 116, - "page": 8, - "span": [ - 0, - 125 + [ + "language", + 12747011194397783283, + "TEXT", + "#/texts/70", + "en", + 0.96 ], - "sref": "#/page-elements/116", - "text-order": 116, - "type": "paragraph" - }, - { - "bbox": [ - 63.44164276123047, - 300.48040771484375, - 295.35687255859375, - 419.33123779296875 + [ + "semantic", + 12747011194397783283, + "TEXT", + "#/texts/70", + "text", + 0.97 ], - "iref": "#/texts/78", - "name": "list-item", - "orig-order": 117, - "page": 8, - "span": [ - 0, - 633 + [ + "language", + 174789262945188010, + "TEXT", + "#/texts/71", + "en", + 0.62 ], - "sref": "#/page-elements/117", - "text-order": 117, - "type": "paragraph" - }, - { - "bbox": [ - 53.79800033569336, - 199.9788818359375, - 295.3085021972656, - 296.0932312011719 + [ + "semantic", + 174789262945188010, + "TEXT", + "#/texts/71", + "header", + 1.0 ], - "iref": "#/texts/79", - "name": "text", - "orig-order": 118, - "page": 8, - "span": [ - 0, - 565 + [ + "language", + 7228893318503650455, + "TEXT", + "#/texts/72", + "en", + 0.94 ], - "sref": "#/page-elements/118", - "text-order": 118, - "type": "paragraph" - }, - { - "bbox": [ - 53.79800033569336, - 101.77525329589844, - 295.5303649902344, - 197.46322631835938 + [ + "semantic", + 7228893318503650455, + "TEXT", + "#/texts/72", + "text", + 1.0 ], - "iref": "#/texts/80", - "name": "text", - "orig-order": 119, - "page": 8, - "span": [ - 0, - 605 + [ + "language", + 9230667184712205690, + "TEXT", + "#/texts/73", + "en", + 0.96 ], - "sref": "#/page-elements/119", - "text-order": 119, - "type": "paragraph" - }, - { - "bbox": [ - 53.387001037597656, - 83.2796630859375, - 137.4241180419922, - 89.7896728515625 + [ + "semantic", + 9230667184712205690, + "TEXT", + "#/texts/73", + "text", + 1.0 ], - "iref": "#/footnotes/20", - "name": "footnote", - "orig-order": 120, - "page": 8, - "span": [ - 0, - 31 + [ + "language", + 17419815751432442882, + "TEXT", + "#/texts/74", + "en", + 0.86 ], - "sref": "#/page-elements/120", - "text-order": 120, - "type": "footnote" - }, - { - "bbox": [ - 321.94073486328125, - 587.7708740234375, - 563.5105590820312, - 702.5103149414062 + [ + "semantic", + 17419815751432442882, + "TEXT", + "#/texts/74", + "text", + 0.92 ], - "iref": "#/figures/7", - "name": "picture", - "orig-order": 121, - "page": 8, - "span": [ - 0, - 0 + [ + "language", + 11194226403360998426, + "TEXT", + "#/texts/75", + "en", + 0.89 ], - "sref": "#/page-elements/121", - "text-order": 121, - "type": "figure" - }, - { - "bbox": [ - 317.9549865722656, - 538.82373046875, - 558.39794921875, - 568.872802734375 + [ + "semantic", + 11194226403360998426, + "TEXT", + "#/texts/75", + "text", + 0.99 ], - "iref": "#/figures/7/captions/0", - "name": "caption", - "orig-order": 122, - "page": 8, - "span": [ - 0, - 149 + [ + "language", + 9005324696118733701, + "TEXT", + "#/texts/76", + "en", + 0.88 ], - "sref": "#/page-elements/122", - "text-order": 122, - "type": "caption" - }, - { - "bbox": [ - 317.9549865722656, - 490.72216796875, - 558.5828857421875, - 509.7472229003906 + [ + "semantic", + 9005324696118733701, + "TEXT", + "#/texts/76", + "text", + 0.99 ], - "iref": "#/texts/81", - "name": "text", - "orig-order": 123, - "page": 8, - "span": [ - 0, - 87 + [ + "language", + 8082547756621048511, + "TEXT", + "#/texts/77", + "en", + 0.8 ], - "sref": "#/page-elements/123", - "text-order": 123, - "type": "paragraph" - }, - { - "bbox": [ - 317.95361328125, - 466.65576171875, - 398.97723388671875, - 476.83929443359375 + [ + "semantic", + 8082547756621048511, + "TEXT", + "#/texts/77", + "text", + 0.92 ], - "iref": "#/texts/82", - "name": "subtitle-level-1", - "orig-order": 124, - "page": 8, - "span": [ - 0, - 14 + [ + "language", + 7791113385466815951, + "TEXT", + "#/texts/78", + "en", + 0.92 ], - "sref": "#/page-elements/124", - "text-order": 124, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 317.9549865722656, - 408.89727783203125, - 559.3217163085938, - 463.36187744140625 + [ + "semantic", + 7791113385466815951, + "TEXT", + "#/texts/78", + "text", + 0.95 ], - "iref": "#/texts/83", - "name": "text", - "orig-order": 125, - "page": 8, - "span": [ - 0, - 302 + [ + "language", + 2845012065511066307, + "TEXT", + "#/texts/79", + "en", + 0.96 ], - "sref": "#/page-elements/125", - "text-order": 125, - "type": "paragraph" - }, - { - "bbox": [ - 317.9549865722656, - 332.0350341796875, - 559.6873779296875, - 406.2972106933594 + [ + "semantic", + 2845012065511066307, + "TEXT", + "#/texts/79", + "text", + 0.97 ], - "iref": "#/texts/84", - "name": "text", - "orig-order": 126, - "page": 8, - "span": [ - 0, - 445 + [ + "language", + 15072914837937068796, + "TEXT", + "#/texts/80", + "en", + 0.95 ], - "sref": "#/page-elements/126", - "text-order": 126, - "type": "paragraph" - }, - { - "bbox": [ - 317.9181823730469, - 277.7332763671875, - 559.6890869140625, - 329.584228515625 + [ + "semantic", + 15072914837937068796, + "TEXT", + "#/texts/80", + "text", + 0.96 ], - "iref": "#/texts/85", - "name": "text", - "orig-order": 127, - "page": 8, - "span": [ - 0, - 307 + [ + "language", + 15263283599394646155, + "TEXT", + "#/texts/81", + "en", + 0.98 ], - "sref": "#/page-elements/127", - "text-order": 127, - "type": "paragraph" - }, - { - "bbox": [ - 317.6409912109375, - 200.8416748046875, - 559.6902465820312, - 274.79022216796875 + [ + "semantic", + 15263283599394646155, + "TEXT", + "#/texts/81", + "text", + 0.97 ], - "iref": "#/texts/86", - "name": "text", - "orig-order": 128, - "page": 8, - "span": [ - 0, - 438 + [ + "language", + 11417717357379295278, + "TEXT", + "#/texts/82", + "en", + 0.84 ], - "sref": "#/page-elements/128", - "text-order": 128, - "type": "paragraph" - }, - { - "bbox": [ - 317.9549865722656, - 177.281005859375, - 438.01214599609375, - 187.37762451171875 + [ + "semantic", + 11417717357379295278, + "TEXT", + "#/texts/82", + "header", + 1.0 ], - "iref": "#/texts/87", - "name": "subtitle-level-1", - "orig-order": 129, - "page": 8, - "span": [ - 0, - 22 + [ + "language", + 9031137420247852045, + "TEXT", + "#/texts/83", + "en", + 0.85 ], - "sref": "#/page-elements/129", - "text-order": 129, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 317.7309875488281, - 119.96771240234375, - 558.4498291015625, - 171.94122314453125 + [ + "semantic", + 9031137420247852045, + "TEXT", + "#/texts/83", + "text", + 1.0 ], - "iref": "#/texts/88", - "name": "text", - "orig-order": 130, - "page": 8, - "span": [ - 0, - 320 + [ + "language", + 18436578077535696718, + "TEXT", + "#/texts/84", + "en", + 0.94 ], - "sref": "#/page-elements/130", - "text-order": 130, - "type": "paragraph" - }, - { - "bbox": [ - 317.54400634765625, - 93.00566101074219, - 382.23095703125, - 99.5784912109375 + [ + "semantic", + 18436578077535696718, + "TEXT", + "#/texts/84", + "text", + 0.94 ], - "iref": "#/footnotes/21", - "name": "footnote", - "orig-order": 131, - "page": 8, - "span": [ - 0, - 29 + [ + "language", + 11734907767490759865, + "TEXT", + "#/texts/85", + "en", + 0.91 ], - "sref": "#/page-elements/131", - "text-order": 131, - "type": "footnote" - }, - { - "bbox": [ - 317.54400634765625, - 84.25965881347656, - 382.0310363769531, - 90.77301025390625 + [ + "semantic", + 11734907767490759865, + "TEXT", + "#/texts/85", + "text", + 0.98 ], - "iref": "#/footnotes/22", - "name": "footnote", - "orig-order": 132, - "page": 8, - "span": [ - 0, - 27 + [ + "language", + 7845460979782401889, + "TEXT", + "#/texts/86", + "en", + 0.94 ], - "sref": "#/page-elements/132", - "text-order": 132, - "type": "footnote" - }, - { - "bbox": [ - 53.79800033569336, - 619.7022094726562, - 295.08966064453125, - 706.5369262695312 + [ + "semantic", + 7845460979782401889, + "TEXT", + "#/texts/86", + "text", + 1.0 ], - "iref": "#/texts/89", - "name": "text", - "orig-order": 133, - "page": 9, - "span": [ - 0, - 504 + [ + "language", + 17769988780693768120, + "TEXT", + "#/texts/87", + "en", + 0.39 ], - "sref": "#/page-elements/133", - "text-order": 133, - "type": "paragraph" - }, - { - "bbox": [ - 53.474998474121094, - 421.4378662109375, - 295.7029724121094, - 616.7592163085938 + [ + "semantic", + 17769988780693768120, + "TEXT", + "#/texts/87", + "header", + 1.0 ], - "iref": "#/texts/90", - "name": "text", - "orig-order": 134, - "page": 9, - "span": [ - 0, - 1164 + [ + "language", + 12387489643011067991, + "TEXT", + "#/texts/88", + "en", + 0.93 ], - "sref": "#/page-elements/134", - "text-order": 134, - "type": "paragraph" - }, - { - "bbox": [ - 53.71706771850586, - 396.8446350097656, - 144.1709442138672, - 407.8390808105469 + [ + "semantic", + 12387489643011067991, + "TEXT", + "#/texts/88", + "text", + 1.0 ], - "iref": "#/texts/91", - "name": "subtitle-level-1", - "orig-order": 135, - "page": 9, - "span": [ - 0, - 12 + [ + "language", + 10375772475809458895, + "TEXT", + "#/texts/89", + "en", + 0.99 ], - "sref": "#/page-elements/135", - "text-order": 135, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 53.37699890136719, - 340.66925048828125, - 295.537841796875, - 392.5202331542969 + [ + "semantic", + 10375772475809458895, + "TEXT", + "#/texts/89", + "text", + 0.99 ], - "iref": "#/texts/92", - "name": "text", - "orig-order": 136, - "page": 9, - "span": [ - 0, - 276 + [ + "language", + 7054726458191881751, + "TEXT", + "#/texts/90", + "en", + 0.94 ], - "sref": "#/page-elements/136", - "text-order": 136, - "type": "paragraph" - }, - { - "bbox": [ - 53.79800033569336, - 263.8846435546875, - 295.6170654296875, - 337.7262268066406 + [ + "semantic", + 7054726458191881751, + "TEXT", + "#/texts/90", + "text", + 0.99 ], - "iref": "#/texts/93", - "name": "text", - "orig-order": 137, - "page": 9, - "span": [ - 0, - 468 + [ + "language", + 7794115281016062068, + "TEXT", + "#/texts/91", + "en", + 0.39 ], - "sref": "#/page-elements/137", - "text-order": 137, - "type": "paragraph" - }, - { - "bbox": [ - 53.79800033569336, - 131.818603515625, - 295.61065673828125, - 261.0132141113281 + [ + "semantic", + 7794115281016062068, + "TEXT", + "#/texts/91", + "header", + 1.0 ], - "iref": "#/texts/94", - "name": "text", - "orig-order": 138, - "page": 9, - "span": [ - 0, - 808 + [ + "language", + 7038163015905900647, + "TEXT", + "#/texts/92", + "en", + 0.92 ], - "sref": "#/page-elements/138", - "text-order": 138, - "type": "paragraph" - }, - { - "bbox": [ - 53.79800033569336, - 84.15838623046875, - 295.2939147949219, - 106.9937744140625 + [ + "semantic", + 7038163015905900647, + "TEXT", + "#/texts/92", + "text", + 0.98 ], - "iref": "#/footnotes/23", - "name": "footnote", - "orig-order": 139, - "page": 9, - "span": [ - 0, - 237 + [ + "language", + 1508626318915838319, + "TEXT", + "#/texts/93", + "en", + 0.95 ], - "sref": "#/page-elements/139", - "text-order": 139, - "type": "footnote" - }, - { - "bbox": [ - 317.6944274902344, - 684.9768676757812, - 559.6907348632812, - 704.4302368164062 + [ + "semantic", + 1508626318915838319, + "TEXT", + "#/texts/93", + "text", + 0.94 ], - "iref": "#/texts/95", - "name": "text", - "orig-order": 140, - "page": 9, - "span": [ - 0, - 119 + [ + "language", + 17247086344435786796, + "TEXT", + "#/texts/94", + "en", + 0.93 ], - "sref": "#/page-elements/140", - "text-order": 140, - "type": "paragraph" - }, - { - "bbox": [ - 317.9549865722656, - 663.3440551757812, - 438.23162841796875, - 673.7493286132812 + [ + "semantic", + 17247086344435786796, + "TEXT", + "#/texts/94", + "text", + 1.0 ], - "iref": "#/texts/96", - "name": "subtitle-level-1", - "orig-order": 141, - "page": 9, - "span": [ - 0, - 15 + [ + "language", + 10287541089279789496, + "TEXT", + "#/texts/95", + "en", + 0.83 ], - "sref": "#/page-elements/141", - "text-order": 141, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 317.677001953125, - 639.6272583007812, - 564.3941040039062, - 658.7048950195312 + [ + "semantic", + 10287541089279789496, + "TEXT", + "#/texts/95", + "text", + 0.94 ], - "iref": "#/texts/97", - "name": "text", - "orig-order": 142, - "page": 9, - "span": [ - 0, - 127 + [ + "language", + 7819882792760965882, + "TEXT", + "#/texts/96", + "en", + 0.25 ], - "sref": "#/page-elements/142", - "text-order": 142, - "type": "paragraph" - }, - { - "bbox": [ - 317.45098876953125, - 584.833251953125, - 571.9612426757812, - 636.8023071289062 + [ + "semantic", + 7819882792760965882, + "TEXT", + "#/texts/96", + "header", + 1.0 ], - "iref": "#/texts/98", - "name": "text", - "orig-order": 143, - "page": 9, - "span": [ - 0, - 269 + [ + "language", + 15983582675278266440, + "TEXT", + "#/texts/97", + "en", + 0.95 ], - "sref": "#/page-elements/143", - "text-order": 143, - "type": "paragraph" - }, - { - "bbox": [ - 317.9549865722656, - 563.3189697265625, - 391.296875, - 573.2864990234375 + [ + "semantic", + 15983582675278266440, + "TEXT", + "#/texts/97", + "text", + 0.99 ], - "iref": "#/texts/99", - "name": "subtitle-level-1", - "orig-order": 144, - "page": 9, - "span": [ - 0, - 10 + [ + "language", + 12711351442546714716, + "TEXT", + "#/texts/98", + "en", + 0.93 ], - "sref": "#/page-elements/144", - "text-order": 144, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 320.02252197265625, - 529.3375854492188, - 560.26220703125, - 559.6171875 + [ + "semantic", + 12711351442546714716, + "TEXT", + "#/texts/98", + "text", + 1.0 ], - "iref": "#/texts/100", - "name": "list-item", - "orig-order": 145, - "page": 9, - "span": [ - 0, - 280 + [ + "language", + 1225384713519841338, + "TEXT", + "#/texts/99", + "en", + 0.33 ], - "sref": "#/page-elements/145", - "text-order": 145, - "type": "paragraph" - }, - { - "bbox": [ - 321.42291259765625, - 513.4962158203125, - 559.0736694335938, - 527.7372436523438 + [ + "semantic", + 1225384713519841338, + "TEXT", + "#/texts/99", + "header", + 1.0 ], - "iref": "#/texts/101", - "name": "list-item", - "orig-order": 146, - "page": 9, - "span": [ - 0, - 122 + [ + "language", + 1712774266196702392, + "TEXT", + "#/texts/100", + "en", + 0.65 ], - "sref": "#/page-elements/146", - "text-order": 146, - "type": "paragraph" - }, - { - "bbox": [ - 321.3348388671875, - 489.4542236328125, - 559.13916015625, - 511.7962341308594 + [ + "semantic", + 1712774266196702392, + "TEXT", + "#/texts/100", + "reference", + 0.96 ], - "iref": "#/texts/102", - "name": "list-item", - "orig-order": 147, - "page": 9, - "span": [ - 0, - 164 + [ + "language", + 14718288547983000340, + "TEXT", + "#/texts/101", + "en", + 0.58 ], - "sref": "#/page-elements/147", - "text-order": 147, - "type": "paragraph" - }, - { - "bbox": [ - 321.1488342285156, - 457.43511962890625, - 559.233154296875, - 487.88623046875 + [ + "semantic", + 14718288547983000340, + "TEXT", + "#/texts/101", + "text", + 0.61 ], - "iref": "#/texts/103", - "name": "list-item", - "orig-order": 148, - "page": 9, - "span": [ - 0, - 282 + [ + "language", + 16943780574244090186, + "TEXT", + "#/texts/102", + "en", + 0.67 ], - "sref": "#/page-elements/148", - "text-order": 148, - "type": "paragraph" - }, - { - "bbox": [ - 321.2120056152344, - 433.30517578125, - 559.0735473632812, - 456.0062255859375 + [ + "semantic", + 16943780574244090186, + "TEXT", + "#/texts/102", + "reference", + 0.78 ], - "iref": "#/texts/104", - "name": "list-item", - "orig-order": 149, - "page": 9, - "span": [ - 0, - 224 + [ + "language", + 8004985786049140169, + "TEXT", + "#/texts/103", + "en", + 0.34 ], - "sref": "#/page-elements/149", - "text-order": 149, - "type": "paragraph" - }, - { - "bbox": [ - 321.4419860839844, - 409.4002685546875, - 558.4588012695312, - 432.0952453613281 + [ + "semantic", + 8004985786049140169, + "TEXT", + "#/texts/103", + "text", + 0.49 ], - "iref": "#/texts/105", - "name": "list-item", - "orig-order": 150, - "page": 9, - "span": [ - 0, - 233 + [ + "language", + 12744546813104546377, + "TEXT", + "#/texts/104", + "en", + 0.48 ], - "sref": "#/page-elements/150", - "text-order": 150, - "type": "paragraph" - }, - { - "bbox": [ - 320.9912109375, - 378.0406494140625, - 559.8010864257812, - 408.18524169921875 + [ + "semantic", + 12744546813104546377, + "TEXT", + "#/texts/104", + "text", + 0.61 ], - "iref": "#/texts/106", - "name": "list-item", - "orig-order": 151, - "page": 9, - "span": [ - 0, - 250 + [ + "language", + 16061746189176848219, + "TEXT", + "#/texts/105", + "en", + 0.63 ], - "sref": "#/page-elements/151", - "text-order": 151, - "type": "paragraph" - }, - { - "bbox": [ - 321.1594543457031, - 346.1596374511719, - 560.1024780273438, - 376.44775390625 + [ + "semantic", + 16061746189176848219, + "TEXT", + "#/texts/105", + "reference", + 0.58 ], - "iref": "#/texts/107", - "name": "list-item", - "orig-order": 152, - "page": 9, - "span": [ - 0, - 227 + [ + "language", + 11872392946390819176, + "TEXT", + "#/texts/106", + "en", + 0.39 ], - "sref": "#/page-elements/152", - "text-order": 152, - "type": "paragraph" - }, - { - "bbox": [ - 320.2006530761719, - 330.21966552734375, - 558.261474609375, - 344.5801696777344 + [ + "semantic", + 11872392946390819176, + "TEXT", + "#/texts/106", + "reference", + 0.6 ], - "iref": "#/texts/108", - "name": "list-item", - "orig-order": 153, - "page": 9, - "span": [ - 0, - 116 + [ + "language", + 2956849475535726296, + "TEXT", + "#/texts/107", + "en", + 0.63 ], - "sref": "#/page-elements/153", - "text-order": 153, - "type": "paragraph" - }, - { - "bbox": [ - 317.95501708984375, - 274.36358642578125, - 572.77392578125, - 328.4842529296875 + [ + "semantic", + 2956849475535726296, + "TEXT", + "#/texts/107", + "reference", + 0.79 ], - "iref": "#/texts/109", - "name": "list-item", - "orig-order": 154, - "page": 9, - "span": [ - 0, - 425 + [ + "language", + 6623297047995432604, + "TEXT", + "#/texts/108", + "en", + 0.44 ], - "sref": "#/page-elements/154", - "text-order": 154, - "type": "paragraph" - }, - { - "bbox": [ - 317.0665588378906, - 250.09552001953125, - 560.9763793945312, - 272.6932373046875 + [ + "semantic", + 6623297047995432604, + "TEXT", + "#/texts/108", + "reference", + 0.69 ], - "iref": "#/texts/110", - "name": "list-item", - "orig-order": 155, - "page": 9, - "span": [ - 0, - 166 + [ + "language", + 2507285765516108280, + "TEXT", + "#/texts/109", + "en", + 0.59 + ], + [ + "semantic", + 2507285765516108280, + "TEXT", + "#/texts/109", + "reference", + 0.68 ], - "sref": "#/page-elements/155", - "text-order": 155, - "type": "paragraph" - } - ], - "page-footers": [], - "page-headers": [], - "properties": { - "data": [ [ "language", + 14905276480471286920, + "TEXT", + "#/texts/110", "en", - 0.8799999952316284 + 0.48 ], [ "semantic", - "text", - 0.9200000166893005 + 14905276480471286920, + "TEXT", + "#/texts/110", + "reference", + 0.89 ] ], "headers": [ "type", + "subj_hash", + "subj_name", + "subj_path", "label", "confidence" ] }, + "sref": "#", "tables": [ { "#-cols": 5, @@ -87206,12 +93087,13 @@ "$ref": "#/page-elements/74" } ], + "sref": "#/tables/0/captions/0", "text": "Table 1: Time-to-solution and performance results for the Faster RCNN and YOLOv2 models. The training of the models was done on 25000 PDF pages. The prediction (per page) and performance numbers (Recall= \u211b and Precision= \ud835\udcab) were obtained on 5000 page images, where the prediction confidence cutoff was tuned to yield the maximum F1 score for each. All time-to-solution measurements for training were obtained on a POWER8 node with a single Pascal P100 GPU.", "text-hash": 17279509228359814482, "type": "paragraph" } ], - "confidence": 0.9700000286102295, + "confidence": 0.97, "created_by": "high_conf_pred", "data": [ [ @@ -87240,10 +93122,10 @@ }, { "bbox": [ - 410.43499755859375, - 595.8622436523438, - 469.40704345703125, - 603.8782348632812 + 410.43, + 595.86, + 469.41, + 603.88 ], "col": 1, "col-header": false, @@ -87272,10 +93154,10 @@ }, { "bbox": [ - 410.43499755859375, - 595.8622436523438, - 469.40704345703125, - 603.8782348632812 + 410.43, + 595.86, + 469.41, + 603.88 ], "col": 2, "col-header": false, @@ -87304,10 +93186,10 @@ }, { "bbox": [ - 488.8641357421875, - 595.8622436523438, - 534.6734619140625, - 603.8782348632812 + 488.86, + 595.86, + 534.67, + 603.88 ], "col": 3, "col-header": false, @@ -87336,10 +93218,10 @@ }, { "bbox": [ - 488.8641357421875, - 595.8622436523438, - 534.6734619140625, - 603.8782348632812 + 488.86, + 595.86, + 534.67, + 603.88 ], "col": 4, "col-header": false, @@ -87393,10 +93275,10 @@ }, { "bbox": [ - 401.5090026855469, - 584.9032592773438, - 432.07550048828125, - 592.9192504882812 + 401.51, + 584.9, + 432.08, + 592.92 ], "col": 1, "col-header": false, @@ -87421,10 +93303,10 @@ }, { "bbox": [ - 442.03717041015625, - 584.9032592773438, - 478.8980407714844, - 592.9192504882812 + 442.04, + 584.9, + 478.9, + 592.92 ], "col": 2, "col-header": false, @@ -87449,10 +93331,10 @@ }, { "bbox": [ - 494.74798583984375, - 584.8943481445312, - 503.8273620605469, - 592.9640502929688 + 494.75, + 584.89, + 503.83, + 592.96 ], "col": 3, "col-header": false, @@ -87477,10 +93359,10 @@ }, { "bbox": [ - 526.4854736328125, - 584.8943481445312, - 534.10693359375, - 592.9640502929688 + 526.49, + 584.89, + 534.11, + 592.96 ], "col": 4, "col-header": false, @@ -87507,10 +93389,10 @@ [ { "bbox": [ - 341.4880065917969, - 569.4572143554688, - 390.9825439453125, - 577.4732055664062 + 341.49, + 569.46, + 390.98, + 577.47 ], "col": 0, "col-header": false, @@ -87535,10 +93417,10 @@ }, { "bbox": [ - 400.9442138671875, - 569.4572143554688, - 432.0755615234375, - 577.4732055664062 + 400.94, + 569.46, + 432.08, + 577.47 ], "col": 1, "col-header": false, @@ -87563,10 +93445,10 @@ }, { "bbox": [ - 460.72320556640625, - 569.4572143554688, - 478.89813232421875, - 577.4732055664062 + 460.72, + 569.46, + 478.9, + 577.47 ], "col": 2, "col-header": false, @@ -87591,10 +93473,10 @@ }, { "bbox": [ - 488.85980224609375, - 569.4572143554688, - 504.5509948730469, - 577.4732055664062 + 488.86, + 569.46, + 504.55, + 577.47 ], "col": 3, "col-header": false, @@ -87619,10 +93501,10 @@ }, { "bbox": [ - 518.9779052734375, - 569.4572143554688, - 534.6691284179688, - 577.4732055664062 + 518.98, + 569.46, + 534.67, + 577.47 ], "col": 4, "col-header": false, @@ -87649,10 +93531,10 @@ [ { "bbox": [ - 341.4880065917969, - 558.4982299804688, - 375.08514404296875, - 566.5142211914062 + 341.49, + 558.5, + 375.09, + 566.51 ], "col": 0, "col-header": false, @@ -87677,10 +93559,10 @@ }, { "bbox": [ - 405.42742919921875, - 558.4982299804688, - 432.0755920410156, - 566.5142211914062 + 405.43, + 558.5, + 432.08, + 566.51 ], "col": 1, "col-header": false, @@ -87705,10 +93587,10 @@ }, { "bbox": [ - 453.9984436035156, - 558.4982299804688, - 478.8981628417969, - 566.5142211914062 + 454.0, + 558.5, + 478.9, + 566.51 ], "col": 2, "col-header": false, @@ -87733,10 +93615,10 @@ }, { "bbox": [ - 489.5060119628906, - 558.3368530273438, - 504.5517578125, - 566.720458984375 + 489.51, + 558.34, + 504.55, + 566.72 ], "col": 3, "col-header": false, @@ -87761,10 +93643,10 @@ }, { "bbox": [ - 519.624267578125, - 558.3368530273438, - 534.6697387695312, - 566.720458984375 + 519.62, + 558.34, + 534.67, + 566.72 ], "col": 4, "col-header": false, @@ -87793,25 +93675,12 @@ "footnotes": [], "hash": 16709517892596982787, "mentions": [], - "properties": { - "data": [ - [ - "language", - "en", - 0.75 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/75" } ], + "sref": "#/tables/0", "type": "table" }, { @@ -87827,12 +93696,13 @@ "$ref": "#/page-elements/86" } ], + "sref": "#/tables/1/captions/0", "text": "Table 3: Comparison for two different journal templates showing the aggregated precision and recall averaged over all labels. Each model has been independently trained on a dataset of 400 pages each. The results show that the ML algorithm proves to perform very well for the multiple document templates, simply by providing a different dataset to train on.", "text-hash": 8085176655901164108, "type": "paragraph" } ], - "confidence": 0.9900000095367432, + "confidence": 0.99, "created_by": "high_conf_pred", "data": [ [ @@ -87884,10 +93754,10 @@ }, { "bbox": [ - 177.53599548339844, - 628.7382202148438, - 230.57225036621094, - 636.7542114257812 + 177.54, + 628.74, + 230.57, + 636.75 ], "col": 2, "col-header": false, @@ -87932,10 +93802,10 @@ }, { "bbox": [ - 177.53599548339844, - 628.7382202148438, - 230.57225036621094, - 636.7542114257812 + 177.54, + 628.74, + 230.57, + 636.75 ], "col": 3, "col-header": false, @@ -87980,10 +93850,10 @@ }, { "bbox": [ - 177.53599548339844, - 628.7382202148438, - 230.57225036621094, - 636.7542114257812 + 177.54, + 628.74, + 230.57, + 636.75 ], "col": 4, "col-header": false, @@ -88028,10 +93898,10 @@ }, { "bbox": [ - 177.53599548339844, - 628.7382202148438, - 230.57225036621094, - 636.7542114257812 + 177.54, + 628.74, + 230.57, + 636.75 ], "col": 5, "col-header": false, @@ -88076,10 +93946,10 @@ }, { "bbox": [ - 177.53599548339844, - 628.7382202148438, - 230.57225036621094, - 636.7542114257812 + 177.54, + 628.74, + 230.57, + 636.75 ], "col": 6, "col-header": false, @@ -88124,10 +93994,10 @@ }, { "bbox": [ - 177.53599548339844, - 628.7382202148438, - 230.57225036621094, - 636.7542114257812 + 177.54, + 628.74, + 230.57, + 636.75 ], "col": 7, "col-header": false, @@ -88197,10 +94067,10 @@ }, { "bbox": [ - 120.86475372314453, - 597.093994140625, - 128.8807373046875, - 613.7177124023438 + 120.86, + 597.09, + 128.88, + 613.72 ], "col": 1, "col-header": false, @@ -88248,10 +94118,10 @@ }, { "bbox": [ - 151.00177001953125, - 597.093994140625, - 159.01776123046875, - 622.4957885742188 + 151.0, + 597.09, + 159.02, + 622.5 ], "col": 3, "col-header": false, @@ -88276,10 +94146,10 @@ }, { "bbox": [ - 181.1387939453125, - 597.093994140625, - 189.15478515625, - 624.9974365234375 + 181.14, + 597.09, + 189.15, + 625.0 ], "col": 4, "col-header": false, @@ -88304,10 +94174,10 @@ }, { "bbox": [ - 204.30877685546875, - 596.9730224609375, - 212.32476806640625, - 612.6463012695312 + 204.31, + 596.97, + 212.32, + 612.65 ], "col": 5, "col-header": false, @@ -88332,10 +94202,10 @@ }, { "bbox": [ - 236.92974853515625, - 597.093994140625, - 244.94573974609375, - 622.495849609375 + 236.93, + 597.09, + 244.95, + 622.5 ], "col": 6, "col-header": false, @@ -88360,10 +94230,10 @@ }, { "bbox": [ - 267.0667724609375, - 597.093994140625, - 275.082763671875, - 616.793212890625 + 267.07, + 597.09, + 275.08, + 616.79 ], "col": 7, "col-header": false, @@ -88390,10 +94260,10 @@ [ { "bbox": [ - 60.60176086425781, - 541.1279907226562, - 68.61771392822266, - 574.7430419921875 + 60.6, + 541.13, + 68.62, + 574.74 ], "col": 0, "col-header": false, @@ -88446,10 +94316,10 @@ }, { "bbox": [ - 77.13200378417969, - 580.8052368164062, - 93.75570678710938, - 588.8212280273438 + 77.13, + 580.81, + 93.76, + 588.82 ], "col": 1, "col-header": false, @@ -88474,10 +94344,10 @@ }, { "bbox": [ - 120.86499786376953, - 580.8052368164062, - 129.83139038085938, - 588.8212280273438 + 120.86, + 580.81, + 129.83, + 588.82 ], "col": 2, "col-header": false, @@ -88502,10 +94372,10 @@ }, { "bbox": [ - 151.00106811523438, - 580.8052368164062, - 155.48426818847656, - 588.8212280273438 + 151.0, + 580.81, + 155.48, + 588.82 ], "col": 3, "col-header": false, @@ -88530,10 +94400,10 @@ }, { "bbox": [ - 181.13714599609375, - 580.8052368164062, - 185.62034606933594, - 588.8212280273438 + 181.14, + 580.81, + 185.62, + 588.82 ], "col": 4, "col-header": false, @@ -88558,10 +94428,10 @@ }, { "bbox": [ - 204.5484161376953, - 580.8052368164062, - 209.0316162109375, - 588.8212280273438 + 204.55, + 580.81, + 209.03, + 588.82 ], "col": 5, "col-header": false, @@ -88586,10 +94456,10 @@ }, { "bbox": [ - 236.92608642578125, - 580.8052368164062, - 241.40928649902344, - 588.8212280273438 + 236.93, + 580.81, + 241.41, + 588.82 ], "col": 6, "col-header": false, @@ -88614,10 +94484,10 @@ }, { "bbox": [ - 267.0711364746094, - 580.8052368164062, - 271.5543212890625, - 588.8212280273438 + 267.07, + 580.81, + 271.55, + 588.82 ], "col": 7, "col-header": false, @@ -88644,10 +94514,10 @@ [ { "bbox": [ - 60.60176086425781, - 541.1279907226562, - 102.5338134765625, - 577.8622436523438 + 60.6, + 541.13, + 102.53, + 577.86 ], "col": 0, "col-header": false, @@ -88700,10 +94570,10 @@ }, { "bbox": [ - 120.86499786376953, - 569.8462524414062, - 125.34819793701172, - 577.8622436523438 + 120.86, + 569.85, + 125.35, + 577.86 ], "col": 1, "col-header": false, @@ -88728,10 +94598,10 @@ }, { "bbox": [ - 151.00106811523438, - 569.8462524414062, - 164.45066833496094, - 577.8622436523438 + 151.0, + 569.85, + 164.45, + 577.86 ], "col": 2, "col-header": false, @@ -88756,10 +94626,10 @@ }, { "bbox": [ - 181.13714599609375, - 569.8462524414062, - 185.62034606933594, - 577.8622436523438 + 181.14, + 569.85, + 185.62, + 577.86 ], "col": 3, "col-header": false, @@ -88784,10 +94654,10 @@ }, { "bbox": [ - 204.5484161376953, - 569.8462524414062, - 209.0316162109375, - 577.8622436523438 + 204.55, + 569.85, + 209.03, + 577.86 ], "col": 4, "col-header": false, @@ -88812,10 +94682,10 @@ }, { "bbox": [ - 236.92608642578125, - 569.8462524414062, - 241.40928649902344, - 577.8622436523438 + 236.93, + 569.85, + 241.41, + 577.86 ], "col": 5, "col-header": false, @@ -88840,10 +94710,10 @@ }, { "bbox": [ - 267.0711364746094, - 569.8462524414062, - 271.5543212890625, - 577.8622436523438 + 267.07, + 569.85, + 271.55, + 577.86 ], "col": 6, "col-header": false, @@ -88893,10 +94763,10 @@ [ { "bbox": [ - 60.60176086425781, - 541.1279907226562, - 105.03543853759766, - 574.7430419921875 + 60.6, + 541.13, + 105.04, + 574.74 ], "col": 0, "col-header": false, @@ -88949,10 +94819,10 @@ }, { "bbox": [ - 120.86499786376953, - 558.88720703125, - 125.34819793701172, - 566.9031982421875 + 120.86, + 558.89, + 125.35, + 566.9 ], "col": 1, "col-header": false, @@ -88977,10 +94847,10 @@ }, { "bbox": [ - 151.00106811523438, - 558.88720703125, - 155.48426818847656, - 566.9031982421875 + 151.0, + 558.89, + 155.48, + 566.9 ], "col": 2, "col-header": false, @@ -89005,10 +94875,10 @@ }, { "bbox": [ - 181.13714599609375, - 558.88720703125, - 194.5867462158203, - 566.9031982421875 + 181.14, + 558.89, + 194.59, + 566.9 ], "col": 3, "col-header": false, @@ -89033,10 +94903,10 @@ }, { "bbox": [ - 204.5484161376953, - 558.88720703125, - 209.0316162109375, - 566.9031982421875 + 204.55, + 558.89, + 209.03, + 566.9 ], "col": 4, "col-header": false, @@ -89061,10 +94931,10 @@ }, { "bbox": [ - 236.92608642578125, - 558.88720703125, - 241.40928649902344, - 566.9031982421875 + 236.93, + 558.89, + 241.41, + 566.9 ], "col": 5, "col-header": false, @@ -89089,10 +94959,10 @@ }, { "bbox": [ - 267.0711364746094, - 558.88720703125, - 271.5543212890625, - 566.9031982421875 + 267.07, + 558.89, + 271.55, + 566.9 ], "col": 6, "col-header": false, @@ -89142,10 +95012,10 @@ [ { "bbox": [ - 60.60176086425781, - 541.1279907226562, - 92.80526733398438, - 574.7430419921875 + 60.6, + 541.13, + 92.81, + 574.74 ], "col": 0, "col-header": false, @@ -89198,10 +95068,10 @@ }, { "bbox": [ - 120.86499786376953, - 547.92822265625, - 125.34819793701172, - 555.9442138671875 + 120.86, + 547.93, + 125.35, + 555.94 ], "col": 1, "col-header": false, @@ -89226,10 +95096,10 @@ }, { "bbox": [ - 151.00106811523438, - 547.92822265625, - 159.96746826171875, - 555.9442138671875 + 151.0, + 547.93, + 159.97, + 555.94 ], "col": 2, "col-header": false, @@ -89254,10 +95124,10 @@ }, { "bbox": [ - 181.13714599609375, - 547.92822265625, - 185.62034606933594, - 555.9442138671875 + 181.14, + 547.93, + 185.62, + 555.94 ], "col": 3, "col-header": false, @@ -89282,10 +95152,10 @@ }, { "bbox": [ - 204.5484161376953, - 547.92822265625, - 226.96441650390625, - 555.9442138671875 + 204.55, + 547.93, + 226.96, + 555.94 ], "col": 4, "col-header": false, @@ -89310,10 +95180,10 @@ }, { "bbox": [ - 236.92608642578125, - 547.92822265625, - 245.89248657226562, - 555.9442138671875 + 236.93, + 547.93, + 245.89, + 555.94 ], "col": 5, "col-header": false, @@ -89338,10 +95208,10 @@ }, { "bbox": [ - 267.0711364746094, - 547.92822265625, - 271.5543212890625, - 555.9442138671875 + 267.07, + 547.93, + 271.55, + 555.94 ], "col": 6, "col-header": false, @@ -89391,10 +95261,10 @@ [ { "bbox": [ - 60.60176086425781, - 536.96923828125, - 102.5338134765625, - 574.7430419921875 + 60.6, + 536.97, + 102.53, + 574.74 ], "col": 0, "col-header": false, @@ -89447,10 +95317,10 @@ }, { "bbox": [ - 120.86499786376953, - 536.96923828125, - 125.34819793701172, - 544.9852294921875 + 120.86, + 536.97, + 125.35, + 544.99 ], "col": 1, "col-header": false, @@ -89475,10 +95345,10 @@ }, { "bbox": [ - 151.00106811523438, - 536.96923828125, - 155.48426818847656, - 544.9852294921875 + 151.0, + 536.97, + 155.48, + 544.99 ], "col": 2, "col-header": false, @@ -89503,10 +95373,10 @@ }, { "bbox": [ - 181.13714599609375, - 536.96923828125, - 185.62034606933594, - 544.9852294921875 + 181.14, + 536.97, + 185.62, + 544.99 ], "col": 3, "col-header": false, @@ -89531,10 +95401,10 @@ }, { "bbox": [ - 204.5484161376953, - 536.96923828125, - 209.0316162109375, - 544.9852294921875 + 204.55, + 536.97, + 209.03, + 544.99 ], "col": 4, "col-header": false, @@ -89559,10 +95429,10 @@ }, { "bbox": [ - 236.92608642578125, - 536.96923828125, - 254.85888671875, - 544.9852294921875 + 236.93, + 536.97, + 254.86, + 544.99 ], "col": 5, "col-header": false, @@ -89587,10 +95457,10 @@ }, { "bbox": [ - 267.0711364746094, - 536.96923828125, - 276.03753662109375, - 544.9852294921875 + 267.07, + 536.97, + 276.04, + 544.99 ], "col": 6, "col-header": false, @@ -89640,10 +95510,10 @@ [ { "bbox": [ - 60.60176086425781, - 526.01025390625, - 96.8311767578125, - 574.7430419921875 + 60.6, + 526.01, + 96.83, + 574.74 ], "col": 0, "col-header": false, @@ -89696,10 +95566,10 @@ }, { "bbox": [ - 120.86499786376953, - 526.01025390625, - 125.34819793701172, - 534.0262451171875 + 120.86, + 526.01, + 125.35, + 534.03 ], "col": 1, "col-header": false, @@ -89724,10 +95594,10 @@ }, { "bbox": [ - 151.00106811523438, - 526.01025390625, - 155.48426818847656, - 534.0262451171875 + 151.0, + 526.01, + 155.48, + 534.03 ], "col": 2, "col-header": false, @@ -89752,10 +95622,10 @@ }, { "bbox": [ - 181.13714599609375, - 526.01025390625, - 185.62034606933594, - 534.0262451171875 + 181.14, + 526.01, + 185.62, + 534.03 ], "col": 3, "col-header": false, @@ -89780,10 +95650,10 @@ }, { "bbox": [ - 204.5484161376953, - 526.01025390625, - 209.0316162109375, - 534.0262451171875 + 204.55, + 526.01, + 209.03, + 534.03 ], "col": 4, "col-header": false, @@ -89808,10 +95678,10 @@ }, { "bbox": [ - 236.92608642578125, - 526.01025390625, - 241.40928649902344, - 534.0262451171875 + 236.93, + 526.01, + 241.41, + 534.03 ], "col": 5, "col-header": false, @@ -89836,10 +95706,10 @@ }, { "bbox": [ - 267.0711364746094, - 526.01025390625, - 285.0039367675781, - 534.0262451171875 + 267.07, + 526.01, + 285.0, + 534.03 ], "col": 6, "col-header": false, @@ -89889,10 +95759,10 @@ [ { "bbox": [ - 60.60176086425781, - 512.26220703125, - 100.04115295410156, - 574.7430419921875 + 60.6, + 512.26, + 100.04, + 574.74 ], "col": 0, "col-header": false, @@ -89945,10 +95815,10 @@ }, { "bbox": [ - 120.86499786376953, - 512.26220703125, - 134.31460571289062, - 520.2781982421875 + 120.86, + 512.26, + 134.31, + 520.28 ], "col": 1, "col-header": false, @@ -89973,10 +95843,10 @@ }, { "bbox": [ - 151.00106811523438, - 512.26220703125, - 171.17547607421875, - 520.2781982421875 + 151.0, + 512.26, + 171.18, + 520.28 ], "col": 2, "col-header": false, @@ -90001,10 +95871,10 @@ }, { "bbox": [ - 181.13714599609375, - 512.26220703125, - 194.5867462158203, - 520.2781982421875 + 181.14, + 512.26, + 194.59, + 520.28 ], "col": 3, "col-header": false, @@ -90029,10 +95899,10 @@ }, { "bbox": [ - 204.5484161376953, - 512.26220703125, - 224.72280883789062, - 520.2781982421875 + 204.55, + 512.26, + 224.72, + 520.28 ], "col": 4, "col-header": false, @@ -90057,10 +95927,10 @@ }, { "bbox": [ - 236.92608642578125, - 512.26220703125, - 257.1004943847656, - 520.2781982421875 + 236.93, + 512.26, + 257.1, + 520.28 ], "col": 5, "col-header": false, @@ -90085,10 +95955,10 @@ }, { "bbox": [ - 267.0711364746094, - 512.26220703125, - 287.24554443359375, - 520.2781982421875 + 267.07, + 512.26, + 287.25, + 520.28 ], "col": 6, "col-header": false, @@ -90138,10 +96008,10 @@ [ { "bbox": [ - 60.60176086425781, - 501.3032531738281, - 110.50493621826172, - 574.7430419921875 + 60.6, + 501.3, + 110.5, + 574.74 ], "col": 0, "col-header": false, @@ -90194,10 +96064,10 @@ }, { "bbox": [ - 120.86499786376953, - 501.3032531738281, - 141.03939819335938, - 509.3192138671875 + 120.86, + 501.3, + 141.04, + 509.32 ], "col": 1, "col-header": false, @@ -90222,10 +96092,10 @@ }, { "bbox": [ - 151.00106811523438, - 501.3032531738281, - 171.17547607421875, - 509.3192138671875 + 151.0, + 501.3, + 171.18, + 509.32 ], "col": 2, "col-header": false, @@ -90250,10 +96120,10 @@ }, { "bbox": [ - 181.13714599609375, - 501.3032531738281, - 194.5867462158203, - 509.3192138671875 + 181.14, + 501.3, + 194.59, + 509.32 ], "col": 3, "col-header": false, @@ -90278,10 +96148,10 @@ }, { "bbox": [ - 204.5484161376953, - 501.3032531738281, - 224.72280883789062, - 509.3192138671875 + 204.55, + 501.3, + 224.72, + 509.32 ], "col": 4, "col-header": false, @@ -90306,10 +96176,10 @@ }, { "bbox": [ - 236.92608642578125, - 501.3032531738281, - 257.1004943847656, - 509.3192138671875 + 236.93, + 501.3, + 257.1, + 509.32 ], "col": 5, "col-header": false, @@ -90334,10 +96204,10 @@ }, { "bbox": [ - 267.0711364746094, - 501.3032531738281, - 287.24554443359375, - 509.3192138671875 + 267.07, + 501.3, + 287.25, + 509.32 ], "col": 6, "col-header": false, @@ -90389,41 +96259,28 @@ "footnotes": [], "hash": 16041588621504517180, "mentions": [], - "properties": { - "data": [ - [ - "language", - "en", - 0.18000000715255737 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/81" } ], + "sref": "#/tables/1", "type": "table" }, { "#-cols": 3, "#-rows": 3, "captions": [], - "confidence": 0.9700000286102295, + "confidence": 0.97, "created_by": "high_conf_pred", "data": [ [ { "bbox": [ - 375.0260009765625, - 617.7792358398438, - 434.54498291015625, - 625.7952270507812 + 375.03, + 617.78, + 434.54, + 625.8 ], "col": 0, "col-header": false, @@ -90448,10 +96305,10 @@ }, { "bbox": [ - 457.4440002441406, - 617.7703247070312, - 466.52337646484375, - 625.8400268554688 + 457.44, + 617.77, + 466.52, + 625.84 ], "col": 1, "col-header": false, @@ -90476,10 +96333,10 @@ }, { "bbox": [ - 487.4330139160156, - 617.7703247070312, - 495.054443359375, - 625.8400268554688 + 487.43, + 617.77, + 495.05, + 625.84 ], "col": 2, "col-header": false, @@ -90506,10 +96363,10 @@ [ { "bbox": [ - 375.0260009765625, - 602.333251953125, - 442.7761535644531, - 610.3492431640625 + 375.03, + 602.33, + 442.78, + 610.35 ], "col": 0, "col-header": false, @@ -90534,10 +96391,10 @@ }, { "bbox": [ - 452.7380065917969, - 602.171875, - 471.9527587890625, - 610.5554809570312 + 452.74, + 602.17, + 471.95, + 610.56 ], "col": 1, "col-header": false, @@ -90562,10 +96419,10 @@ }, { "bbox": [ - 481.9144287109375, - 602.171875, - 501.1307678222656, - 610.5554809570312 + 481.91, + 602.17, + 501.13, + 610.56 ], "col": 2, "col-header": false, @@ -90592,10 +96449,10 @@ [ { "bbox": [ - 375.0260314941406, - 591.374267578125, - 404.1847839355469, - 599.3902587890625 + 375.03, + 591.37, + 404.18, + 599.39 ], "col": 0, "col-header": false, @@ -90620,10 +96477,10 @@ }, { "bbox": [ - 452.738037109375, - 591.212890625, - 471.9527893066406, - 599.5964965820312 + 452.74, + 591.21, + 471.95, + 599.6 ], "col": 1, "col-header": false, @@ -90648,10 +96505,10 @@ }, { "bbox": [ - 481.9144592285156, - 591.212890625, - 501.13079833984375, - 599.5964965820312 + 481.91, + 591.21, + 501.13, + 599.6 ], "col": 2, "col-header": false, @@ -90680,25 +96537,12 @@ "footnotes": [], "hash": 14817357053216629605, "mentions": [], - "properties": { - "data": [ - [ - "language", - "ru", - 0.18000000715255737 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/87" } ], + "sref": "#/tables/2", "type": "table" } ], @@ -90707,30 +96551,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/0", "hash": 7377574370756688828, "orig": "arXiv:1806.02284v1 [cs.DL] 24 May 2018", - "properties": { - "data": [ - [ - "language", - "en", - 0.7799999713897705 - ], - [ - "semantic", - "text", - 0.9399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/0" } ], + "sref": "#/texts/0", "text": "arXiv:1806.02284v1 [cs.DL] 24 May 2018", "text-hash": 605943372629925146, "type": "paragraph" @@ -90739,30 +96565,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/1", "hash": 10227328696767902037, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", - "properties": { - "data": [ - [ - "language", - "en", - 0.699999988079071 - ], - [ - "semantic", - "header", - 0.7099999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/1" } ], + "sref": "#/texts/1", "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "title" @@ -90771,30 +96579,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/2", "hash": 8770494724746327817, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", - "properties": { - "data": [ - [ - "language", - "en", - 0.25999999046325684 - ], - [ - "semantic", - "meta-data", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/2" } ], + "sref": "#/texts/2", "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "paragraph" @@ -90803,30 +96593,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/3", "hash": 18258237174351515285, "orig": "taa,dol,cau,bek@zurich.ibm.com", - "properties": { - "data": [ - [ - "language", - "zh", - 0.09000000357627869 - ], - [ - "semantic", - "text", - 0.7900000214576721 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/3" } ], + "sref": "#/texts/3", "text": "taa,dol,cau,bek@zurich.ibm.com", "text-hash": 7883794643982446593, "type": "paragraph" @@ -90835,30 +96607,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/4", "hash": 5704354110496947297, "orig": "IBM Research", - "properties": { - "data": [ - [ - "language", - "en", - 0.5299999713897705 - ], - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/4" } ], + "sref": "#/texts/4", "text": "IBM Research", "text-hash": 16114797969310195405, "type": "paragraph" @@ -90867,30 +96621,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/5", "hash": 11056873211244709904, "orig": "Rueschlikon, Switzerland", - "properties": { - "data": [ - [ - "language", - "en", - 0.49000000953674316 - ], - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/5" } ], + "sref": "#/texts/5", "text": "Rueschlikon, Switzerland", "text-hash": 10483037511456664190, "type": "paragraph" @@ -90899,30 +96635,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/6", "hash": 11788868678004267702, "orig": "ABSTRACT", - "properties": { - "data": [ - [ - "language", - "en", - 0.6499999761581421 - ], - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/6" } ], + "sref": "#/texts/6", "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "subtitle-level-1" @@ -90931,30 +96649,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/7", "hash": 3624246356859711021, "orig": "1 INTRODUCTION", - "properties": { - "data": [ - [ - "language", - "en", - 0.550000011920929 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/7" } ], + "sref": "#/texts/7", "text": "1 INTRODUCTION", "text-hash": 4359834464932974729, "type": "subtitle-level-1" @@ -90963,30 +96663,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/8", "hash": 17999848460847860039, "orig": "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size. Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable. Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging. In this paper, we present a modular, cloud-based platform to ingest documents at scale. This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format. We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9200000166893005 - ], - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/8" } ], + "sref": "#/texts/8", "text": "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size. Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable. Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging. In this paper, we present a modular, cloud-based platform to ingest documents at scale. This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format. We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 8142196169563728819, "type": "paragraph" @@ -90995,30 +96677,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/9", "hash": 14387482728083328702, "orig": "ACM Reference Format:", - "properties": { - "data": [ - [ - "language", - "en", - 0.20999999344348907 - ], - [ - "semantic", - "header", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/9" } ], + "sref": "#/texts/9", "text": "ACM Reference Format:", "text-hash": 7430992009485070364, "type": "subtitle-level-1" @@ -91027,30 +96691,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/10", "hash": 11222145795862225841, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas. 2018. Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.. In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom. ACM, New York, NY, USA, 9 pages. https://doi.org/10. 1145/3219819.3219834", - "properties": { - "data": [ - [ - "language", - "en", - 0.49000000953674316 - ], - [ - "semantic", - "text", - 0.8600000143051147 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/10" } ], + "sref": "#/texts/10", "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas. 2018. Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.. In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom. ACM, New York, NY, USA, 9 pages. https://doi.org/10. 1145/3219819.3219834", "text-hash": 10605881125688857885, "type": "paragraph" @@ -91059,30 +96705,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/11", "hash": 16923207262044929933, "orig": "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$. These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery. It is needless to say that valuable qualitative and quantitative information is contained in many of them. However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout. Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery. In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$. This poses a real problem, since more and more information published in the PDF documents is going dark. In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components. First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML. Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/15" } ], + "sref": "#/texts/11", "text": "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$. These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery. It is needless to say that valuable qualitative and quantitative information is contained in many of them. However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout. Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery. In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$. This poses a real problem, since more and more information published in the PDF documents is going dark. In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components. First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML. Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", "text-hash": 9516638039579926761, "type": "paragraph" @@ -91091,30 +96719,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/12", "hash": 3749305213430885773, "orig": "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files. The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms. This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components. Each of these microservices can be consumed by its own REST API. This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform. In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", - "properties": { - "data": [ - [ - "language", - "en", - 0.949999988079071 - ], - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/16" } ], + "sref": "#/texts/12", "text": "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files. The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms. This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components. Each of these microservices can be consumed by its own REST API. This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform. In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", "text-hash": 3945867624210419433, "type": "paragraph" @@ -91123,30 +96733,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/13", "hash": 3409470577915009676, "orig": "To obtain a thorough understanding of what our platform can do and how well it performs, we have structured this paper as follows: In Section 2, we briefly review the current state-of-the-art document", - "properties": { - "data": [ - [ - "language", - "en", - 0.949999988079071 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/17" } ], + "sref": "#/texts/13", "text": "To obtain a thorough understanding of what our platform can do and how well it performs, we have structured this paper as follows: In Section 2, we briefly review the current state-of-the-art document", "text-hash": 4583103017707584490, "type": "paragraph" @@ -91155,30 +96747,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/15", "hash": 17187299362680072378, "orig": "processing solutions. In Section 3, we present the design of the platform and its components. In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively. Finally, in Section 5, we discuss the open questions w.r.t. research and possible next steps in the development of the platform.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9200000166893005 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/22" } ], + "sref": "#/texts/14", "text": "processing solutions. In Section 3, we present the design of the platform and its components. In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively. Finally, in Section 5, we discuss the open questions w.r.t. research and possible next steps in the development of the platform.", "text-hash": 9243393324994873880, "type": "paragraph" @@ -91187,30 +96761,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/16", "hash": 697648145931166262, "orig": "2 STATE OF THE ART", - "properties": { - "data": [ - [ - "language", - "en", - 0.47999998927116394 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/23" } ], + "sref": "#/texts/15", "text": "2 STATE OF THE ART", "text-hash": 2385816824895853732, "type": "subtitle-level-1" @@ -91219,30 +96775,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/17", "hash": 7935233310532930917, "orig": "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]. Broadly speaking, there are two types of approaches to this problem. In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document. This can be done through a conversion from PDF towards HTML or MS Word for example. The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format. For example, this could be a JSON/XML file with a particular schema. Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9200000166893005 - ], - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/24" } ], + "sref": "#/texts/16", "text": "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]. Broadly speaking, there are two types of approaches to this problem. In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document. This can be done through a conversion from PDF towards HTML or MS Word for example. The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format. For example, this could be a JSON/XML file with a particular schema. Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", "text-hash": 57757550267838417, "type": "paragraph" @@ -91251,30 +96789,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/18", "hash": 2762070725424637531, "orig": "Many solutions have already been developed that tackle the problem of document conversion. There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$. There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$. In contrast to the open-source solutions, all three proprietary solutions support also", - "properties": { - "data": [ - [ - "language", - "en", - 0.9700000286102295 - ], - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/25" } ], + "sref": "#/texts/17", "text": "Many solutions have already been developed that tackle the problem of document conversion. There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$. There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$. In contrast to the open-source solutions, all three proprietary solutions support also", "text-hash": 5230489225511983287, "type": "paragraph" @@ -91283,30 +96803,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/19", "hash": 7536915191196259776, "orig": "extraction from scanned documents. Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries. For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref. [1] and previous editions.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9900000095367432 - ], - [ - "semantic", - "text", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/31" } ], + "sref": "#/texts/18", "text": "extraction from scanned documents. Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries. For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref. [1] and previous editions.", "text-hash": 167221319977518894, "type": "paragraph" @@ -91315,30 +96817,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/20", "hash": 11495493007651807568, "orig": "3 PLATFORM DESIGN", - "properties": { - "data": [ - [ - "language", - "en", - 0.3100000023841858 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/32" } ], + "sref": "#/texts/19", "text": "3 PLATFORM DESIGN", "text-hash": 10322960049580053438, "type": "subtitle-level-1" @@ -91347,30 +96831,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/21", "hash": 7650015170039242996, "orig": "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/33" } ], + "sref": "#/texts/20", "text": "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way.", "text-hash": 333520156392116834, "type": "paragraph" @@ -91379,30 +96845,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/22", "hash": 14959508657858158650, "orig": "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation. This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms. This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9599999785423279 - ], - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/34" } ], + "sref": "#/texts/21", "text": "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation. This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms. This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased.", "text-hash": 6868109665737773720, "type": "paragraph" @@ -91411,30 +96859,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/23", "hash": 10379300903412882972, "orig": "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design. First of all, one can not think anymore at the level of a single document. Rather, one should think at the level of a collection of documents (or a corpus of documents). A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is. This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format. Our solution can ingest an entire collection of documents and build machine learned models on top of that. Of course, once the the model is trained, one can convert documents one at a time, too.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/35" } ], + "sref": "#/texts/22", "text": "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design. First of all, one can not think anymore at the level of a single document. Rather, one should think at the level of a collection of documents (or a corpus of documents). A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is. This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format. Our solution can ingest an entire collection of documents and build machine learned models on top of that. Of course, once the the model is trained, one can convert documents one at a time, too.", "text-hash": 11150916691880738938, "type": "paragraph" @@ -91443,30 +96873,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/25", "hash": 4994395008195818594, "orig": "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it. Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way. These annotations are then used as ground-truth data to train models. It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents. For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application. It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9599999785423279 - ], - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/39" } ], + "sref": "#/texts/23", "text": "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it. Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way. These annotations are then used as ground-truth data to train models. It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents. For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application. It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way.", "text-hash": 16536368219630364368, "type": "paragraph" @@ -91475,30 +96887,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/26", "hash": 4203835122307823579, "orig": "3.1 Components", - "properties": { - "data": [ - [ - "language", - "en", - 0.23999999463558197 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/40" } ], + "sref": "#/texts/24", "text": "3.1 Components", "text-hash": 3789103236857293111, "type": "subtitle-level-1" @@ -91507,30 +96901,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/27", "hash": 13520362244078084911, "orig": "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML).", - "properties": { - "data": [ - [ - "language", - "en", - 0.75 - ], - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/41" } ], + "sref": "#/texts/25", "text": "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML).", "text-hash": 12910497814715733387, "type": "paragraph" @@ -91539,30 +96915,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/28", "hash": 1749622367305947670, "orig": "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format. If a trained model is available, only components 1, 4 and 5 are needed to convert the documents. If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models. It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional.", - "properties": { - "data": [ - [ - "language", - "en", - 0.8999999761581421 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/42" } ], + "sref": "#/texts/26", "text": "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format. If a trained model is available, only components 1, 4 and 5 are needed to convert the documents. If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models. It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional.", "text-hash": 1334541935326461060, "type": "paragraph" @@ -91571,30 +96929,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/30", "hash": 11083736481641202939, "orig": "Let us now elaborate on what each of the five components deliver in the rest of this section.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9200000166893005 - ], - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/44" } ], + "sref": "#/texts/27", "text": "Let us now elaborate on what each of the five components deliver in the rest of this section.", "text-hash": 10456209429844276823, "type": "paragraph" @@ -91603,30 +96943,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/31", "hash": 15403141463083979171, "orig": "3.2 Parsing of Documents", - "properties": { - "data": [ - [ - "language", - "en", - 0.6800000071525574 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/45" } ], + "sref": "#/texts/28", "text": "3.2 Parsing of Documents", "text-hash": 6127225399482532623, "type": "subtitle-level-1" @@ -91635,30 +96957,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/32", "hash": 12234429517419341922, "orig": "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page. For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper. There are two reasons why we are interested in these cells. First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label. Second, the concept of a cell can be easily transferred to scanned documents. In Figure 2, we show the cells obtained from an example PDF page after the parsing stage.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9300000071525574 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/46" } ], + "sref": "#/texts/29", "text": "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page. For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper. There are two reasons why we are interested in these cells. First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label. Second, the concept of a cell can be easily transferred to scanned documents. In Figure 2, we show the cells obtained from an example PDF page after the parsing stage.", "text-hash": 13908173772261346000, "type": "paragraph" @@ -91667,30 +96971,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/33", "hash": 16957857111665886816, "orig": "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells. This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs. Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines. This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models. As a consequence, we reduce the variability of the geometric features as much as possible. The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/47" } ], + "sref": "#/texts/30", "text": "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells. This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs. Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines. This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models. As a consequence, we reduce the variability of the geometric features as much as possible. The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions.", "text-hash": 9481411723883903182, "type": "paragraph" @@ -91699,30 +96985,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/34", "hash": 10390915169360946497, "orig": "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document. This operation relies on the iterators provided by the QPDF library$^{9}$.", - "properties": { - "data": [ - [ - "language", - "en", - 0.8500000238418579 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/48" } ], + "sref": "#/texts/31", "text": "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document. This operation relies on the iterators provided by the QPDF library$^{9}$.", "text-hash": 11149022357700220845, "type": "paragraph" @@ -91731,30 +96999,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/35", "hash": 15254383206256494278, "orig": "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content. Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document. From this point, all further processing does not need to distinguish between scanned or programmatic sources.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/51" } ], + "sref": "#/texts/32", "text": "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content. Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document. From this point, all further processing does not need to distinguish between scanned or programmatic sources.", "text-hash": 6573226034038831156, "type": "paragraph" @@ -91763,30 +97013,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/36", "hash": 17759618186065566858, "orig": "3.3 Ground-truth gathering through human-annotation", - "properties": { - "data": [ - [ - "language", - "en", - 0.8299999833106995 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/52" } ], + "sref": "#/texts/33", "text": "3.3 Ground-truth gathering through human-annotation", "text-hash": 8679681341332585960, "type": "subtitle-level-1" @@ -91795,30 +97027,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/37", "hash": 11638821473906997927, "orig": "In this component, we collect ground-truth for the custom machine learning models to be trained on. Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision. Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents. As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning. The purpose of these annotators is two-fold.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9700000286102295 - ], - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/53" } ], + "sref": "#/texts/34", "text": "In this component, we collect ground-truth for the custom machine learning models to be trained on. Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision. Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents. As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning. The purpose of these annotators is two-fold.", "text-hash": 14503768930839698451, "type": "paragraph" @@ -91827,30 +97041,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/38", "hash": 13020065077657899116, "orig": "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach. In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2). We then ask the (human) annotator to assign each cell a layout semantic label. Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$. In the annotator tool, each layout semantic label is visually represented by a colour. By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3. Since humans are very efficient in visual recognition, this task comes very natural to us. The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", - "properties": { - "data": [ - [ - "language", - "en", - 0.8899999856948853 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/54" } ], + "sref": "#/texts/35", "text": "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach. In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2). We then ask the (human) annotator to assign each cell a layout semantic label. Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$. In the annotator tool, each layout semantic label is visually represented by a colour. By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3. Since humans are very efficient in visual recognition, this task comes very natural to us. The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", "text-hash": 13130850271187616458, "type": "paragraph" @@ -91859,30 +97055,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/39", "hash": 10103841011442966464, "orig": "The second purpose of the annotators is to visually inspect the quality of our machine learned models. The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell. Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page. This allows the users to directly inspect the results of the models on unseen pages. A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels. Of course, as the models become better over time, the number of corrections needed to be made become less and less. This allows us to significantly reduce the annotation time per document. Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/55" } ], + "sref": "#/texts/36", "text": "The second purpose of the annotators is to visually inspect the quality of our machine learned models. The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell. Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page. This allows the users to directly inspect the results of the models on unseen pages. A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels. Of course, as the models become better over time, the number of corrections needed to be made become less and less. This allows us to significantly reduce the annotation time per document. Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering.", "text-hash": 11435379797753757998, "type": "paragraph" @@ -91891,30 +97069,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/40", "hash": 10982401368140758581, "orig": "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute. The vertical red lines indicate that a training was performed on the annotated pages, and a new, improved model is", - "properties": { - "data": [ - [ - "language", - "en", - 0.9599999785423279 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/56" } ], + "sref": "#/texts/37", "text": "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute. The vertical red lines indicate that a training was performed on the annotated pages, and a new, improved model is", "text-hash": 10548529097098469537, "type": "paragraph" @@ -91923,30 +97083,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/42", "hash": 887751753527930563, "orig": "used from that point to predict the labels. Since the corrections become less and less, the rate of annotation goes up. It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice. The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", - "properties": { - "data": [ - [ - "language", - "en", - 0.949999988079071 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/60" } ], + "sref": "#/texts/38", "text": "used from that point to predict the labels. Since the corrections become less and less, the rate of annotation goes up. It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice. The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", "text-hash": 2205427981859754031, "type": "paragraph" @@ -91955,30 +97097,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/43", "hash": 4695688617288377564, "orig": "3.4 Machine Learning: Training models & Applying models", - "properties": { - "data": [ - [ - "language", - "en", - 0.800000011920929 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/61" } ], + "sref": "#/texts/39", "text": "3.4 Machine Learning: Training models & Applying models", "text-hash": 16834670239362291258, "type": "subtitle-level-1" @@ -91987,30 +97111,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/44", "hash": 3275001812318455279, "orig": "In the CCS, there are essentially two types of machine-learning models. On the one hand, we have default models, which are designed to be layout independent. They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall. They will classify each cell in the page with regard to their layout semantic label.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/62" } ], + "sref": "#/texts/40", "text": "In the CCS, there are essentially two types of machine-learning models. On the one hand, we have default models, which are designed to be layout independent. They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall. They will classify each cell in the page with regard to their layout semantic label.", "text-hash": 4429706140044408651, "type": "paragraph" @@ -92019,30 +97125,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/45", "hash": 15354930767839681193, "orig": "3.4.1 Metrics. Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results. The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label. The correctness of this label is what we aim to measure with the recall and precision metrics. The second observation is that we deal with a", - "properties": { - "data": [ - [ - "language", - "en", - 0.8999999761581421 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/63" } ], + "sref": "#/texts/41", "text": "3.4.1 Metrics. Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results. The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label. The correctness of this label is what we aim to measure with the recall and precision metrics. The second observation is that we deal with a", "text-hash": 6184852591532473349, "type": "paragraph" @@ -92051,30 +97139,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/47", "hash": 6337233386759158728, "orig": "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label.", - "properties": { - "data": [ - [ - "language", - "en", - 0.8999999761581421 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/66" } ], + "sref": "#/texts/42", "text": "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label.", "text-hash": 15490331838172880166, "type": "paragraph" @@ -92083,30 +97153,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/48", "hash": 2249972239307071508, "orig": "The recall (= \u211b) and precision (= \ud835\udcab) for a given label on a page is defined by the standard formulas", - "properties": { - "data": [ - [ - "language", - "en", - 0.8199999928474426 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/67" } ], + "sref": "#/texts/43", "text": "The recall (= \u211b) and precision (= \ud835\udcab) for a given label on a page is defined by the standard formulas", "text-hash": 1131271437908497026, "type": "paragraph" @@ -92115,30 +97167,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/49", "hash": 12383805870947794174, "orig": "\u211b = t$_{p}$ t$_{p}$ + f$_{p}$ , \ud835\udcab = t$_{p}$ t$_{p}$ + f$_{n}$ , (1)", - "properties": { - "data": [ - [ - "language", - "en", - 0.27000001072883606 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/68" } ], + "sref": "#/texts/44", "text": "\u211b = t$_{p}$ t$_{p}$ + f$_{p}$ , \ud835\udcab = t$_{p}$ t$_{p}$ + f$_{n}$ , (1)", "text-hash": 14055366495763095132, "type": "equation" @@ -92147,30 +97181,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/50", "hash": 7053654953998543393, "orig": "where t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels.", - "properties": { - "data": [ - [ - "language", - "en", - 0.5799999833106995 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/69" } ], + "sref": "#/texts/45", "text": "where t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels.", "text-hash": 642098605774556301, "type": "paragraph" @@ -92179,30 +97195,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/51", "hash": 15921044595687116426, "orig": "3.4.2 Default Models. The aim of the default models is to identify specific, ubiquitous objects in documents. Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods. Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]. On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", - "properties": { - "data": [ - [ - "language", - "en", - 0.949999988079071 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/70" } ], + "sref": "#/texts/46", "text": "3.4.2 Default Models. The aim of the default models is to identify specific, ubiquitous objects in documents. Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods. Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]. On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", "text-hash": 5618307884355612648, "type": "paragraph" @@ -92211,30 +97209,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/52", "hash": 12234068400463628788, "orig": "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9700000286102295 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/71" } ], + "sref": "#/texts/47", "text": "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects.", "text-hash": 13907813772802190178, "type": "paragraph" @@ -92243,30 +97223,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/53", "hash": 4628466594790006384, "orig": "The networks available on our platform have been trained on arXiv data$^{11}$. We have annotated 30000 PDF pages and know the", - "properties": { - "data": [ - [ - "language", - "en", - 0.9200000166893005 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/72" } ], + "sref": "#/texts/48", "text": "The networks available on our platform have been trained on arXiv data$^{11}$. We have annotated 30000 PDF pages and know the", "text-hash": 16911352314006995166, "type": "paragraph" @@ -92275,30 +97237,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/55", "hash": 9651706913678711778, "orig": "location of at least one table on each page. From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation. Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9200000166893005 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/76" } ], + "sref": "#/texts/49", "text": "location of at least one table on each page. From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation. Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", "text-hash": 11888191065829014864, "type": "paragraph" @@ -92307,30 +97251,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/56", "hash": 1363251178266051349, "orig": "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes. The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks. An example of such an image can be seen in Figure 5. The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts. Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition. This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet. We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9100000262260437 - ], - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/77" } ], + "sref": "#/texts/50", "text": "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes. The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks. An example of such an image can be seen in Figure 5. The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts. Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition. This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet. We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page.", "text-hash": 2009046567395259777, "type": "paragraph" @@ -92339,30 +97265,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/57", "hash": 18259197018396996238, "orig": "Let us now discuss both deep neural network training microservices on the platform. In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision. In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times. We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied. We believe that this is the main origin for the discrepancy of time-to-solution for the training phase. The same holds true for the prediction. Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", - "properties": { - "data": [ - [ - "language", - "en", - 0.9599999785423279 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/78" } ], + "sref": "#/texts/51", "text": "Let us now discuss both deep neural network training microservices on the platform. In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision. In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times. We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied. We believe that this is the main origin for the discrepancy of time-to-solution for the training phase. The same holds true for the prediction. Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", "text-hash": 7883278994224882668, "type": "paragraph" @@ -92371,30 +97279,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/58", "hash": 14663676516964431047, "orig": "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously. The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1. We use these bounding boxes to associate with each cell a label, which is in this particular case either Table or Not-Table, depending on whether they overlap", - "properties": { - "data": [ - [ - "language", - "en", - 0.949999988079071 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/79" } ], + "sref": "#/texts/52", "text": "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously. The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1. We use these bounding boxes to associate with each cell a label, which is in this particular case either Table or Not-Table, depending on whether they overlap", "text-hash": 7164504172498806323, "type": "paragraph" @@ -92403,30 +97293,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/59", "hash": 4577067829072175096, "orig": "Table 2: Performance results for the template specific model of the Physical Review B journals. The confusion matrix highlights the huge imbalance between the number of text cells with different labels. The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", - "properties": { - "data": [ - [ - "language", - "en", - 0.8600000143051147 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/80" } ], + "sref": "#/texts/53", "text": "Table 2: Performance results for the template specific model of the Physical Review B journals. The confusion matrix highlights the huge imbalance between the number of text cells with different labels. The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", "text-hash": 3406859306294395222, "type": "paragraph" @@ -92435,30 +97307,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/60", "hash": 2569392033451362672, "orig": "with the predicted bounding box. The corresponding recall and precision are then computed for this dual-class classification problem. In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence. For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0. 5. The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers. We believe this originates from the selective search algorithm which is used to determine regions of interest. The images we feed it are not typical photographic images (made with a camera) but layout visualisations. The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9200000166893005 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/82" } ], + "sref": "#/texts/54", "text": "with the predicted bounding box. The corresponding recall and precision are then computed for this dual-class classification problem. In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence. For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0. 5. The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers. We believe this originates from the selective search algorithm which is used to determine regions of interest. The images we feed it are not typical photographic images (made with a camera) but layout visualisations. The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", "text-hash": 5414143675771382750, "type": "paragraph" @@ -92467,30 +97321,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/61", "hash": 14539041145469267811, "orig": "3.4.3 Template specific Models. The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template. This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance. Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9200000166893005 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/83" } ], + "sref": "#/texts/55", "text": "3.4.3 Template specific Models. The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template. This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance. Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", "text-hash": 6991735551340401103, "type": "paragraph" @@ -92499,30 +97335,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/62", "hash": 8607014065143641201, "orig": "For an algorithm to fit in the interactive platform design we identified a few key requirements. First, it is crucial that the model can generate good results with a limited set of pages. In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation. Second it must be robust against extreme imbalance of the labeled data. It is clear that cells of the label Title will be much more uncommon than cells with the label of Text. Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", - "properties": { - "data": [ - [ - "language", - "en", - 0.949999988079071 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/84" } ], + "sref": "#/texts/56", "text": "For an algorithm to fit in the interactive platform design we identified a few key requirements. First, it is crucial that the model can generate good results with a limited set of pages. In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation. Second it must be robust against extreme imbalance of the labeled data. It is clear that cells of the label Title will be much more uncommon than cells with the label of Text. Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", "text-hash": 17832237182951286493, "type": "paragraph" @@ -92531,30 +97349,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/63", "hash": 1994904537764312371, "orig": "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models. Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data. In our case,", - "properties": { - "data": [ - [ - "language", - "en", - 0.949999988079071 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/85" } ], + "sref": "#/texts/57", "text": "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models. Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data. In our case,", "text-hash": 1377511684573734815, "type": "paragraph" @@ -92563,30 +97363,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/65", "hash": 7742256726079628058, "orig": "this structure originates of course from the template. Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements. As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9200000166893005 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/88" } ], + "sref": "#/texts/58", "text": "this structure originates of course from the template. Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements. As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", "text-hash": 250119056806139256, "type": "paragraph" @@ -92595,30 +97377,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/66", "hash": 8810233123818174294, "orig": "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties. For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells. Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters. We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9599999785423279 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/89" } ], + "sref": "#/texts/59", "text": "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties. For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells. Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters. We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell.", "text-hash": 17619932035192809924, "type": "paragraph" @@ -92627,30 +97391,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/67", "hash": 16446711449286912460, "orig": "It is important to realize that almost all of these features are purely geometrical. This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/90" } ], + "sref": "#/texts/60", "text": "It is important to realize that almost all of these features are purely geometrical. This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents.", "text-hash": 9704353849744984874, "type": "paragraph" @@ -92659,30 +97405,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/68", "hash": 9558434107504657973, "orig": "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$. We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels. Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label. We observe that the recall and precision numbers are excellent, with most of them above 99%. This is not surprising, since we are building models that specialise for a particular template.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9100000262260437 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/91" } ], + "sref": "#/texts/61", "text": "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$. We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels. Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label. We observe that the recall and precision numbers are excellent, with most of them above 99%. This is not surprising, since we are building models that specialise for a particular template.", "text-hash": 11971893452237256865, "type": "paragraph" @@ -92691,30 +97419,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/69", "hash": 18349896906192842040, "orig": "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on. The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform. We do not need to define rules and heuristics or update code in order to deal with new types of documents. We only need to gather more data.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/92" } ], + "sref": "#/texts/62", "text": "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on. The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform. We do not need to define rules and heuristics or update code in order to deal with new types of documents. We only need to gather more data.", "text-hash": 8080940474762743702, "type": "paragraph" @@ -92723,30 +97433,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/70", "hash": 10082834006373808153, "orig": "3.5 Assembly", - "properties": { - "data": [ - [ - "language", - "en", - 0.8199999928474426 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/93" } ], + "sref": "#/texts/63", "text": "3.5 Assembly", "text-hash": 11736313095563614837, "type": "subtitle-level-1" @@ -92755,30 +97447,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/71", "hash": 15253541252152665681, "orig": "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics. This structured data file is constructed by assembling all the cells from the parsed file", - "properties": { - "data": [ - [ - "language", - "en", - 0.8899999856948853 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/94" } ], + "sref": "#/texts/64", "text": "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics. This structured data file is constructed by assembling all the cells from the parsed file", "text-hash": 6565628665194191037, "type": "paragraph" @@ -92787,30 +97461,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/72", "hash": 3904142170608486950, "orig": "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper.", - "properties": { - "data": [ - [ - "language", - "en", - 0.7799999713897705 - ], - [ - "semantic", - "text", - 0.5199999809265137 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/96" } ], + "sref": "#/texts/65", "text": "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper.", "text-hash": 4079383948124449940, "type": "paragraph" @@ -92819,30 +97475,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/73", "hash": 6410818076508661508, "orig": "{ 'description ': { 'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale. ',, \u2192 'abstract ': 'Over the past few decades, the amount of scientific articles [...] ',, \u2192 'affiliations ': 'IBM Research Rueschlikon, Switzerland ', 'authors ': 'Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas ', \u2192 }, 'main-text ': [{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...], 'tables ': [{...},...], 'images ': [{...},...] }", - "properties": { - "data": [ - [ - "language", - "en", - 0.3499999940395355 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/97" } ], + "sref": "#/texts/66", "text": "{ 'description ': { 'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale. ',, \u2192 'abstract ': 'Over the past few decades, the amount of scientific articles [...] ',, \u2192 'affiliations ': 'IBM Research Rueschlikon, Switzerland ', 'authors ': 'Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas ', \u2192 }, 'main-text ': [{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...], 'tables ': [{...},...], 'images ': [{...},...] }", "text-hash": 15129105844666734962, "type": "paragraph" @@ -92851,30 +97489,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/74", "hash": 12813875992986832439, "orig": "in combination with their associated predicted (or human-annotated) layout semantic labels. It should be noted that no machine learning is used in this component. It is purely rule based and therefore completely deterministic.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9800000190734863 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/98" } ], + "sref": "#/texts/67", "text": "in combination with their associated predicted (or human-annotated) layout semantic labels. It should be noted that no machine learning is used in this component. It is purely rule based and therefore completely deterministic.", "text-hash": 13337022012432085155, "type": "paragraph" @@ -92883,30 +97503,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/75", "hash": 11030869010407626539, "orig": "The assembly phase is a two step process. First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order. Then, the text of all cells that have the same label is contracted into a temporary document objects. Third, we build the internal structure of the temporary document objects, based on the information provided by the models. The latter is only applicable for internally structured objects, such as tables. An example of the generated JSON output is shown in Listing 1.", - "properties": { - "data": [ - [ - "language", - "en", - 0.949999988079071 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/99" } ], + "sref": "#/texts/68", "text": "The assembly phase is a two step process. First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order. Then, the text of all cells that have the same label is contracted into a temporary document objects. Third, we build the internal structure of the temporary document objects, based on the information provided by the models. The latter is only applicable for internally structured objects, such as tables. An example of the generated JSON output is shown in Listing 1.", "text-hash": 10508897272021404039, "type": "paragraph" @@ -92915,30 +97517,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/76", "hash": 2142320548375900929, "orig": "4 ARCHITECTURE AND ORCHESTRATION OF CLOUD BASED MICROSERVICES", - "properties": { - "data": [ - [ - "language", - "en", - 0.33000001311302185 - ], - [ - "semantic", - "header", - 0.8700000047683716 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/100" } ], + "sref": "#/texts/69", "text": "4 ARCHITECTURE AND ORCHESTRATION OF CLOUD BASED MICROSERVICES", "text-hash": 950718827856471405, "type": "subtitle-level-1" @@ -92947,30 +97531,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/77", "hash": 12747011194397783283, "orig": "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated. Before discussing the technical details, we would like to point out our requirements for the architecture of the platform. These requirements are all related to scaling. Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources. In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation. It is clear that the architecture of such a service is heavily influenced by these requirements.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9599999785423279 - ], - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/101" } ], + "sref": "#/texts/70", "text": "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated. Before discussing the technical details, we would like to point out our requirements for the architecture of the platform. These requirements are all related to scaling. Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources. In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation. It is clear that the architecture of such a service is heavily influenced by these requirements.", "text-hash": 13395059553653450335, "type": "paragraph" @@ -92979,30 +97545,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/78", "hash": 174789262945188010, "orig": "4.1 Platform layers", - "properties": { - "data": [ - [ - "language", - "en", - 0.6200000047683716 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/102" } ], + "sref": "#/texts/71", "text": "4.1 Platform layers", "text-hash": 3197077882590976520, "type": "subtitle-level-1" @@ -93011,30 +97559,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/79", "hash": 7228893318503650455, "orig": "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents. In Figure 6, we show a sketch of its", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/103" } ], + "sref": "#/texts/72", "text": "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents. In Figure 6, we show a sketch of its", "text-hash": 475277818666452483, "type": "paragraph" @@ -93043,30 +97573,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/81", "hash": 9230667184712205690, "orig": "architecture. As one can observe, we have grouped the service into four layers. These layers are:", - "properties": { - "data": [ - [ - "language", - "en", - 0.9599999785423279 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/106" } ], + "sref": "#/texts/73", "text": "architecture. As one can observe, we have grouped the service into four layers. These layers are:", "text-hash": 12309253064221915096, "type": "paragraph" @@ -93075,30 +97587,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/82", "hash": 17419815751432442882, "orig": "(1) An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering. The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", - "properties": { - "data": [ - [ - "language", - "en", - 0.8600000143051147 - ], - [ - "semantic", - "text", - 0.9200000166893005 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/107" } ], + "sref": "#/texts/74", "text": "(1) An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering. The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", "text-hash": 8731693174932948592, "type": "paragraph" @@ -93107,30 +97601,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/83", "hash": 11194226403360998426, "orig": "(2) An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result. The task scheduling is done with the Message Broker RabbitMQ$^{14}$. The results are stored in the in-memory data store Redis$^{15}$. In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully. This approach allows for a very robust, fault-tolerant service with very little downtime.", - "properties": { - "data": [ - [ - "language", - "en", - 0.8899999856948853 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/108" } ], + "sref": "#/texts/75", "text": "(2) An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result. The task scheduling is done with the Message Broker RabbitMQ$^{14}$. The results are stored in the in-memory data store Redis$^{15}$. In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully. This approach allows for a very robust, fault-tolerant service with very little downtime.", "text-hash": 10633901501381588600, "type": "paragraph" @@ -93139,30 +97615,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/84", "hash": 9005324696118733701, "orig": "(3) A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc). In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$. This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker. The workers are not only consumers of tasks, but may also produce new ones. This is the case for the requests", - "properties": { - "data": [ - [ - "language", - "en", - 0.8799999952316284 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/109" } ], + "sref": "#/texts/76", "text": "(3) A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc). In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$. This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker. The workers are not only consumers of tasks, but may also produce new ones. This is the case for the requests", "text-hash": 17146307233289309425, "type": "paragraph" @@ -93171,30 +97629,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/86", "hash": 8082547756621048511, "orig": "operating on the whole corpus. Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", - "properties": { - "data": [ - [ - "language", - "en", - 0.800000011920929 - ], - [ - "semantic", - "text", - 0.9200000166893005 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/116" } ], + "sref": "#/texts/77", "text": "operating on the whole corpus. Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", "text-hash": 18059523399368641563, "type": "paragraph" @@ -93203,30 +97643,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/87", "hash": 7791113385466815951, "orig": "(4) A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store. The object-store allows us to easily scale the storage with regard to the number of processed documents. However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9200000166893005 - ], - [ - "semantic", - "text", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/117" } ], + "sref": "#/texts/78", "text": "(4) A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store. The object-store allows us to easily scale the storage with regard to the number of processed documents. However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", "text-hash": 18360382746077681451, "type": "paragraph" @@ -93235,30 +97657,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/88", "hash": 2845012065511066307, "orig": "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it. This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ. Being a cloud-based platform, our solution allows for these software assets to be detached from the main deployment and to be served by specialised vendors services which are certified to the latest industry requirements such as data-at-rest encryption, high availability, etc.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9599999785423279 - ], - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/118" } ], + "sref": "#/texts/79", "text": "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it. This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ. Being a cloud-based platform, our solution allows for these software assets to be detached from the main deployment and to be served by specialised vendors services which are certified to the latest industry requirements such as data-at-rest encryption, high availability, etc.", "text-hash": 5147922161190726703, "type": "paragraph" @@ -93267,30 +97671,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/89", "hash": 15072914837937068796, "orig": "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform. From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services. During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks. For example other result-backends didn't offer the auto-cleaning functionality offered by Redis and, before opting for a custom solution mixing MongoDB with an object storage, we evaluated other solutions as", - "properties": { - "data": [ - [ - "language", - "en", - 0.949999988079071 - ], - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/119" } ], + "sref": "#/texts/80", "text": "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform. From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services. During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks. For example other result-backends didn't offer the auto-cleaning functionality offered by Redis and, before opting for a custom solution mixing MongoDB with an object storage, we evaluated other solutions as", "text-hash": 6457975667604208730, "type": "paragraph" @@ -93299,30 +97685,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/91", "hash": 15263283599394646155, "orig": "the GridFS storage, but it didn't fit to the constraints of typical cloud environments.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9800000190734863 - ], - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/123" } ], + "sref": "#/texts/81", "text": "the GridFS storage, but it didn't fit to the constraints of typical cloud environments.", "text-hash": 6564180200469858791, "type": "paragraph" @@ -93331,30 +97699,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/92", "hash": 11417717357379295278, "orig": "4.2 Deployment", - "properties": { - "data": [ - [ - "language", - "en", - 0.8399999737739563 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/124" } ], + "sref": "#/texts/82", "text": "4.2 Deployment", "text-hash": 10410411375713696396, "type": "subtitle-level-1" @@ -93363,30 +97713,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/93", "hash": 9031137420247852045, "orig": "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution. Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", - "properties": { - "data": [ - [ - "language", - "en", - 0.8500000238418579 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/125" } ], + "sref": "#/texts/83", "text": "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution. Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", "text-hash": 17120327512656828009, "type": "paragraph" @@ -93395,30 +97727,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/94", "hash": 18436578077535696718, "orig": "The common parts of all deployments are the interface and the compute layer. The compute layer is designed for dynamically adapt the number of resources on the current load. For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 0.9399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/126" } ], + "sref": "#/texts/84", "text": "The common parts of all deployments are the interface and the compute layer. The compute layer is designed for dynamically adapt the number of resources on the current load. For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", "text-hash": 8003240278028347820, "type": "paragraph" @@ -93427,30 +97741,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/95", "hash": 11734907767490759865, "orig": "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements. The parse component is indeed more demanding than the simple annotation components.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9100000262260437 - ], - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/127" } ], + "sref": "#/texts/85", "text": "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements. The parse component is indeed more demanding than the simple annotation components.", "text-hash": 14704352826439757333, "type": "paragraph" @@ -93459,30 +97755,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/96", "hash": 7845460979782401889, "orig": "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks. Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment. Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/128" } ], + "sref": "#/texts/86", "text": "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks. Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment. Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", "text-hash": 18296438351865061837, "type": "paragraph" @@ -93491,30 +97769,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/97", "hash": 17769988780693768120, "orig": "4.3 Scaling benchmarks", - "properties": { - "data": [ - [ - "language", - "en", - 0.38999998569488525 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/129" } ], + "sref": "#/texts/87", "text": "4.3 Scaling benchmarks", "text-hash": 8669715371308316950, "type": "subtitle-level-1" @@ -93523,30 +97783,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/98", "hash": 12387489643011067991, "orig": "Let us now discuss some scaling results on our platform. As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources. In Figure 7, we show the number of users and the number of processed PDF", - "properties": { - "data": [ - [ - "language", - "en", - 0.9300000071525574 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/130" } ], + "sref": "#/texts/88", "text": "Let us now discuss some scaling results on our platform. As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources. In Figure 7, we show the number of users and the number of processed PDF", "text-hash": 14043220598855238339, "type": "paragraph" @@ -93555,30 +97797,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/99", "hash": 10375772475809458895, "orig": "pages 20 as a function of time. As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017. It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time. Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9900000095367432 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/133" } ], + "sref": "#/texts/89", "text": "pages 20 as a function of time. As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017. It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time. Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", "text-hash": 11451664978555915307, "type": "paragraph" @@ -93587,30 +97811,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/100", "hash": 7054726458191881751, "orig": "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources. We show this scaling by displaying the speedup versus the number of worker nodes available. Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores. As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes. Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker. The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level. The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes. Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9399999976158142 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/134" } ], + "sref": "#/texts/90", "text": "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources. We show this scaling by displaying the speedup versus the number of worker nodes available. Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores. As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes. Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker. The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level. The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes. Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", "text-hash": 641132783909312643, "type": "paragraph" @@ -93619,30 +97825,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/101", "hash": 7794115281016062068, "orig": "5 CONCLUSION", - "properties": { - "data": [ - [ - "language", - "en", - 0.38999998569488525 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/135" } ], + "sref": "#/texts/91", "text": "5 CONCLUSION", "text-hash": 18347902420476900066, "type": "subtitle-level-1" @@ -93651,30 +97839,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/102", "hash": 7038163015905900647, "orig": "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", - "properties": { - "data": [ - [ - "language", - "en", - 0.9200000166893005 - ], - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/136" } ], + "sref": "#/texts/92", "text": "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", "text-hash": 657005981473069779, "type": "paragraph" @@ -93683,30 +97853,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/103", "hash": 1508626318915838319, "orig": "The fundamental design choices in our solution have proven to enable scaling in three elementary ways. First, it can service multiple users concurrently. Second, it can ingest, parse and apply machine learned models on many documents at the same time. Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", - "properties": { - "data": [ - [ - "language", - "en", - 0.949999988079071 - ], - [ - "semantic", - "text", - 0.9399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/137" } ], + "sref": "#/texts/93", "text": "The fundamental design choices in our solution have proven to enable scaling in three elementary ways. First, it can service multiple users concurrently. Second, it can ingest, parse and apply machine learned models on many documents at the same time. Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", "text-hash": 1575427749670982603, "type": "paragraph" @@ -93715,30 +97867,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/104", "hash": 17247086344435786796, "orig": "In the future, we plan to extend the platform in two major areas. First, we would like to extend the number of microservices, especially with regard to image understanding. The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc). The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier. Second, we would like to improve the quality and performance of our default models. We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5). To leverage this growing use of deep learning models, we will additionally introduce", - "properties": { - "data": [ - [ - "language", - "en", - 0.9300000071525574 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/138" } ], + "sref": "#/texts/94", "text": "In the future, we plan to extend the platform in two major areas. First, we would like to extend the number of microservices, especially with regard to image understanding. The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc). The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier. Second, we would like to improve the quality and performance of our default models. We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5). To leverage this growing use of deep learning models, we will additionally introduce", "text-hash": 9192771730962863754, "type": "paragraph" @@ -93747,30 +97881,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/105", "hash": 10287541089279789496, "orig": "specialised data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", - "properties": { - "data": [ - [ - "language", - "en", - 0.8299999833106995 - ], - [ - "semantic", - "text", - 0.9399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/140" } ], + "sref": "#/texts/95", "text": "specialised data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", "text-hash": 11530911151361059606, "type": "paragraph" @@ -93779,30 +97895,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/106", "hash": 7819882792760965882, "orig": "ACKNOWLEDGMENTS", - "properties": { - "data": [ - [ - "language", - "en", - 0.25 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/141" } ], + "sref": "#/texts/96", "text": "ACKNOWLEDGMENTS", "text-hash": 18322720810464861272, "type": "subtitle-level-1" @@ -93811,30 +97909,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/107", "hash": 15983582675278266440, "orig": "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", - "properties": { - "data": [ - [ - "language", - "en", - 0.949999988079071 - ], - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/142" } ], + "sref": "#/texts/97", "text": "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", "text-hash": 5556222901900980902, "type": "paragraph" @@ -93843,30 +97923,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/108", "hash": 12711351442546714716, "orig": "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation. MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", - "properties": { - "data": [ - [ - "language", - "en", - 0.9300000071525574 - ], - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/143" } ], + "sref": "#/texts/98", "text": "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation. MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", "text-hash": 13431247303555599034, "type": "paragraph" @@ -93875,30 +97937,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/109", "hash": 1225384713519841338, "orig": "REFERENCES", - "properties": { - "data": [ - [ - "language", - "en", - 0.33000001311302185 - ], - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/144" } ], + "sref": "#/texts/99", "text": "REFERENCES", "text-hash": 1858797456585454232, "type": "subtitle-level-1" @@ -93907,30 +97951,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/110", "hash": 1712774266196702392, "orig": "[1] A. Antonacopoulos, C. Clausner, C. Papadopoulos, and S. Pletschacher. 2015. ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015. In Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR2015). Nancy, 1151-1155.", - "properties": { - "data": [ - [ - "language", - "en", - 0.6499999761581421 - ], - [ - "semantic", - "reference", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/145" } ], + "sref": "#/texts/100", "text": "[1] A. Antonacopoulos, C. Clausner, C. Papadopoulos, and S. Pletschacher. 2015. ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015. In Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR2015). Nancy, 1151-1155.", "text-hash": 1659105420801451542, "type": "paragraph" @@ -93939,30 +97965,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/111", "hash": 14718288547983000340, "orig": "[2] Leo Breiman. 2001. Random Forests. Machine Learning 45, 1 (01 Oct 2001), 5-32. https://doi.org/10.1023/A:1010933404324", - "properties": { - "data": [ - [ - "language", - "en", - 0.5799999833106995 - ], - [ - "semantic", - "text", - 0.6100000143051147 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/146" } ], + "sref": "#/texts/101", "text": "[2] Leo Breiman. 2001. Random Forests. Machine Learning 45, 1 (01 Oct 2001), 5-32. https://doi.org/10.1023/A:1010933404324", "text-hash": 6812664208788567426, "type": "paragraph" @@ -93971,30 +97979,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/112", "hash": 16943780574244090186, "orig": "[3] R. Cattoni, T. Coianiz, S. Messelodi, and C. M. Modena. 1998. Geometric layout analysis techniques for document image understanding: a review. Technical Report.", - "properties": { - "data": [ - [ - "language", - "en", - 0.6700000166893005 - ], - [ - "semantic", - "reference", - 0.7799999713897705 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/147" } ], + "sref": "#/texts/102", "text": "[3] R. Cattoni, T. Coianiz, S. Messelodi, and C. M. Modena. 1998. Geometric layout analysis techniques for document image understanding: a review. Technical Report.", "text-hash": 9486476535199015848, "type": "paragraph" @@ -94003,30 +97993,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/113", "hash": 8004985786049140169, "orig": "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier. 2005. From Legacy Documents to XML: A Conversion Framework. Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103. https://doi.org/10.1007/11551362_9", - "properties": { - "data": [ - [ - "language", - "en", - 0.3400000035762787 - ], - [ - "semantic", - "text", - 0.49000000953674316 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/148" } ], + "sref": "#/texts/103", "text": "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier. 2005. From Legacy Documents to XML: A Conversion Framework. Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103. https://doi.org/10.1007/11551362_9", "text-hash": 18434854666592634661, "type": "paragraph" @@ -94035,30 +98007,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/114", "hash": 12744546813104546377, "orig": "[5] Ross Girshick. 2015. Fast R-CNN. In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15). IEEE Computer Society, Washington, DC, USA, 1440-1448. https://doi.org/10.1109/ICCV.2015.169", - "properties": { - "data": [ - [ - "language", - "en", - 0.47999998927116394 - ], - [ - "semantic", - "text", - 0.6100000143051147 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/149" } ], + "sref": "#/texts/104", "text": "[5] Ross Girshick. 2015. Fast R-CNN. In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15). IEEE Computer Society, Washington, DC, USA, 1440-1448. https://doi.org/10.1109/ICCV.2015.169", "text-hash": 13406949228208477349, "type": "paragraph" @@ -94067,30 +98021,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/115", "hash": 16061746189176848219, "orig": "[6] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. 2013. Rich feature hierarchies for accurate object detection and semantic segmentation. CoRR abs/1311.2524 (2013). arXiv:1311.2524 http://arxiv.org/abs/1311.2524", - "properties": { - "data": [ - [ - "language", - "en", - 0.6299999952316284 - ], - [ - "semantic", - "reference", - 0.5799999833106995 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/150" } ], + "sref": "#/texts/105", "text": "[6] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. 2013. Rich feature hierarchies for accurate object detection and semantic segmentation. CoRR abs/1311.2524 (2013). arXiv:1311.2524 http://arxiv.org/abs/1311.2524", "text-hash": 5756829059313082807, "type": "paragraph" @@ -94099,30 +98035,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/116", "hash": 11872392946390819176, "orig": "[7] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. 2016. SSD: Single Shot MultiBox Detector. Springer International Publishing, Cham, 21-37. https://doi.org/10. 1007/978-3-319-46448-0_2", - "properties": { - "data": [ - [ - "language", - "en", - 0.38999998569488525 - ], - [ - "semantic", - "reference", - 0.6000000238418579 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/151" } ], + "sref": "#/texts/106", "text": "[7] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. 2016. SSD: Single Shot MultiBox Detector. Springer International Publishing, Cham, 21-37. https://doi.org/10. 1007/978-3-319-46448-0_2", "text-hash": 14270091870781297606, "type": "paragraph" @@ -94131,30 +98049,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/117", "hash": 2956849475535726296, "orig": "[8] Joseph Redmon, Santosh Kumar Divvala, Ross B. Girshick, and Ali Farhadi. 2016. You Only Look Once: Unified, Real-Time Object Detection. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016), 779-788.", - "properties": { - "data": [ - [ - "language", - "en", - 0.6299999952316284 - ], - [ - "semantic", - "reference", - 0.7900000214576721 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/152" } ], + "sref": "#/texts/107", "text": "[8] Joseph Redmon, Santosh Kumar Divvala, Ross B. Girshick, and Ali Farhadi. 2016. You Only Look Once: Unified, Real-Time Object Detection. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016), 779-788.", "text-hash": 4738468948628789302, "type": "paragraph" @@ -94163,30 +98063,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/118", "hash": 6623297047995432604, "orig": "[9] Joseph Redmon and Ali Farhadi. 2016. YOLO9000: Better, Faster, Stronger. arXiv preprint arXiv:1612.08242 (2016).", - "properties": { - "data": [ - [ - "language", - "en", - 0.4399999976158142 - ], - [ - "semantic", - "reference", - 0.6899999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/153" } ], + "sref": "#/texts/108", "text": "[9] Joseph Redmon and Ali Farhadi. 2016. YOLO9000: Better, Faster, Stronger. arXiv preprint arXiv:1612.08242 (2016).", "text-hash": 15195146357792776186, "type": "paragraph" @@ -94195,30 +98077,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/119", "hash": 2507285765516108280, "orig": "[10] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Advances in Neural Information Processing Systems 28, C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett (Eds.). Curran Associates, Inc., 91-99. http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks. pdf", - "properties": { - "data": [ - [ - "language", - "en", - 0.5899999737739563 - ], - [ - "semantic", - "reference", - 0.6800000071525574 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/154" } ], + "sref": "#/texts/109", "text": "[10] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Advances in Neural Information Processing Systems 28, C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett (Eds.). Curran Associates, Inc., 91-99. http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks. pdf", "text-hash": 5476658171803931478, "type": "paragraph" @@ -94227,30 +98091,12 @@ "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/120", "hash": 14905276480471286920, "orig": "[11] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. 2018. Corpus Conversion Service poster at the SysML conference. http://www.sysml.cc/doc/ 76.pdf", - "properties": { - "data": [ - [ - "language", - "en", - 0.47999998927116394 - ], - [ - "semantic", - "reference", - 0.8899999856948853 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/155" } ], + "sref": "#/texts/110", "text": "[11] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. 2018. Corpus Conversion Service poster at the SysML conference. http://www.sysml.cc/doc/ 76.pdf", "text-hash": 6922174983558886886, "type": "paragraph" diff --git a/tests/data/docs/doc_01.nlp.json b/tests/data/docs/doc_01.nlp.json index 52989428..82142ef9 100644 --- a/tests/data/docs/doc_01.nlp.json +++ b/tests/data/docs/doc_01.nlp.json @@ -871,6 +871,88 @@ "type": "figure" } ], + "file-info": { + "#-pages": 15, + "document-hash": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d", + "filename": "Applied AI Letters - 2020 - Staar.pdf", + "page-hashes": [ + { + "hash": "365f2c5695b6aa22d2096bc88ab18f76b2c7f3eed6359d68af14a6aa7916ce70", + "model": "model", + "page": 1 + }, + { + "hash": "4fe5aedd95efb3beca7a5b39c9d9883838b8f5ac746d167c3fbfb4e9848d4a7e", + "model": "model", + "page": 2 + }, + { + "hash": "f3d1a1b3bad4dce4ab96bdf3e3fe105a7814009d24bacbfe5b90737979503e01", + "model": "model", + "page": 3 + }, + { + "hash": "dd8ebce06c6e2bec4a188b005eab757b6a222b7ae632eeec943767dbac6573b8", + "model": "model", + "page": 4 + }, + { + "hash": "0ebc61b0f17fd0a49cb6fd809500169f2fb13a27f15dfd345e8099c7e45491ed", + "model": "model", + "page": 5 + }, + { + "hash": "39a00bc2dd440023ebc606e26683a79bf9f6806f61af143cb1edfd03eba28a14", + "model": "model", + "page": 6 + }, + { + "hash": "5cfb4312275e9ed166e878f8b557709d831899591c7414facbf8d0fdd732d4ce", + "model": "model", + "page": 7 + }, + { + "hash": "71916575c81d0609e6862ac97c00dad85d1ec468262ea3934db42b8f13f0e7e7", + "model": "model", + "page": 8 + }, + { + "hash": "df3fce65445de0d07ebfda8634f70566417a0fcdd249c0120056aa082f1b1733", + "model": "model", + "page": 9 + }, + { + "hash": "03a8792b4ca1b1e832dec33cd23345c446d214f94b29e174946e062d890555e0", + "model": "model", + "page": 10 + }, + { + "hash": "b69f19266a3247e8e43ce2fdefba2018e63d827c38c8758ee70c51426a78c651", + "model": "model", + "page": 11 + }, + { + "hash": "53eb4a9c44cc911454a0a1bc45ffc8261737cdb47f43e2d97cc477ff1912c2a5", + "model": "model", + "page": 12 + }, + { + "hash": "56679ab44cd3f3e11de5886ccf872fb064cee7916809fa78e02a87cfab71a8e1", + "model": "model", + "page": 13 + }, + { + "hash": "6888730ff9f5a52a88ec198c50365f020fc78bca19bbdd1482bd5059172da68c", + "model": "model", + "page": 14 + }, + { + "hash": "0ff9d40eaca3f08f5b36c3644bf466c37a0644bd71a71066425200792dcff82d", + "model": "model", + "page": 15 + } + ] + }, "footnotes": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/footnotes/0", @@ -905,130 +987,172 @@ "instances": { "data": [ [ - "name", - "person-name", - 16781763356419781679, + "sentence", + "", + 16949854269270315165, "TEXT", - "#/texts/2", + "#/texts/94", 1.0, - 4686361850733567621, - 14538190648130419824, + 1859492819924485121, + 10838117205519727135, 18446744073709551615, 18446744073709551615, 0, - 17, + 128, 0, - 17, + 128, 0, - 6, + 20, true, - "Peter W J Staar", - "Peter W. J. Staar" + "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions.", + "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions." ], [ - "numval", - "ival", - 4017434568255781081, + "term", + "single-term", + 16949854269270315165, "TEXT", - "#/texts/8", + "#/texts/94", 1.0, - 17767354399704235156, - 7351830786655903422, + 17889054130498802051, + 13611413549729115921, 18446744073709551615, 18446744073709551615, - 62, - 63, - 62, - 63, - 12, - 13, + 27, + 44, + 27, + 44, + 5, + 7, true, - "4", - "4" + "fundamental types", + "fundamental types" ], [ - "numval", - "ival", - 4017434568255781081, + "term", + "single-term", + 16949854269270315165, "TEXT", - "#/texts/8", + "#/texts/94", 1.0, - 389609625533568071, - 13739132446336686651, + 3534171294115941544, + 8731026536612016164, 18446744073709551615, 18446744073709551615, - 65, - 69, - 65, - 69, - 14, - 15, + 48, + 57, + 48, + 57, + 8, + 9, true, - "8820", - "8820" + "worktasks", + "worktasks" ], [ - "link", - "email", - 4017434568255781081, + "expression", + "word-concatenation", + 16949854269270315165, "TEXT", - "#/texts/8", + "#/texts/94", 1.0, - 11117094662504195367, - 8034517963455466339, + 15221896740599576202, + 7666904121768591309, + 18446744073709551615, + 18446744073709551615, + 59, + 73, + 59, + 73, + 10, + 11, + true, + "node-retrieval", + "node-retrieval" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 3503811091434006699, + 4368860458480451668, + 18446744073709551615, + 18446744073709551615, + 75, + 84, + 75, + 84, + 12, + 13, + true, + "traversal", + "traversal" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 16654294478124171317, + 10151652501900860692, 18446744073709551615, 18446744073709551615, + 86, 103, - 121, + 86, 103, - 121, - 21, - 26, + 14, + 16, true, - "taa@zurich.ibm.com", - "taa@zurich.ibm.com" + "logical operators", + "logical operators" ], [ - "name", - "person-name", - 4017434568255781081, + "term", + "single-term", + 16949854269270315165, "TEXT", - "#/texts/8", + "#/texts/94", 1.0, - 9807900919297989315, - 8857913618678092312, + 11555096374369856312, + 7157942907653228754, 18446744073709551615, 18446744073709551615, - 0, - 32, - 0, - 32, - 0, - 7, + 108, + 127, + 108, + 127, + 17, + 19, true, - "Correspondence Peter W J Staar", - "Correspondence Peter W. J. Staar" + "transform functions", + "transform functions" ], [ "sentence", "", - 4017434568255781081, + 16949854269270315165, "TEXT", - "#/texts/8", + "#/texts/94", 1.0, - 1463783400548512489, - 4562795260271874000, + 4963035477772371835, + 4020325737246968829, 18446744073709551615, 18446744073709551615, - 0, - 95, - 0, - 95, - 0, - 19, + 129, + 262, + 129, + 262, + 20, + 44, true, - "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland.", - "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland." + "In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design.", + "In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design." ], [ "term", @@ -1054,23 +1178,23 @@ [ "term", "single-term", - 4017434568255781081, + 16949854269270315165, "TEXT", - "#/texts/8", + "#/texts/94", 1.0, - 497725968887992147, - 15543972956793692858, + 17030057430150962643, + 11687865223449973507, 18446744073709551615, 18446744073709551615, - 48, - 61, - 48, - 61, - 11, - 12, + 136, + 154, + 136, + 154, + 22, + 24, true, - "Saumerstrasse", - "Saumerstrasse" + "following sections", + "following sections" ], [ "term", @@ -1096,23 +1220,23 @@ [ "term", "single-term", - 4017434568255781081, + 16949854269270315165, "TEXT", - "#/texts/8", + "#/texts/94", 1.0, - 2664439525053388608, - 478252263928496257, + 16381206568246674273, + 3558057784302965696, 18446744073709551615, 18446744073709551615, - 83, - 94, - 83, - 94, - 17, - 18, + 175, + 181, + 175, + 181, + 29, + 30, true, - "Switzerland", - "Switzerland" + "detail", + "detail" ], [ "term", @@ -1136,25 +1260,25 @@ "Email" ], [ - "parenthesis", - "round brackets", - 11695737263227886476, + "term", + "single-term", + 16949854269270315165, "TEXT", - "#/texts/10", + "#/texts/94", 1.0, - 329104053210154735, - 17075323869805573137, + 3534171294115941544, + 8731026536612028033, 18446744073709551615, 18446744073709551615, - 601, - 606, - 601, - 606, - 99, - 102, + 190, + 199, + 190, + 199, + 32, + 33, true, - "(CPS)", - "(CPS)" + "worktasks", + "worktasks" ], [ "expression", @@ -1178,25 +1302,25 @@ "etc." ], [ - "expression", - "word-concatenation", - 11695737263227886476, + "numval", + "ival", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 5044385734724420019, - 12757516288413416407, + 14654386914267794441, + 12796143052106760105, 18446744073709551615, 18446744073709551615, - 821, - 837, - 821, - 837, - 140, - 141, + 0, + 8, + 0, + 8, + 0, + 1, true, - "state-of-the-art", - "state-of-the-art" + "26895595", + "26895595" ], [ "expression", @@ -1220,277 +1344,277 @@ "endto-end" ], [ - "expression", - "word-concatenation", - 11695737263227886476, + "numval", + "year", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 15984801488078789848, - 11443881616252239060, + 389609625548777262, + 8826555294676663632, 18446744073709551615, 18446744073709551615, - 1573, - 1583, - 1573, - 1583, - 250, - 251, + 10, + 14, + 10, + 14, + 2, + 3, true, - "real-world", - "real-world" + "2020", + "2020" ], [ - "sentence", - "", - 11695737263227886476, + "numval", + "ival", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 2370655382906505271, - 8040324972313183116, + 17767354399704235162, + 7753390158484899261, 18446744073709551615, 18446744073709551615, - 0, - 123, - 0, - 123, - 0, - 21, + 16, + 17, + 16, + 17, + 4, + 5, true, - "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data.", - "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data." + "2", + "2" ], [ "sentence", "", - 11695737263227886476, + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 8027272490911089522, - 7702147940060331105, + 10933383461306782608, + 10178418358179275356, 18446744073709551615, 18446744073709551615, - 124, - 261, - 124, - 261, - 21, - 43, + 19, + 125, + 19, + 125, + 6, + 16, true, - "Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world.", - "Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world." + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." ], [ - "sentence", - "", - 11695737263227886476, + "link", + "url", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 17559485512387879488, - 4960776794400025005, + 8536069645534292969, + 16063604623463467342, 18446744073709551615, 18446744073709551615, - 262, - 469, - 262, - 469, - 43, - 76, + 35, + 87, + 35, + 87, + 8, + 10, true, - "Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries.", - "Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries." + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," ], [ - "sentence", - "", - 11695737263227886476, + "expression", + "wtoken-concatenation", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 3570937525268539532, - 11346171061679122962, + 3856967589249015473, + 3576147774941915841, 18446744073709551615, 18446744073709551615, - 470, - 607, - 470, - 607, - 76, - 103, + 35, + 86, + 35, + 86, + 8, + 9, true, - "In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS).", - "In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS)." + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" ], [ - "sentence", - "", - 11695737263227886476, + "link", + "doi", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 13906125717568729148, - 4155905020420410366, + 1697220653346092555, + 8458710314769009562, 18446744073709551615, 18446744073709551615, - 608, - 793, - 608, - 793, - 103, - 134, - true, - "Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried.", - "Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried." + 67, + 87, + 67, + 87, + 8, + 10, + false, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," ], [ - "sentence", - "", - 11695737263227886476, + "numval", + "fval", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 7674845204641058037, - 6672554339198903999, + 8104408072666212335, + 13552219042525319352, 18446744073709551615, 18446744073709551615, - 794, - 1004, - 794, - 1004, - 134, - 162, - true, - "To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform.", - "To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform." - ], - [ - "sentence", - "", - 11695737263227886476, - "TEXT", - "#/texts/10", + 71, + 78, + 71, + 78, + 8, + 8, + false, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/95", 1.0, - 3532957815608940811, - 14429112738710635391, + 389609625548868096, + 8826558551385119058, 18446744073709551615, 18446744073709551615, - 1005, - 1171, - 1005, - 1171, - 162, - 185, - true, - "This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities.", - "This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities." + 82, + 86, + 82, + 86, + 8, + 9, + false, + "2.20", + "2.20" ], [ - "sentence", - "", - 11695737263227886476, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 9674378140136415946, - 14302529272335550558, + 12466457873768409517, + 3430070082404029638, 18446744073709551615, 18446744073709551615, - 1172, - 1256, - 1172, - 1256, - 185, - 199, + 88, + 108, + 88, + 108, + 10, + 13, true, - "Both components are tightly integrated and can be easily consumed through REST APIs.", - "Both components are tightly integrated and can be easily consumed through REST APIs." + "Wiley Online Library", + "Wiley Online Library" ], [ - "sentence", - "", - 11695737263227886476, + "parenthesis", + "square brackets", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 4006066418266254732, - 8099847092788323681, + 15691754593896323724, + 15433429984583237828, 18446744073709551615, 18446744073709551615, - 1257, - 1391, - 1257, - 1391, - 199, - 220, + 112, + 124, + 112, + 124, + 14, + 15, true, - "Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach.", - "Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach." + "[23/08/2023]", + "[23/08/2023]" ], [ - "sentence", - "", - 11695737263227886476, + "expression", + "wtoken-concatenation", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 6036810454349605181, - 11082321410160481613, + 15691754593896323724, + 15433429984583237828, 18446744073709551615, 18446744073709551615, - 1392, - 1487, - 1392, - 1487, - 220, - 235, + 112, + 124, + 112, + 124, + 14, + 15, true, - "The CPS platform is designed as a modular microservice system operating on Kubernetes clusters.", - "The CPS platform is designed as a modular microservice system operating on Kubernetes clusters." + "[23/08/2023]", + "[23/08/2023]" ], [ - "sentence", - "", - 11695737263227886476, + "numval", + "ival", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 9891169339298843383, - 12261378132459206353, + 15441160910541481791, + 3518619573290839093, 18446744073709551615, 18446744073709551615, - 1488, - 1624, - 1488, - 1624, - 235, - 259, - true, - "Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry.", - "Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry." + 113, + 115, + 113, + 115, + 14, + 14, + false, + "23", + "23" ], [ - "term", - "enum-term-mark-2", - 11695737263227886476, + "numval", + "ival", + 18391264192891079539, "TEXT", - "#/texts/10", + "#/texts/95", 1.0, - 848781837929279741, - 6552561416683889377, + 15441160910541481543, + 3518617976696906498, 18446744073709551615, 18446744073709551615, - 1603, - 1623, - 1603, - 1623, - 254, - 258, - true, - "oil and gas industry", - "oil and gas industry" + 116, + 118, + 116, + 118, + 14, + 14, + false, + "08", + "08" ], [ "term", @@ -1513,6 +1637,27 @@ "entities and relationships", "entities and relationships" ], + [ + "sentence", + "", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 2370655382906505271, + 8040324972313183116, + 18446744073709551615, + 18446744073709551615, + 0, + 123, + 0, + 123, + 0, + 21, + true, + "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data.", + "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data." + ], [ "term", "single-term", @@ -1534,6 +1679,90 @@ "Knowledge Graphs", "Knowledge Graphs" ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 329104161610777240, + 6943276019110900001, + 18446744073709551615, + 18446744073709551615, + 69, + 74, + 69, + 74, + 12, + 13, + true, + "model", + "model" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 6184122545182835014, + 10337587533357109733, + 18446744073709551615, + 18446744073709551615, + 87, + 96, + 87, + 96, + 15, + 16, + true, + "knowledge", + "knowledge" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 389609625696431489, + 6015166006560019356, + 18446744073709551615, + 18446744073709551615, + 118, + 122, + 118, + 122, + 19, + 20, + true, + "data", + "data" + ], + [ + "sentence", + "", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 8027272490911089522, + 7702147940060331105, + 18446744073709551615, + 18446744073709551615, + 124, + 261, + 124, + 261, + 21, + 43, + true, + "Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world.", + "Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world." + ], [ "term", "single-term", @@ -1555,6 +1784,69 @@ "Large corpora", "Large corpora" ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 6167933651658664291, + 7017623091478883550, + 18446744073709551615, + 18446744073709551615, + 141, + 150, + 141, + 150, + 24, + 25, + true, + "documents", + "documents" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 16381206579112188113, + 11795690975934241678, + 18446744073709551615, + 18446744073709551615, + 164, + 170, + 164, + 170, + 27, + 28, + true, + "source", + "source" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 389609625696431489, + 6015166006560022281, + 18446744073709551615, + 18446744073709551615, + 192, + 196, + 192, + 196, + 31, + 32, + true, + "data", + "data" + ], [ "term", "single-term", @@ -1597,6 +1889,27 @@ "business world", "business world" ], + [ + "sentence", + "", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 17559485512387879488, + 4960776794400025005, + 18446744073709551615, + 18446744073709551615, + 262, + 469, + 262, + 469, + 43, + 76, + true, + "Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries.", + "Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries." + ], [ "term", "single-term", @@ -1660,6 +1973,69 @@ "technical reports", "technical reports" ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 8106464587474035829, + 599250081059610946, + 18446744073709551615, + 18446744073709551615, + 327, + 334, + 327, + 334, + 52, + 53, + true, + "manuals", + "manuals" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 8106479143938802112, + 3741013633356507891, + 18446744073709551615, + 18446744073709551615, + 336, + 343, + 336, + 343, + 54, + 55, + true, + "patents", + "patents" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 4973525406703593304, + 5700149770998543624, + 18446744073709551615, + 18446744073709551615, + 345, + 356, + 345, + 356, + 56, + 57, + true, + "regulations", + "regulations" + ], [ "term", "single-term", @@ -1744,6 +2120,48 @@ "new discoveries", "new discoveries" ], + [ + "sentence", + "", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 3570937525268539532, + 11346171061679122962, + 18446744073709551615, + 18446744073709551615, + 470, + 607, + 470, + 607, + 76, + 103, + true, + "In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS).", + "In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS)." + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 329104161668023890, + 6940026313184513359, + 18446744073709551615, + 18446744073709551615, + 478, + 483, + 478, + 483, + 78, + 79, + true, + "paper", + "paper" + ], [ "term", "single-term", @@ -1808,25 +2226,25 @@ "corpus processing service" ], [ - "term", - "single-term", + "parenthesis", + "round brackets", 11695737263227886476, "TEXT", "#/texts/10", 1.0, - 10668868939620055202, - 14232732675773488092, + 329104053210154735, + 17075323869805573137, 18446744073709551615, 18446744073709551615, - 634, - 656, - 634, - 656, - 108, - 111, + 601, + 606, + 601, + 606, + 99, + 102, true, - "large document corpora", - "large document corpora" + "(CPS)", + "(CPS)" ], [ "term", @@ -1835,18 +2253,123 @@ "TEXT", "#/texts/10", 1.0, - 6703089473517255637, - 7569105345513072239, + 12178341415896222428, + 9671093415367483529, 18446744073709551615, 18446744073709551615, - 682, - 696, - 682, - 696, - 116, - 118, - true, - "embedded facts", + 602, + 605, + 602, + 605, + 100, + 101, + true, + "CPS", + "CPS" + ], + [ + "sentence", + "", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 13906125717568729148, + 4155905020420410366, + 18446744073709551615, + 18446744073709551615, + 608, + 793, + 608, + 793, + 103, + 134, + true, + "Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried.", + "Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried." + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 8106479265948440982, + 351105263671880898, + 18446744073709551615, + 18446744073709551615, + 612, + 619, + 612, + 619, + 104, + 105, + true, + "purpose", + "purpose" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 10668868939620055202, + 14232732675773488092, + 18446744073709551615, + 18446744073709551615, + 634, + 656, + 634, + 656, + 108, + 111, + true, + "large document corpora", + "large document corpora" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 8106398484416916345, + 1095125247314724175, + 18446744073709551615, + 18446744073709551615, + 670, + 677, + 670, + 677, + 114, + 115, + true, + "content", + "content" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 6703089473517255637, + 7569105345513072239, + 18446744073709551615, + 18446744073709551615, + 682, + 696, + 682, + 696, + 116, + 118, + true, + "embedded facts", "embedded facts" ], [ @@ -1870,6 +2393,90 @@ "consistent knowledge graph", "consistent knowledge graph" ], + [ + "sentence", + "", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 7674845204641058037, + 6672554339198903999, + 18446744073709551615, + 18446744073709551615, + 794, + 1004, + 794, + 1004, + 134, + 162, + true, + "To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform.", + "To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform." + ], + [ + "expression", + "wtoken-concatenation", + 9802652237802670052, + "TEXT", + "#/texts/97", + 1.0, + 329104147725285867, + 13023020285713349824, + 18446744073709551615, + 18446744073709551615, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "3.3.1", + "3.3.1" + ], + [ + "numval", + "fval", + 9802652237802670052, + "TEXT", + "#/texts/97", + 1.0, + 12178341415896435196, + 198388536621247129, + 18446744073709551615, + 18446744073709551615, + 0, + 3, + 0, + 3, + 0, + 0, + false, + "3.3", + "3.3" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 4315218641775224883, + 11583210972753095337, + 18446744073709551615, + 18446744073709551615, + 1467, + 1486, + 1467, + 1486, + 232, + 234, + true, + "Kubernetes clusters", + "Kubernetes clusters" + ], [ "term", "single-term", @@ -1891,6 +2498,111 @@ "state-of-the-art natural language", "state-of-the-art natural language" ], + [ + "expression", + "word-concatenation", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 5044385734724420019, + 12757516288413416407, + 18446744073709551615, + 18446744073709551615, + 821, + 837, + 821, + 837, + 140, + 141, + true, + "state-of-the-art", + "state-of-the-art" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 16381206567230470443, + 11705694158167462403, + 18446744073709551615, + 18446744073709551615, + 869, + 875, + 869, + 875, + 144, + 145, + true, + "models", + "models" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 14652256560445338257, + 7220701613896570103, + 18446744073709551615, + 18446744073709551615, + 887, + 895, + 887, + 895, + 147, + 148, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 8279380567349713241, + 5428767239015427768, + 18446744073709551615, + 18446744073709551615, + 900, + 913, + 900, + 913, + 149, + 150, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 6167933651658664291, + 7017623091478798627, + 18446744073709551615, + 18446744073709551615, + 919, + 928, + 919, + 928, + 151, + 152, + true, + "documents", + "documents" + ], [ "term", "single-term", @@ -1912,6 +2624,48 @@ "corpus conversion service platform", "corpus conversion service platform" ], + [ + "sentence", + "", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 3532957815608940811, + 14429112738710635391, + 18446744073709551615, + 18446744073709551615, + 1005, + 1171, + 1005, + 1171, + 162, + 185, + true, + "This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities.", + "This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities." + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 14814125852840540191, + 6714967147835883438, + 18446744073709551615, + 18446744073709551615, + 1010, + 1018, + 1010, + 1018, + 163, + 164, + true, + "pipeline", + "pipeline" + ], [ "term", "single-term", @@ -1975,6 +2729,48 @@ "powerful graph analytics capabilities", "powerful graph analytics capabilities" ], + [ + "sentence", + "", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 9674378140136415946, + 14302529272335550558, + 18446744073709551615, + 18446744073709551615, + 1172, + 1256, + 1172, + 1256, + 185, + 199, + true, + "Both components are tightly integrated and can be easily consumed through REST APIs.", + "Both components are tightly integrated and can be easily consumed through REST APIs." + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 2703018952916355661, + 7441154421291581585, + 18446744073709551615, + 18446744073709551615, + 1177, + 1187, + 1177, + 1187, + 186, + 187, + true, + "components", + "components" + ], [ "term", "single-term", @@ -1996,6 +2792,27 @@ "REST APIs", "REST APIs" ], + [ + "sentence", + "", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 4006066418266254732, + 8099847092788323681, + 18446744073709551615, + 18446744073709551615, + 1257, + 1391, + 1257, + 1391, + 199, + 220, + true, + "Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach.", + "Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach." + ], [ "term", "single-term", @@ -2038,6 +2855,27 @@ "data ingestion flow", "data ingestion flow" ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 8106477782290185579, + 13397737841409408978, + 18446744073709551615, + 18446744073709551615, + 1347, + 1354, + 1347, + 1354, + 213, + 214, + true, + "queries", + "queries" + ], [ "term", "single-term", @@ -2059,6 +2897,27 @@ "visual programming approach", "visual programming approach" ], + [ + "sentence", + "", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 6036810454349605181, + 11082321410160481613, + 18446744073709551615, + 18446744073709551615, + 1392, + 1487, + 1392, + 1487, + 220, + 235, + true, + "The CPS platform is designed as a modular microservice system operating on Kubernetes clusters.", + "The CPS platform is designed as a modular microservice system operating on Kubernetes clusters." + ], [ "term", "single-term", @@ -2101,6 +2960,27 @@ "modular microservice system", "modular microservice system" ], + [ + "sentence", + "", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 9891169339298843383, + 12261378132459206353, + 18446744073709551615, + 18446744073709551615, + 1488, + 1624, + 1488, + 1624, + 235, + 259, + true, + "Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry.", + "Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry." + ], [ "term", "single-term", @@ -2108,19 +2988,40 @@ "TEXT", "#/texts/10", 1.0, - 4315218641775224883, - 11583210972753095337, + 8106477781724488761, + 13716403130135035691, 18446744073709551615, 18446744073709551615, - 1467, - 1486, - 1467, - 1486, - 232, - 234, + 1513, + 1520, + 1513, + 1520, + 240, + 241, true, - "Kubernetes clusters", - "Kubernetes clusters" + "quality", + "quality" + ], + [ + "term", + "single-term", + 11695737263227886476, + "TEXT", + "#/texts/10", + 1.0, + 8106477782290185579, + 13397737841409395272, + 18446744073709551615, + 18446744073709551615, + 1524, + 1531, + 1524, + 1531, + 242, + 243, + true, + "queries", + "queries" ], [ "term", @@ -2143,783 +3044,6 @@ "endto-end knowledge pipeline", "endto-end knowledge pipeline" ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 8973266897479869153, - 9626503990142682309, - 18446744073709551615, - 18446744073709551615, - 1573, - 1595, - 1573, - 1595, - 250, - 252, - true, - "real-world application", - "real-world application" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 17613546823892249124, - 1576417016664792020, - 18446744073709551615, - 18446744073709551615, - 1611, - 1623, - 1611, - 1623, - 256, - 258, - true, - "gas industry", - "gas industry" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 329104161610777240, - 6943276019110900001, - 18446744073709551615, - 18446744073709551615, - 69, - 74, - 69, - 74, - 12, - 13, - true, - "model", - "model" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 6184122545182835014, - 10337587533357109733, - 18446744073709551615, - 18446744073709551615, - 87, - 96, - 87, - 96, - 15, - 16, - true, - "knowledge", - "knowledge" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 389609625696431489, - 6015166006560019356, - 18446744073709551615, - 18446744073709551615, - 118, - 122, - 118, - 122, - 19, - 20, - true, - "data", - "data" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 6167933651658664291, - 7017623091478883550, - 18446744073709551615, - 18446744073709551615, - 141, - 150, - 141, - 150, - 24, - 25, - true, - "documents", - "documents" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 16381206579112188113, - 11795690975934241678, - 18446744073709551615, - 18446744073709551615, - 164, - 170, - 164, - 170, - 27, - 28, - true, - "source", - "source" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 389609625696431489, - 6015166006560022281, - 18446744073709551615, - 18446744073709551615, - 192, - 196, - 192, - 196, - 31, - 32, - true, - "data", - "data" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 8106464587474035829, - 599250081059610946, - 18446744073709551615, - 18446744073709551615, - 327, - 334, - 327, - 334, - 52, - 53, - true, - "manuals", - "manuals" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 8106479143938802112, - 3741013633356507891, - 18446744073709551615, - 18446744073709551615, - 336, - 343, - 336, - 343, - 54, - 55, - true, - "patents", - "patents" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 4973525406703593304, - 5700149770998543624, - 18446744073709551615, - 18446744073709551615, - 345, - 356, - 345, - 356, - 56, - 57, - true, - "regulations", - "regulations" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 329104161668023890, - 6940026313184513359, - 18446744073709551615, - 18446744073709551615, - 478, - 483, - 478, - 483, - 78, - 79, - true, - "paper", - "paper" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 12178341415896222428, - 9671093415367483529, - 18446744073709551615, - 18446744073709551615, - 602, - 605, - 602, - 605, - 100, - 101, - true, - "CPS", - "CPS" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 8106479265948440982, - 351105263671880898, - 18446744073709551615, - 18446744073709551615, - 612, - 619, - 612, - 619, - 104, - 105, - true, - "purpose", - "purpose" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 8106398484416916345, - 1095125247314724175, - 18446744073709551615, - 18446744073709551615, - 670, - 677, - 670, - 677, - 114, - 115, - true, - "content", - "content" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 16381206567230470443, - 11705694158167462403, - 18446744073709551615, - 18446744073709551615, - 869, - 875, - 869, - 875, - 144, - 145, - true, - "models", - "models" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 14652256560445338257, - 7220701613896570103, - 18446744073709551615, - 18446744073709551615, - 887, - 895, - 887, - 895, - 147, - 148, - true, - "entities", - "entities" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 8279380567349713241, - 5428767239015427768, - 18446744073709551615, - 18446744073709551615, - 900, - 913, - 900, - 913, - 149, - 150, - true, - "relationships", - "relationships" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 6167933651658664291, - 7017623091478798627, - 18446744073709551615, - 18446744073709551615, - 919, - 928, - 919, - 928, - 151, - 152, - true, - "documents", - "documents" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 14814125852840540191, - 6714967147835883438, - 18446744073709551615, - 18446744073709551615, - 1010, - 1018, - 1010, - 1018, - 163, - 164, - true, - "pipeline", - "pipeline" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 2703018952916355661, - 7441154421291581585, - 18446744073709551615, - 18446744073709551615, - 1177, - 1187, - 1177, - 1187, - 186, - 187, - true, - "components", - "components" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 8106477782290185579, - 13397737841409408978, - 18446744073709551615, - 18446744073709551615, - 1347, - 1354, - 1347, - 1354, - 213, - 214, - true, - "queries", - "queries" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 8106477781724488761, - 13716403130135035691, - 18446744073709551615, - 18446744073709551615, - 1513, - 1520, - 1513, - 1520, - 240, - 241, - true, - "quality", - "quality" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 8106477782290185579, - 13397737841409395272, - 18446744073709551615, - 18446744073709551615, - 1524, - 1531, - 1524, - 1531, - 242, - 243, - true, - "queries", - "queries" - ], - [ - "term", - "single-term", - 11695737263227886476, - "TEXT", - "#/texts/10", - 1.0, - 12178341415895623363, - 9671102709835951159, - 18446744073709551615, - 18446744073709551615, - 1603, - 1606, - 1603, - 1606, - 254, - 255, - true, - "oil", - "oil" - ], - [ - "numval", - "ival", - 11913688961435238004, - "TEXT", - "#/texts/13", - 1.0, - 17767354399704235161, - 9682837417262995739, - 18446744073709551615, - 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, - true, - "1", - "1" - ], - [ - "numval", - "year", - 9977041563469582014, - "TEXT", - "#/texts/14", - 1.0, - 389609625548777059, - 17632943630740203190, - 18446744073709551615, - 18446744073709551615, - 6, - 10, - 6, - 10, - 2, - 3, - true, - "2015", - "2015" - ], - [ - "numval", - "fval", - 9977041563469582014, - "TEXT", - "#/texts/14", - 1.0, - 12178341415896439105, - 13434398423091096866, - 18446744073709551615, - 18446744073709551615, - 44, - 47, - 44, - 47, - 9, - 10, - true, - "2.7", - "2.7" - ], - [ - "expression", - "common", - 9977041563469582014, - "TEXT", - "#/texts/14", - 1.0, - 12178341415895450733, - 13434388706261344427, - 18446744073709551615, - 18446744073709551615, - 579, - 583, - 579, - 583, - 97, - 98, - true, - "etc", - "etc." - ], - [ - "expression", - "word-concatenation", - 9977041563469582014, - "TEXT", - "#/texts/14", - 1.0, - 8803983102511961753, - 11026648589532064531, - 18446744073709551615, - 18446744073709551615, - 102, - 114, - 102, - 114, - 19, - 20, - true, - "self-evident", - "self-evident" - ], - [ - "expression", - "word-concatenation", - 9977041563469582014, - "TEXT", - "#/texts/14", - 1.0, - 8043212133150675222, - 7506328330981893578, - 18446744073709551615, - 18446744073709551615, - 475, - 487, - 475, - 487, - 80, - 81, - true, - "ever-growing", - "ever-growing" - ], - [ - "sentence", - "", - 9977041563469582014, - "TEXT", - "#/texts/14", - 1.0, - 9580276197039337323, - 13841174173201944352, - 18446744073709551615, - 18446744073709551615, - 0, - 95, - 0, - 95, - 0, - 17, - true, - "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally.", - "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally." - ], - [ - "sentence", - "", - 9977041563469582014, - "TEXT", - "#/texts/14", - 1.0, - 9079004519467152167, - 6860715527459606106, - 18446744073709551615, - 18446744073709551615, - 96, - 157, - 96, - 157, - 17, - 28, - true, - "It is self-evident that this number has increased ever since.", - "It is self-evident that this number has increased ever since." - ], - [ - "sentence", - "", - 9977041563469582014, - "TEXT", - "#/texts/14", - 1.0, - 12959192130376635610, - 18180244594714576233, - 18446744073709551615, - 18446744073709551615, - 158, - 322, - 158, - 322, - 28, - 54, - true, - "The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world.", - "The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world." - ], - [ - "sentence", - "", - 9977041563469582014, - "TEXT", - "#/texts/14", - 1.0, - 9260326806510524947, - 14982882722757884571, - 18446744073709551615, - 18446744073709551615, - 323, - 459, - 323, - 459, - 54, - 77, - true, - "The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings.", - "The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings." - ], - [ - "sentence", - "", - 9977041563469582014, - "TEXT", - "#/texts/14", - 1.0, - 5075859589505957998, - 18360416951435709110, - 18446744073709551615, - 18446744073709551615, - 460, - 639, - 460, - 639, - 77, - 107, - true, - "Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable.", - "Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable." - ], - [ - "term", - "enum-term-mark-1", - 9977041563469582014, - "TEXT", - "#/texts/14", - 1.0, - 2327733945986976512, - 16359156217665106996, - 18446744073709551615, - 18446744073709551615, - 293, - 321, - 293, - 321, - 49, - 53, - true, - "academic and corporate world", - "academic and corporate world" - ], [ "term", "single-term", @@ -2942,214 +3066,214 @@ "trillion PDF documents" ], [ - "term", - "single-term", - 9977041563469582014, + "sentence", + "", + 5524728206729419689, "TEXT", - "#/texts/14", + "#/texts/98", 1.0, - 11551851235882828048, - 8670374056430505501, + 13639548757740861010, + 11696805249441926913, 18446744073709551615, 18446744073709551615, - 162, - 178, - 162, - 178, - 29, - 31, + 0, + 69, + 0, + 69, + 0, + 13, true, - "explosive growth", - "explosive growth" + "This task finds a set of nodes which satisfy certain search criteria.", + "This task finds a set of nodes which satisfy certain search criteria." ], [ "term", "single-term", - 9977041563469582014, + 5524728206729419689, "TEXT", - "#/texts/14", + "#/texts/98", 1.0, - 5652441786009596562, - 559076346625196990, + 389609625631210899, + 1695322703373668221, 18446744073709551615, 18446744073709551615, - 214, - 232, - 214, - 232, - 37, - 39, + 5, + 9, + 5, + 9, + 1, + 2, true, - "digital publishing", - "digital publishing" + "task", + "task" ], [ "term", "single-term", - 9977041563469582014, + 5524728206729419689, "TEXT", - "#/texts/14", + "#/texts/98", 1.0, - 11978931670712051192, - 9926895489093501949, + 12178341415895638602, + 16401925845918103767, 18446744073709551615, 18446744073709551615, - 263, - 280, - 263, - 280, - 44, - 46, + 18, + 21, + 18, + 21, + 4, + 5, true, - "serious challenge", - "serious challenge" + "set", + "set" ], [ "term", "single-term", - 9977041563469582014, + 5524728206729419689, "TEXT", - "#/texts/14", + "#/texts/98", 1.0, - 7780875503607700578, - 7527213517068304878, + 329104161758737773, + 9063467011231067037, 18446744073709551615, 18446744073709551615, - 306, - 321, - 306, - 321, - 51, - 53, + 25, + 30, + 25, + 30, + 6, + 7, true, - "corporate world", - "corporate world" + "nodes", + "nodes" ], [ "term", "single-term", - 9977041563469582014, + 5524728206729419689, "TEXT", - "#/texts/14", + "#/texts/98", 1.0, - 3488136445312217472, - 563560862623828716, + 1139782918783911343, + 10980002430644435601, 18446744073709551615, 18446744073709551615, - 337, - 353, - 337, - 353, - 56, - 58, + 45, + 68, + 45, + 68, + 9, + 12, true, - "publication rate", - "publication rate" + "certain search criteria", + "certain search criteria" ], [ - "term", - "single-term", - 9977041563469582014, + "sentence", + "", + 5524728206729419689, "TEXT", - "#/texts/14", + "#/texts/98", 1.0, - 7863808487922385366, - 10797157915381492366, + 9504985242355517435, + 18023630049865929203, 18446744073709551615, 18446744073709551615, - 357, - 376, - 357, - 376, - 59, - 61, + 70, + 216, + 70, + 216, + 13, + 41, true, - "scientific articles", - "scientific articles" + "This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property.", + "This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property." ], [ "term", "single-term", - 9977041563469582014, + 5524728206729419689, "TEXT", - "#/texts/14", + "#/texts/98", 1.0, - 16667234436856023081, - 17857792665552379798, + 1353284443403185756, + 13247714493573934499, 18446744073709551615, 18446744073709551615, - 443, - 458, - 443, - 458, - 74, - 76, + 100, + 111, + 100, + 111, + 19, + 21, true, - "latest findings", - "latest findings" + "single node", + "single node" ], [ - "term", - "single-term", - 9977041563469582014, + "parenthesis", + "round brackets", + 5524728206729419689, "TEXT", - "#/texts/14", + "#/texts/98", 1.0, - 5751151653465478259, - 10695870790845961642, + 6343195480109663451, + 11165462414382695465, 18446744073709551615, 18446744073709551615, - 475, - 494, - 475, - 494, - 80, - 82, + 119, + 132, + 119, + 132, + 23, + 26, true, - "ever-growing number", - "ever-growing number" + "(approximate)", + "(approximate)" ], [ "term", "single-term", - 9977041563469582014, + 5524728206729419689, "TEXT", - "#/texts/14", + "#/texts/98", 1.0, - 18216685920424760230, - 8188107583662209298, + 389609625621548280, + 1694766356608744958, 18446744073709551615, 18446744073709551615, - 498, - 514, - 498, - 514, - 83, - 85, + 133, + 137, + 133, + 137, + 26, + 27, true, - "internal reports", - "internal reports" + "name", + "name" ], [ "term", "single-term", - 9977041563469582014, + 5524728206729419689, "TEXT", - "#/texts/14", + "#/texts/98", 1.0, - 10815771517668250054, - 9700260059013966190, + 6764280510749928008, + 2538978002994667418, 18446744073709551615, 18446744073709551615, - 564, - 577, - 564, - 577, - 94, - 96, + 141, + 162, + 141, + 162, + 28, + 31, true, - "court filings", - "court filings" + "exact node identifier", + "exact node identifier" ], [ "term", @@ -3173,130 +3297,130 @@ "most corporations" ], [ - "term", - "single-term", - 9977041563469582014, + "expression", + "wtoken-concatenation", + 4043385013945968936, "TEXT", - "#/texts/14", + "#/texts/99", 1.0, - 329104162020590744, - 12542051113387534152, + 5948620232447446819, + 3619933651552123134, 18446744073709551615, 18446744073709551615, - 12, - 17, - 12, - 17, - 4, - 5, + 2, + 15, + 2, + 15, + 1, + 2, true, - "Adobe", - "Adobe" + "^{!}_{i}=", + "$^{!}$$_{i}$=" ], [ - "term", - "single-term", - 9977041563469582014, + "numval", + "ival", + 4043385013945968936, "TEXT", - "#/texts/14", + "#/texts/99", 1.0, - 15526146950464474214, - 16227659806299083154, + 17767354399704235161, + 3863023118325513235, 18446744073709551615, 18446744073709551615, - 74, - 85, - 74, - 85, - 14, - 15, + 16, + 17, + 16, + 17, + 2, + 3, true, - "circulation", - "circulation" + "1", + "1" ], [ - "term", - "single-term", - 9977041563469582014, + "numval", + "ival", + 4043385013945968936, "TEXT", - "#/texts/14", + "#/texts/99", 1.0, - 16381206574973295053, - 11707971985141737188, + 17767354399704235160, + 3863023118293440507, 18446744073709551615, 18446744073709551615, - 125, - 131, - 125, - 131, - 22, - 23, + 33, + 34, + 33, + 34, + 8, + 9, true, - "number", - "number" + "0", + "0" ], [ - "term", - "single-term", - 9977041563469582014, + "numval", + "ival", + 4043385013945968936, "TEXT", - "#/texts/14", + "#/texts/99", 1.0, - 6167933651658664291, - 4272164818252510490, + 17767354399704235162, + 3863023118274566919, 18446744073709551615, 18446744073709551615, - 182, - 191, - 182, - 191, - 32, - 33, + 47, + 48, + 47, + 48, + 13, + 14, true, - "documents", - "documents" + "2", + "2" ], [ - "term", - "single-term", - 9977041563469582014, + "expression", + "wtoken-concatenation", + 4043385013945968936, "TEXT", - "#/texts/14", + "#/texts/99", 1.0, - 8380286976750653797, - 4381452488979706873, + 7116489890516680880, + 11145030960935339860, 18446744073709551615, 18446744073709551615, - 240, - 250, - 240, - 250, - 40, - 41, + 53, + 63, + 53, + 63, + 16, + 17, true, - "mainstream", - "mainstream" + "GLYPH", + "GLYPH" ], [ - "term", - "single-term", - 9977041563469582014, + "numval", + "ival", + 4043385013945968936, "TEXT", - "#/texts/14", + "#/texts/99", 1.0, - 5946940610854338392, - 9806611181783168768, + 15441160910541481788, + 1525860005576289474, 18446744073709551615, 18446744073709551615, - 408, - 417, - 408, - 417, - 67, - 68, - true, - "academics", - "academics" + 60, + 62, + 60, + 62, + 16, + 16, + false, + "26", + "26" ], [ "term", @@ -3341,46 +3465,46 @@ "patents" ], [ - "term", - "single-term", - 9977041563469582014, + "parenthesis", + "round brackets", + 14119822239274862236, "TEXT", - "#/texts/14", + "#/figures/7/captions/0", 1.0, - 5947882010261766213, - 14814557788069949454, + 9594608305374447191, + 549027973638755167, 18446744073709551615, 18446744073709551615, - 540, - 549, - 540, - 549, - 90, - 91, + 501, + 514, + 501, + 514, + 108, + 112, true, - "contracts", - "contracts" + "(worktask 17)", + "(worktask 17)" ], [ - "term", - "single-term", - 9977041563469582014, + "parenthesis", + "round brackets", + 14119822239274862236, "TEXT", - "#/texts/14", + "#/figures/7/captions/0", 1.0, - 4973525406703593304, - 17896266079288595392, + 9594608305374447126, + 549027974665356501, 18446744073709551615, 18446744073709551615, - 551, - 562, - 551, - 562, - 92, - 93, + 477, + 490, + 477, + 490, + 102, + 106, true, - "regulations", - "regulations" + "(worktask 16)", + "(worktask 16)" ], [ "numval", @@ -35618,151 +35742,151 @@ "Figure" ], [ - "expression", - "word-concatenation", - 16949854269270315165, + "sentence", + "", + 4017434568255781081, "TEXT", - "#/texts/94", + "#/texts/8", 1.0, - 15221896740599576202, - 7666904121768591309, + 1463783400548512489, + 4562795260271874000, 18446744073709551615, 18446744073709551615, - 59, - 73, - 59, - 73, - 10, - 11, + 0, + 95, + 0, + 95, + 0, + 19, true, - "node-retrieval", - "node-retrieval" + "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland.", + "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland." ], [ - "sentence", - "", - 16949854269270315165, + "name", + "person-name", + 4017434568255781081, "TEXT", - "#/texts/94", + "#/texts/8", 1.0, - 1859492819924485121, - 10838117205519727135, + 9807900919297989315, + 8857913618678092312, 18446744073709551615, 18446744073709551615, 0, - 128, + 32, 0, - 128, + 32, 0, - 20, + 7, true, - "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions.", - "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions." + "Correspondence Peter W J Staar", + "Correspondence Peter W. J. Staar" ], [ - "sentence", - "", - 16949854269270315165, + "link", + "email", + 4017434568255781081, "TEXT", - "#/texts/94", + "#/texts/8", 1.0, - 4963035477772371835, - 4020325737246968829, + 11117094662504195367, + 8034517963455466339, 18446744073709551615, 18446744073709551615, - 129, - 262, - 129, - 262, - 20, - 44, + 103, + 121, + 103, + 121, + 21, + 26, true, - "In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design.", - "In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design." + "taa@zurich.ibm.com", + "taa@zurich.ibm.com" ], [ - "term", - "single-term", - 16949854269270315165, + "numval", + "ival", + 4017434568255781081, "TEXT", - "#/texts/94", + "#/texts/8", 1.0, - 17889054130498802051, - 13611413549729115921, + 389609625533568071, + 13739132446336686651, 18446744073709551615, 18446744073709551615, - 27, - 44, - 27, - 44, - 5, - 7, + 65, + 69, + 65, + 69, + 14, + 15, true, - "fundamental types", - "fundamental types" + "8820", + "8820" ], [ - "term", - "single-term", - 16949854269270315165, + "numval", + "ival", + 4017434568255781081, "TEXT", - "#/texts/94", + "#/texts/8", 1.0, - 16654294478124171317, - 10151652501900860692, + 17767354399704235156, + 7351830786655903422, 18446744073709551615, 18446744073709551615, - 86, - 103, - 86, - 103, - 14, - 16, + 62, + 63, + 62, + 63, + 12, + 13, true, - "logical operators", - "logical operators" + "4", + "4" ], [ - "term", - "single-term", - 16949854269270315165, + "reference", + "author", + 13336841394978214677, "TEXT", - "#/texts/94", + "#/texts/6", 1.0, - 11555096374369856312, - 7157942907653228754, + 9737597816447750448, + 8444331438348317113, 18446744073709551615, 18446744073709551615, - 108, - 127, - 108, - 127, - 17, - 19, + 0, + 14, + 0, + 14, + 0, + 2, true, - "transform functions", - "transform functions" + "Christoph Auer", + "Christoph Auer" ], [ "term", "single-term", - 16949854269270315165, + 4017434568255781081, "TEXT", - "#/texts/94", + "#/texts/8", 1.0, - 17030057430150962643, - 11687865223449973507, + 497725968887992147, + 15543972956793692858, 18446744073709551615, 18446744073709551615, - 136, - 154, - 136, - 154, - 22, - 24, + 48, + 61, + 48, + 61, + 11, + 12, true, - "following sections", - "following sections" + "Saumerstrasse", + "Saumerstrasse" ], [ "term", @@ -35786,88 +35910,88 @@ "adjacency matrix design" ], [ - "term", - "single-term", - 16949854269270315165, + "name", + "person-name", + 16781763356419781679, "TEXT", - "#/texts/94", + "#/texts/2", 1.0, - 3534171294115941544, - 8731026536612016164, + 4686361850733567621, + 14538190648130419824, 18446744073709551615, 18446744073709551615, - 48, - 57, - 48, - 57, - 8, - 9, + 0, + 17, + 0, + 17, + 0, + 6, true, - "worktasks", - "worktasks" + "Peter W J Staar", + "Peter W. J. Staar" ], [ - "term", - "single-term", - 16949854269270315165, + "reference", + "author", + 2144509362215609527, "TEXT", - "#/texts/94", + "#/texts/0", 1.0, - 3503811091434006699, - 4368860458480451668, + 16381206540184854990, + 14425920664139507693, 18446744073709551615, 18446744073709551615, - 75, - 84, - 75, - 84, - 12, - 13, + 0, + 6, + 0, + 6, + 0, + 1, true, - "traversal", - "traversal" + "LETTER", + "LETTER" ], [ "term", "single-term", - 16949854269270315165, + 4017434568255781081, "TEXT", - "#/texts/94", + "#/texts/8", 1.0, - 16381206568246674273, - 3558057784302965696, + 2664439525053388608, + 478252263928496257, 18446744073709551615, 18446744073709551615, - 175, - 181, - 175, - 181, - 29, - 30, + 83, + 94, + 83, + 94, + 17, + 18, true, - "detail", - "detail" + "Switzerland", + "Switzerland" ], [ "term", - "single-term", - 16949854269270315165, + "enum-term-mark-1", + 9977041563469582014, "TEXT", - "#/texts/94", + "#/texts/14", 1.0, - 3534171294115941544, - 8731026536612028033, + 2327733945986976512, + 16359156217665106996, 18446744073709551615, 18446744073709551615, - 190, - 199, - 190, - 199, - 32, - 33, + 293, + 321, + 293, + 321, + 49, + 53, true, - "worktasks", - "worktasks" + "academic and corporate world", + "academic and corporate world" ], [ "term", @@ -35891,25 +36015,25 @@ "context" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "enum-term-mark-2", + 11695737263227886476, "TEXT", - "#/texts/95", + "#/texts/10", 1.0, - 389609625548777262, - 8826555294676663632, + 848781837929279741, + 6552561416683889377, 18446744073709551615, 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 1603, + 1623, + 1603, + 1623, + 254, + 258, true, - "2020", - "2020" + "oil and gas industry", + "oil and gas industry" ], [ "numval", @@ -35933,151 +36057,151 @@ "2023" ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "", + 9977041563469582014, "TEXT", - "#/texts/95", + "#/texts/14", 1.0, - 8104408072666212335, - 13552219042525319352, + 5075859589505957998, + 18360416951435709110, 18446744073709551615, 18446744073709551615, - 71, - 78, - 71, - 78, - 8, - 8, - false, - "10.1002", - "10.1002" + 460, + 639, + 460, + 639, + 77, + 107, + true, + "Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable.", + "Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable." ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "", + 9977041563469582014, "TEXT", - "#/texts/95", + "#/texts/14", 1.0, - 389609625548868096, - 8826558551385119058, + 9260326806510524947, + 14982882722757884571, 18446744073709551615, 18446744073709551615, - 82, - 86, - 82, - 86, - 8, - 9, - false, - "2.20", - "2.20" + 323, + 459, + 323, + 459, + 54, + 77, + true, + "The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings.", + "The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings." ], [ - "numval", - "ival", - 18391264192891079539, + "sentence", + "", + 9977041563469582014, "TEXT", - "#/texts/95", + "#/texts/14", 1.0, - 14654386914267794441, - 12796143052106760105, + 12959192130376635610, + 18180244594714576233, 18446744073709551615, 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 158, + 322, + 158, + 322, + 28, + 54, true, - "26895595", - "26895595" + "The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world.", + "The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world." ], [ - "numval", - "ival", - 18391264192891079539, + "sentence", + "", + 9977041563469582014, "TEXT", - "#/texts/95", + "#/texts/14", 1.0, - 17767354399704235162, - 7753390158484899261, + 9079004519467152167, + 6860715527459606106, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, + 96, + 157, + 96, + 157, 17, - 4, - 5, + 28, true, - "2", - "2" + "It is self-evident that this number has increased ever since.", + "It is self-evident that this number has increased ever since." ], [ - "numval", - "ival", - 18391264192891079539, + "sentence", + "", + 9977041563469582014, "TEXT", - "#/texts/95", + "#/texts/14", 1.0, - 15441160910541481791, - 3518619573290839093, + 9580276197039337323, + 13841174173201944352, 18446744073709551615, 18446744073709551615, - 113, - 115, - 113, - 115, - 14, - 14, - false, - "23", - "23" + 0, + 95, + 0, + 95, + 0, + 17, + true, + "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally.", + "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally." ], [ - "numval", - "ival", - 18391264192891079539, + "expression", + "word-concatenation", + 9977041563469582014, "TEXT", - "#/texts/95", + "#/texts/14", 1.0, - 15441160910541481543, - 3518617976696906498, - 18446744073709551615, - 18446744073709551615, - 116, - 118, - 116, - 118, - 14, - 14, - false, - "08", - "08" + 8043212133150675222, + 7506328330981893578, + 18446744073709551615, + 18446744073709551615, + 475, + 487, + 475, + 487, + 80, + 81, + true, + "ever-growing", + "ever-growing" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/95", + "#/texts/10", 1.0, - 8536069645534292969, - 16063604623463467342, + 17613546823892249124, + 1576417016664792020, 18446744073709551615, 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 10, + 1611, + 1623, + 1611, + 1623, + 256, + 258, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "gas industry", + "gas industry" ], [ "link", @@ -36101,25 +36225,25 @@ "https://onlinelibrary.wiley.com/terms-and-conditions" ], [ - "link", - "doi", - 18391264192891079539, + "expression", + "word-concatenation", + 9977041563469582014, "TEXT", - "#/texts/95", + "#/texts/14", 1.0, - 1697220653346092555, - 8458710314769009562, + 8803983102511961753, + 11026648589532064531, 18446744073709551615, 18446744073709551615, - 67, - 87, - 67, - 87, - 8, - 10, - false, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + 102, + 114, + 102, + 114, + 19, + 20, + true, + "self-evident", + "self-evident" ], [ "parenthesis", @@ -36144,108 +36268,108 @@ ], [ "parenthesis", - "square brackets", - 18391264192891079539, + "round brackets", + 14119822239274862236, "TEXT", - "#/texts/95", + "#/figures/7/captions/0", 1.0, - 15691754593896323724, - 15433429984583237828, + 9594608305374444490, + 549028029822782819, 18446744073709551615, 18446744073709551615, - 112, - 124, - 112, - 124, - 14, - 15, + 457, + 470, + 457, + 470, + 96, + 100, true, - "[23/08/2023]", - "[23/08/2023]" + "(worktask 15)", + "(worktask 15)" ], [ - "expression", - "wtoken-concatenation", - 18391264192891079539, + "numval", + "fval", + 9977041563469582014, "TEXT", - "#/texts/95", + "#/texts/14", 1.0, - 3856967589249015473, - 3576147774941915841, + 12178341415896439105, + 13434398423091096866, 18446744073709551615, 18446744073709551615, - 35, - 86, - 35, - 86, - 8, + 44, + 47, + 44, + 47, 9, + 10, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + "2.7", + "2.7" ], [ - "expression", - "wtoken-concatenation", - 18391264192891079539, + "numval", + "year", + 9977041563469582014, "TEXT", - "#/texts/95", + "#/texts/14", 1.0, - 15691754593896323724, - 15433429984583237828, + 389609625548777059, + 17632943630740203190, 18446744073709551615, 18446744073709551615, - 112, - 124, - 112, - 124, - 14, - 15, + 6, + 10, + 6, + 10, + 2, + 3, true, - "[23/08/2023]", - "[23/08/2023]" + "2015", + "2015" ], [ - "sentence", - "", - 18391264192891079539, + "numval", + "ival", + 11913688961435238004, "TEXT", - "#/texts/95", + "#/texts/13", 1.0, - 10933383461306782608, - 10178418358179275356, + 17767354399704235161, + 9682837417262995739, 18446744073709551615, 18446744073709551615, - 19, - 125, - 19, - 125, - 6, - 16, + 0, + 1, + 0, + 1, + 0, + 1, true, - "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", - "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + "1", + "1" ], [ - "term", - "single-term", - 18391264192891079539, + "expression", + "word-concatenation", + 11695737263227886476, "TEXT", - "#/texts/95", + "#/texts/10", 1.0, - 12466457873768409517, - 3430070082404029638, + 15984801488078789848, + 11443881616252239060, 18446744073709551615, 18446744073709551615, - 88, - 108, - 88, - 108, - 10, - 13, + 1573, + 1583, + 1573, + 1583, + 250, + 251, true, - "Wiley Online Library", - "Wiley Online Library" + "real-world", + "real-world" ], [ "term", @@ -36437,25 +36561,25 @@ "15" ], [ - "numval", - "fval", - 9802652237802670052, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/97", + "#/texts/10", 1.0, - 12178341415896435196, - 198388536621247129, + 12178341415895623363, + 9671102709835951159, 18446744073709551615, 18446744073709551615, - 0, - 3, - 0, - 3, - 0, - 0, - false, - "3.3", - "3.3" + 1603, + 1606, + 1603, + 1606, + 254, + 255, + true, + "oil", + "oil" ], [ "numval", @@ -36479,46 +36603,46 @@ "1" ], [ - "expression", - "wtoken-concatenation", - 9802652237802670052, + "term", + "single-term", + 11695737263227886476, "TEXT", - "#/texts/97", + "#/texts/10", 1.0, - 329104147725285867, - 13023020285713349824, + 8973266897479869153, + 9626503990142682309, 18446744073709551615, 18446744073709551615, - 0, - 5, - 0, - 5, - 0, - 1, + 1573, + 1595, + 1573, + 1595, + 250, + 252, true, - "3.3.1", - "3.3.1" + "real-world application", + "real-world application" ], [ "parenthesis", "round brackets", - 5524728206729419689, + 14119822239274862236, "TEXT", - "#/texts/98", + "#/figures/7/captions/0", 1.0, - 6343195480109663451, - 11165462414382695465, + 15753836491225885957, + 4303796380513418775, 18446744073709551615, 18446744073709551615, - 119, - 132, - 119, - 132, - 23, - 26, + 377, + 402, + 377, + 402, + 75, + 85, true, - "(approximate)", - "(approximate)" + "(worktasks 6, 10, 13, 14)", + "(worktasks 6, 10, 13, 14)" ], [ "expression", @@ -36542,109 +36666,109 @@ "$^{!}$" ], [ - "sentence", - "", - 5524728206729419689, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/98", + "#/texts/14", 1.0, - 13639548757740861010, - 11696805249441926913, + 18216685920424760230, + 8188107583662209298, 18446744073709551615, 18446744073709551615, - 0, - 69, - 0, - 69, - 0, - 13, + 498, + 514, + 498, + 514, + 83, + 85, true, - "This task finds a set of nodes which satisfy certain search criteria.", - "This task finds a set of nodes which satisfy certain search criteria." + "internal reports", + "internal reports" ], [ - "sentence", - "", - 5524728206729419689, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/98", + "#/texts/14", 1.0, - 9504985242355517435, - 18023630049865929203, + 5751151653465478259, + 10695870790845961642, 18446744073709551615, 18446744073709551615, - 70, - 216, - 70, - 216, - 13, - 41, + 475, + 494, + 475, + 494, + 80, + 82, true, - "This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property.", - "This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property." + "ever-growing number", + "ever-growing number" ], [ "term", "single-term", - 5524728206729419689, + 9977041563469582014, "TEXT", - "#/texts/98", + "#/texts/14", 1.0, - 1139782918783911343, - 10980002430644435601, + 16667234436856023081, + 17857792665552379798, 18446744073709551615, 18446744073709551615, - 45, - 68, - 45, - 68, - 9, - 12, + 443, + 458, + 443, + 458, + 74, + 76, true, - "certain search criteria", - "certain search criteria" + "latest findings", + "latest findings" ], [ "term", "single-term", - 5524728206729419689, + 9977041563469582014, "TEXT", - "#/texts/98", + "#/texts/14", 1.0, - 1353284443403185756, - 13247714493573934499, + 7863808487922385366, + 10797157915381492366, 18446744073709551615, 18446744073709551615, - 100, - 111, - 100, - 111, - 19, - 21, + 357, + 376, + 357, + 376, + 59, + 61, true, - "single node", - "single node" + "scientific articles", + "scientific articles" ], [ "term", "single-term", - 5524728206729419689, + 9977041563469582014, "TEXT", - "#/texts/98", + "#/texts/14", 1.0, - 6764280510749928008, - 2538978002994667418, + 3488136445312217472, + 563560862623828716, 18446744073709551615, 18446744073709551615, - 141, - 162, - 141, - 162, - 28, - 31, + 337, + 353, + 337, + 353, + 56, + 58, true, - "exact node identifier", - "exact node identifier" + "publication rate", + "publication rate" ], [ "term", @@ -36691,86 +36815,86 @@ [ "term", "single-term", - 5524728206729419689, + 9977041563469582014, "TEXT", - "#/texts/98", + "#/texts/14", 1.0, - 389609625631210899, - 1695322703373668221, + 7780875503607700578, + 7527213517068304878, 18446744073709551615, 18446744073709551615, - 5, - 9, - 5, - 9, - 1, - 2, + 306, + 321, + 306, + 321, + 51, + 53, true, - "task", - "task" + "corporate world", + "corporate world" ], [ "term", "single-term", - 5524728206729419689, + 9977041563469582014, "TEXT", - "#/texts/98", + "#/texts/14", 1.0, - 12178341415895638602, - 16401925845918103767, + 11978931670712051192, + 9926895489093501949, 18446744073709551615, 18446744073709551615, - 18, - 21, - 18, - 21, - 4, - 5, + 263, + 280, + 263, + 280, + 44, + 46, true, - "set", - "set" + "serious challenge", + "serious challenge" ], [ "term", "single-term", - 5524728206729419689, + 9977041563469582014, "TEXT", - "#/texts/98", + "#/texts/14", 1.0, - 329104161758737773, - 9063467011231067037, + 5652441786009596562, + 559076346625196990, 18446744073709551615, 18446744073709551615, - 25, - 30, - 25, - 30, - 6, - 7, + 214, + 232, + 214, + 232, + 37, + 39, true, - "nodes", - "nodes" + "digital publishing", + "digital publishing" ], [ "term", "single-term", - 5524728206729419689, + 9977041563469582014, "TEXT", - "#/texts/98", + "#/texts/14", 1.0, - 389609625621548280, - 1694766356608744958, + 11551851235882828048, + 8670374056430505501, 18446744073709551615, 18446744073709551615, - 133, - 137, - 133, - 137, - 26, - 27, + 162, + 178, + 162, + 178, + 29, + 31, true, - "name", - "name" + "explosive growth", + "explosive growth" ], [ "term", @@ -36815,88 +36939,88 @@ "task" ], [ - "numval", - "ival", - 4043385013945968936, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/99", + "#/texts/14", 1.0, - 17767354399704235161, - 3863023118325513235, + 5946940610854338392, + 9806611181783168768, 18446744073709551615, 18446744073709551615, - 16, - 17, - 16, - 17, - 2, - 3, + 408, + 417, + 408, + 417, + 67, + 68, true, - "1", - "1" + "academics", + "academics" ], [ - "numval", - "ival", - 4043385013945968936, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/99", + "#/texts/14", 1.0, - 17767354399704235160, - 3863023118293440507, + 8380286976750653797, + 4381452488979706873, 18446744073709551615, 18446744073709551615, - 33, - 34, - 33, - 34, - 8, - 9, + 240, + 250, + 240, + 250, + 40, + 41, true, - "0", - "0" + "mainstream", + "mainstream" ], [ - "numval", - "ival", - 4043385013945968936, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/99", + "#/texts/14", 1.0, - 17767354399704235162, - 3863023118274566919, + 6167933651658664291, + 4272164818252510490, 18446744073709551615, 18446744073709551615, - 47, - 48, - 47, - 48, - 13, - 14, + 182, + 191, + 182, + 191, + 32, + 33, true, - "2", - "2" + "documents", + "documents" ], [ - "numval", - "ival", - 4043385013945968936, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/99", + "#/texts/14", 1.0, - 15441160910541481788, - 1525860005576289474, + 16381206574973295053, + 11707971985141737188, 18446744073709551615, 18446744073709551615, - 60, - 62, - 60, - 62, - 16, - 16, - false, - "26", - "26" + 125, + 131, + 125, + 131, + 22, + 23, + true, + "number", + "number" ], [ "numval", @@ -36920,46 +37044,46 @@ "3" ], [ - "expression", - "wtoken-concatenation", - 4043385013945968936, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/99", + "#/texts/14", 1.0, - 5948620232447446819, - 3619933651552123134, + 15526146950464474214, + 16227659806299083154, 18446744073709551615, 18446744073709551615, - 2, - 15, - 2, + 74, + 85, + 74, + 85, + 14, 15, - 1, - 2, true, - "^{!}_{i}=", - "$^{!}$$_{i}$=" + "circulation", + "circulation" ], [ - "expression", - "wtoken-concatenation", - 4043385013945968936, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/texts/99", + "#/texts/14", 1.0, - 7116489890516680880, - 11145030960935339860, + 329104162020590744, + 12542051113387534152, 18446744073709551615, 18446744073709551615, - 53, - 63, - 53, - 63, - 16, + 12, + 17, + 12, 17, + 4, + 5, true, - "GLYPH", - "GLYPH" + "Adobe", + "Adobe" ], [ "sentence", @@ -70541,88 +70665,88 @@ "(worktasks 3-5, 7-9, 11, 12)" ], [ - "parenthesis", - "round brackets", - 14119822239274862236, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/figures/7/captions/0", + "#/texts/14", 1.0, - 15753836491225885957, - 4303796380513418775, + 5947882010261766213, + 14814557788069949454, 18446744073709551615, 18446744073709551615, - 377, - 402, - 377, - 402, - 75, - 85, + 540, + 549, + 540, + 549, + 90, + 91, true, - "(worktasks 6, 10, 13, 14)", - "(worktasks 6, 10, 13, 14)" + "contracts", + "contracts" ], [ - "parenthesis", - "round brackets", - 14119822239274862236, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/figures/7/captions/0", + "#/texts/14", 1.0, - 9594608305374444490, - 549028029822782819, + 4973525406703593304, + 17896266079288595392, 18446744073709551615, 18446744073709551615, - 457, - 470, - 457, - 470, - 96, - 100, + 551, + 562, + 551, + 562, + 92, + 93, true, - "(worktask 15)", - "(worktask 15)" + "regulations", + "regulations" ], [ - "parenthesis", - "round brackets", - 14119822239274862236, + "term", + "single-term", + 9977041563469582014, "TEXT", - "#/figures/7/captions/0", + "#/texts/14", 1.0, - 9594608305374447126, - 549027974665356501, + 10815771517668250054, + 9700260059013966190, 18446744073709551615, 18446744073709551615, - 477, - 490, - 477, - 490, - 102, - 106, + 564, + 577, + 564, + 577, + 94, + 96, true, - "(worktask 16)", - "(worktask 16)" + "court filings", + "court filings" ], [ - "parenthesis", - "round brackets", - 14119822239274862236, + "expression", + "common", + 9977041563469582014, "TEXT", - "#/figures/7/captions/0", + "#/texts/14", 1.0, - 9594608305374447191, - 549027973638755167, + 12178341415895450733, + 13434388706261344427, 18446744073709551615, 18446744073709551615, - 501, - 514, - 501, - 514, - 108, - 112, + 579, + 583, + 579, + 583, + 97, + 98, true, - "(worktask 17)", - "(worktask 17)" + "etc", + "etc." ] ], "headers": [ @@ -70743,88 +70867,88 @@ "other": [], "page-dimensions": [ { - "height": 782.3619995117188, + "height": 782.36, "page": 1, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 2, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 3, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 4, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 5, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 6, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 7, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 8, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 9, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 10, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 11, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 12, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 13, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 14, - "width": 595.2760009765625 + "width": 595.28 }, { - "height": 782.3619995117188, + "height": 782.36, "page": 15, - "width": 595.2760009765625 + "width": 595.28 } ], "page-elements": [ { "bbox": [ - 44.78739929199219, - 743.57568359375, - 131.78494262695312, - 750.7937622070312 + 44.79, + 743.58, + 131.78, + 750.79 ], "iref": "#/page-headers/0", "name": "page-header", @@ -70840,10 +70964,10 @@ }, { "bbox": [ - 146.3265380859375, - 744.093017578125, - 229.3131561279297, - 751.4437866210938 + 146.33, + 744.09, + 229.31, + 751.44 ], "iref": "#/page-headers/1", "name": "page-header", @@ -70859,10 +70983,10 @@ }, { "bbox": [ - 243.7840576171875, - 743.953369140625, - 332.99346923828125, - 751.3480224609375 + 243.78, + 743.95, + 332.99, + 751.35 ], "iref": "#/page-headers/2", "name": "page-header", @@ -70878,10 +71002,10 @@ }, { "bbox": [ - 44.6877326965332, - 730.7138671875, - 106.1191635131836, - 737.30078125 + 44.69, + 730.71, + 106.12, + 737.3 ], "iref": "#/page-headers/3", "name": "page-header", @@ -70897,10 +71021,10 @@ }, { "bbox": [ - 43.95979690551758, - 702.3956298828125, - 91.94560241699219, - 712.1011962890625 + 43.96, + 702.4, + 91.95, + 712.1 ], "iref": "#/texts/0", "name": "subtitle-level-1", @@ -70916,10 +71040,10 @@ }, { "bbox": [ - 44.709346771240234, - 631.2674560546875, - 520.7667236328125, - 672.0067749023438 + 44.71, + 631.27, + 520.77, + 672.01 ], "iref": "#/texts/1", "name": "subtitle-level-1", @@ -70935,10 +71059,10 @@ }, { "bbox": [ - 44.78739929199219, - 593.6065673828125, - 146.4720458984375, - 606.4735717773438 + 44.79, + 593.61, + 146.47, + 606.47 ], "iref": "#/texts/2", "name": "subtitle-level-1", @@ -70954,10 +71078,10 @@ }, { "bbox": [ - 160.10069274902344, - 593.7201538085938, - 163.59266662597656, - 605.1080322265625 + 160.1, + 593.72, + 163.59, + 605.11 ], "iref": "#/texts/3", "name": "text", @@ -70973,10 +71097,10 @@ }, { "bbox": [ - 170.39439392089844, - 593.4388427734375, - 265.1170959472656, - 607.2059326171875 + 170.39, + 593.44, + 265.12, + 607.21 ], "iref": "#/texts/4", "name": "subtitle-level-1", @@ -70992,10 +71116,10 @@ }, { "bbox": [ - 274.5636901855469, - 593.7201538085938, - 278.0556640625, - 605.1080322265625 + 274.56, + 593.72, + 278.06, + 605.11 ], "iref": "#/texts/5", "name": "text", @@ -71011,10 +71135,10 @@ }, { "bbox": [ - 290.0411682128906, - 593.2594604492188, - 387.6253967285156, - 606.9615478515625 + 290.04, + 593.26, + 387.63, + 606.96 ], "iref": "#/texts/6", "name": "text", @@ -71030,10 +71154,10 @@ }, { "bbox": [ - 44.78739929199219, - 559.602294921875, - 182.68014526367188, - 567.3045654296875 + 44.79, + 559.6, + 182.68, + 567.3 ], "iref": "#/texts/7", "name": "text", @@ -71049,10 +71173,10 @@ }, { "bbox": [ - 44.78739929199219, - 493.4922180175781, - 164.66183471679688, - 545.3080444335938 + 44.79, + 493.49, + 164.66, + 545.31 ], "iref": "#/texts/8", "name": "text", @@ -71068,10 +71192,10 @@ }, { "bbox": [ - 209.1903839111328, - 552.2532348632812, - 249.1348114013672, - 561.7433471679688 + 209.19, + 552.25, + 249.13, + 561.74 ], "iref": "#/texts/9", "name": "subtitle-level-1", @@ -71087,10 +71211,10 @@ }, { "bbox": [ - 208.6128387451172, - 251.58563232421875, - 543.8583984375, - 547.040771484375 + 208.61, + 251.59, + 543.86, + 547.04 ], "iref": "#/texts/10", "name": "text", @@ -71106,10 +71230,10 @@ }, { "bbox": [ - 209.21104431152344, - 228.2025146484375, - 269.01025390625, - 237.28173828125 + 209.21, + 228.2, + 269.01, + 237.28 ], "iref": "#/texts/11", "name": "subtitle-level-1", @@ -71125,10 +71249,10 @@ }, { "bbox": [ - 208.79600524902344, - 214.08453369140625, - 401.0297546386719, - 222.97467041015625 + 208.8, + 214.08, + 401.03, + 222.97 ], "iref": "#/texts/12", "name": "text", @@ -71144,10 +71268,10 @@ }, { "bbox": [ - 44.27853012084961, - 187.51553344726562, - 189.71961975097656, - 199.65557861328125 + 44.28, + 187.52, + 189.72, + 199.66 ], "iref": "#/texts/13", "name": "subtitle-level-1", @@ -71163,10 +71287,10 @@ }, { "bbox": [ - 44.78739929199219, - 96.98406982421875, - 552.6513061523438, - 172.33074951171875 + 44.79, + 96.98, + 552.65, + 172.33 ], "iref": "#/texts/14", "name": "text", @@ -71182,10 +71306,10 @@ }, { "bbox": [ - 44.787384033203125, - 52.49696731567383, - 540.7015991210938, - 70.33258056640625 + 44.79, + 52.5, + 540.7, + 70.33 ], "iref": "#/footnotes/0", "name": "footnote", @@ -71201,10 +71325,10 @@ }, { "bbox": [ - 44.787384033203125, - 42.44549560546875, - 272.1662902832031, - 50.207763671875 + 44.79, + 42.45, + 272.17, + 50.21 ], "iref": "#/footnotes/1", "name": "footnote", @@ -71220,10 +71344,10 @@ }, { "bbox": [ - 44.38350296020508, - 12.301444053649902, - 135.58876037597656, - 30.8690185546875 + 44.38, + 12.3, + 135.59, + 30.87 ], "iref": "#/page-footers/0", "name": "page-footer", @@ -71239,10 +71363,10 @@ }, { "bbox": [ - 400.53094482421875, - 22.279802322387695, - 550.6204223632812, - 29.6954345703125 + 400.53, + 22.28, + 550.62, + 29.7 ], "iref": "#/page-footers/1", "name": "page-footer", @@ -71258,10 +71382,10 @@ }, { "bbox": [ - 46.48820114135742, - 751.4075317382812, - 68.55958557128906, - 758.0504760742188 + 46.49, + 751.41, + 68.56, + 758.05 ], "iref": "#/texts/15", "name": "text", @@ -71277,10 +71401,10 @@ }, { "bbox": [ - 510.634765625, - 751.4635620117188, - 550.9636840820312, - 758.332763671875 + 510.63, + 751.46, + 550.96, + 758.33 ], "iref": "#/page-headers/4", "name": "page-header", @@ -71296,10 +71420,10 @@ }, { "bbox": [ - 45.97464370727539, - 604.0350952148438, - 554.3433227539062, - 732.5863037109375 + 45.97, + 604.04, + 554.34, + 732.59 ], "iref": "#/texts/16", "name": "text", @@ -71315,10 +71439,10 @@ }, { "bbox": [ - 46.485626220703125, - 513.0453491210938, - 553.2366943359375, - 601.0419921875 + 46.49, + 513.05, + 553.24, + 601.04 ], "iref": "#/texts/17", "name": "text", @@ -71334,10 +71458,10 @@ }, { "bbox": [ - 46.48820114135742, - 500.0622253417969, - 340.59906005859375, - 509.4723205566406 + 46.49, + 500.06, + 340.6, + 509.47 ], "iref": "#/texts/18", "name": "text", @@ -71353,10 +71477,10 @@ }, { "bbox": [ - 57.86075973510742, - 487.0791015625, - 492.157958984375, - 496.63543701171875 + 57.86, + 487.08, + 492.16, + 496.64 ], "iref": "#/texts/19", "name": "text", @@ -71372,10 +71496,10 @@ }, { "bbox": [ - 46.48820114135742, - 461.0568542480469, - 262.5708312988281, - 470.5727233886719 + 46.49, + 461.06, + 262.57, + 470.57 ], "iref": "#/texts/20", "name": "list-item", @@ -71391,10 +71515,10 @@ }, { "bbox": [ - 45.779930114746094, - 448.07373046875, - 241.75213623046875, - 457.51177978515625 + 45.78, + 448.07, + 241.75, + 457.51 ], "iref": "#/texts/21", "name": "list-item", @@ -71410,10 +71534,10 @@ }, { "bbox": [ - 46.48820114135742, - 435.03460693359375, - 174.95623779296875, - 444.5535583496094 + 46.49, + 435.03, + 174.96, + 444.55 ], "iref": "#/texts/22", "name": "list-item", @@ -71429,10 +71553,10 @@ }, { "bbox": [ - 46.48820114135742, - 422.0514831542969, - 528.8121948242188, - 431.5508728027344 + 46.49, + 422.05, + 528.81, + 431.55 ], "iref": "#/texts/23", "name": "list-item", @@ -71448,10 +71572,10 @@ }, { "bbox": [ - 45.387489318847656, - 409.068359375, - 446.47918701171875, - 418.8954772949219 + 45.39, + 409.07, + 446.48, + 418.9 ], "iref": "#/texts/24", "name": "list-item", @@ -71467,10 +71591,10 @@ }, { "bbox": [ - 45.996150970458984, - 292.05224609375, - 553.0557861328125, - 392.69879150390625 + 46.0, + 292.05, + 553.06, + 392.7 ], "iref": "#/texts/25", "name": "text", @@ -71486,10 +71610,10 @@ }, { "bbox": [ - 46.48820114135742, - 265.89093017578125, - 551.4827270507812, - 288.8219299316406 + 46.49, + 265.89, + 551.48, + 288.82 ], "iref": "#/texts/26", "name": "text", @@ -71505,10 +71629,10 @@ }, { "bbox": [ - 46.371070861816406, - 240.06375122070312, - 515.491943359375, - 249.5263671875 + 46.37, + 240.06, + 515.49, + 249.53 ], "iref": "#/texts/27", "name": "list-item", @@ -71524,10 +71648,10 @@ }, { "bbox": [ - 46.48820114135742, - 214.04150390625, - 551.0504760742188, - 236.58538818359375 + 46.49, + 214.04, + 551.05, + 236.59 ], "iref": "#/texts/28", "name": "list-item", @@ -71543,10 +71667,10 @@ }, { "bbox": [ - 45.20487594604492, - 201.05838012695312, - 376.7724914550781, - 210.76416015625 + 45.2, + 201.06, + 376.77, + 210.76 ], "iref": "#/texts/29", "name": "list-item", @@ -71562,10 +71686,10 @@ }, { "bbox": [ - 46.2375373840332, - 110.07154846191406, - 553.1372680664062, - 184.7841796875 + 46.24, + 110.07, + 553.14, + 184.78 ], "iref": "#/texts/30", "name": "text", @@ -71581,10 +71705,10 @@ }, { "bbox": [ - 46.487701416015625, - 84.04928588867188, - 550.5083618164062, - 107.71282958984375 + 46.49, + 84.05, + 550.51, + 107.71 ], "iref": "#/texts/31", "name": "text", @@ -71600,10 +71724,10 @@ }, { "bbox": [ - 45.976261138916016, - 45.04500961303711, - 551.8382568359375, - 81.24627685546875 + 45.98, + 45.05, + 551.84, + 81.25 ], "iref": "#/texts/32", "name": "text", @@ -71619,10 +71743,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/33", "name": "text", @@ -71638,10 +71762,10 @@ }, { "bbox": [ - 44.50688552856445, - 751.4635620117188, - 85.01602935791016, - 758.0504760742188 + 44.51, + 751.46, + 85.02, + 758.05 ], "iref": "#/page-headers/5", "name": "page-header", @@ -71657,10 +71781,10 @@ }, { "bbox": [ - 528.5497436523438, - 751.4075317382812, - 550.62109375, - 758.0504760742188 + 528.55, + 751.41, + 550.62, + 758.05 ], "iref": "#/texts/34", "name": "text", @@ -71676,10 +71800,10 @@ }, { "bbox": [ - 44.78739929199219, - 695.0468139648438, - 549.4096069335938, - 730.4614868164062 + 44.79, + 695.05, + 549.41, + 730.46 ], "iref": "#/texts/35", "name": "text", @@ -71695,10 +71819,10 @@ }, { "bbox": [ - 44.78739929199219, - 655.5153198242188, - 378.15191650390625, - 666.9031982421875 + 44.79, + 655.52, + 378.15, + 666.9 ], "iref": "#/texts/36", "name": "subtitle-level-1", @@ -71714,10 +71838,10 @@ }, { "bbox": [ - 44.785400390625, - 552.0484008789062, - 549.7849731445312, - 639.5802001953125 + 44.79, + 552.05, + 549.78, + 639.58 ], "iref": "#/texts/37", "name": "text", @@ -71733,10 +71857,10 @@ }, { "bbox": [ - 44.785430908203125, - 409.068603515625, - 554.4052124023438, - 548.475341796875 + 44.79, + 409.07, + 554.41, + 548.48 ], "iref": "#/texts/38", "name": "text", @@ -71752,10 +71876,10 @@ }, { "bbox": [ - 44.78739929199219, - 369.4996032714844, - 134.88641357421875, - 380.88751220703125 + 44.79, + 369.5, + 134.89, + 380.89 ], "iref": "#/texts/39", "name": "subtitle-level-1", @@ -71771,10 +71895,10 @@ }, { "bbox": [ - 44.524391174316406, - 317.6519470214844, - 552.3914184570312, - 353.5248107910156 + 44.52, + 317.65, + 552.39, + 353.52 ], "iref": "#/texts/40", "name": "text", @@ -71790,10 +71914,10 @@ }, { "bbox": [ - 78.5494384765625, - 102.71893310546875, - 512.3916625976562, - 284.9899597167969 + 78.55, + 102.72, + 512.39, + 284.99 ], "iref": "#/figures/0", "name": "picture", @@ -71809,10 +71933,10 @@ }, { "bbox": [ - 44.78328323364258, - 45.39774703979492, - 545.7940673828125, - 89.4708251953125 + 44.78, + 45.4, + 545.79, + 89.47 ], "iref": "#/figures/0/captions/0", "name": "caption", @@ -71828,10 +71952,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/41", "name": "text", @@ -71847,10 +71971,10 @@ }, { "bbox": [ - 46.48820114135742, - 751.4075317382812, - 68.55958557128906, - 758.0504760742188 + 46.49, + 751.41, + 68.56, + 758.05 ], "iref": "#/texts/42", "name": "text", @@ -71866,10 +71990,10 @@ }, { "bbox": [ - 510.634765625, - 751.4635620117188, - 550.9420166015625, - 758.4869384765625 + 510.63, + 751.46, + 550.94, + 758.49 ], "iref": "#/page-headers/6", "name": "page-header", @@ -71885,10 +72009,10 @@ }, { "bbox": [ - 45.14111328125, - 720.4854736328125, - 157.7607421875, - 732.3443603515625 + 45.14, + 720.49, + 157.76, + 732.34 ], "iref": "#/texts/43", "name": "subtitle-level-1", @@ -71904,10 +72028,10 @@ }, { "bbox": [ - 46.48820114135742, - 656.0805053710938, - 553.5469360351562, - 704.7728881835938 + 46.49, + 656.08, + 553.55, + 704.77 ], "iref": "#/texts/44", "name": "text", @@ -71923,10 +72047,10 @@ }, { "bbox": [ - 45.56229019165039, - 604.0359497070312, - 553.0910034179688, - 652.8948974609375 + 45.56, + 604.04, + 553.09, + 652.89 ], "iref": "#/texts/45", "name": "text", @@ -71942,10 +72066,10 @@ }, { "bbox": [ - 45.6591796875, - 565.0864868164062, - 552.8568115234375, - 600.9397583007812 + 45.66, + 565.09, + 552.86, + 600.94 ], "iref": "#/texts/46", "name": "text", @@ -71961,10 +72085,10 @@ }, { "bbox": [ - 45.497798919677734, - 525.5185546875, - 161.91403198242188, - 536.9064331054688 + 45.5, + 525.52, + 161.91, + 536.91 ], "iref": "#/texts/47", "name": "subtitle-level-1", @@ -71980,10 +72104,10 @@ }, { "bbox": [ - 46.28074645996094, - 435.03485107421875, - 552.7772827148438, - 509.80706787109375 + 46.28, + 435.03, + 552.78, + 509.81 ], "iref": "#/texts/48", "name": "text", @@ -71999,10 +72123,10 @@ }, { "bbox": [ - 45.999271392822266, - 370.0654296875, - 551.750244140625, - 431.6009521484375 + 46.0, + 370.07, + 551.75, + 431.6 ], "iref": "#/texts/49", "name": "text", @@ -72018,10 +72142,10 @@ }, { "bbox": [ - 46.37678527832031, - 304.9195251464844, - 551.427001953125, - 366.6332092285156 + 46.38, + 304.92, + 551.43, + 366.63 ], "iref": "#/texts/50", "name": "text", @@ -72037,10 +72161,10 @@ }, { "bbox": [ - 46.48663330078125, - 45.39759826660156, - 540.3204956054688, - 67.21272277832031 + 46.49, + 45.4, + 540.32, + 67.21 ], "iref": "#/texts/51", "name": "text", @@ -72056,10 +72180,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/52", "name": "text", @@ -72075,10 +72199,10 @@ }, { "bbox": [ - 44.041500091552734, - 751.3096313476562, - 85.72028350830078, - 759.7291870117188 + 44.04, + 751.31, + 85.72, + 759.73 ], "iref": "#/page-headers/7", "name": "page-header", @@ -72094,10 +72218,10 @@ }, { "bbox": [ - 454.1357421875, - 745.7154541015625, - 550.62109375, - 761.0070190429688 + 454.14, + 745.72, + 550.62, + 761.01 ], "iref": "#/figures/1", "name": "picture", @@ -72113,10 +72237,10 @@ }, { "bbox": [ - 44.78594970703125, - 483.39947509765625, - 548.2582397460938, - 529.3165283203125 + 44.79, + 483.4, + 548.26, + 529.32 ], "iref": "#/texts/53", "name": "text", @@ -72132,10 +72256,10 @@ }, { "bbox": [ - 44.78684997558594, - 370.0640563964844, - 549.865478515625, - 444.5719299316406 + 44.79, + 370.06, + 549.87, + 444.57 ], "iref": "#/texts/54", "name": "text", @@ -72151,10 +72275,10 @@ }, { "bbox": [ - 44.206939697265625, - 330.4949035644531, - 223.93128967285156, - 341.8828125 + 44.21, + 330.49, + 223.93, + 341.88 ], "iref": "#/texts/55", "name": "subtitle-level-1", @@ -72170,10 +72294,10 @@ }, { "bbox": [ - 44.78684616088867, - 149.07435607910156, - 549.819091796875, - 314.53570556640625 + 44.79, + 149.07, + 549.82, + 314.54 ], "iref": "#/texts/56", "name": "text", @@ -72189,10 +72313,10 @@ }, { "bbox": [ - 43.94790267944336, - 109.50601959228516, - 254.47779846191406, - 120.89392852783203 + 43.95, + 109.51, + 254.48, + 120.89 ], "iref": "#/texts/57", "name": "subtitle-level-1", @@ -72208,10 +72332,10 @@ }, { "bbox": [ - 44.78739929199219, - 45.00958251953125, - 549.1444091796875, - 93.61456298828125 + 44.79, + 45.01, + 549.14, + 93.61 ], "iref": "#/texts/58", "name": "text", @@ -72227,10 +72351,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/59", "name": "text", @@ -72246,10 +72370,10 @@ }, { "bbox": [ - 46.48820114135742, - 751.4075317382812, - 68.55958557128906, - 758.0504760742188 + 46.49, + 751.41, + 68.56, + 758.05 ], "iref": "#/texts/60", "name": "text", @@ -72265,10 +72389,10 @@ }, { "bbox": [ - 510.634765625, - 751.4635620117188, - 550.9879150390625, - 758.9756469726562 + 510.63, + 751.46, + 550.99, + 758.98 ], "iref": "#/page-headers/8", "name": "page-header", @@ -72284,10 +72408,10 @@ }, { "bbox": [ - 45.78483581542969, - 669.0628051757812, - 554.4027709960938, - 730.823486328125 + 45.78, + 669.06, + 554.4, + 730.82 ], "iref": "#/texts/61", "name": "text", @@ -72303,10 +72427,10 @@ }, { "bbox": [ - 45.753639221191406, - 629.4933471679688, - 148.00445556640625, - 641.5734252929688 + 45.75, + 629.49, + 148.0, + 641.57 ], "iref": "#/texts/62", "name": "subtitle-level-1", @@ -72322,10 +72446,10 @@ }, { "bbox": [ - 46.48820114135742, - 591.0541381835938, - 552.9049682617188, - 613.8143310546875 + 46.49, + 591.05, + 552.9, + 613.81 ], "iref": "#/texts/63", "name": "text", @@ -72341,10 +72465,10 @@ }, { "bbox": [ - 46.445133209228516, - 552.0497436523438, - 553.362548828125, - 575.2869873046875 + 46.45, + 552.05, + 553.36, + 575.29 ], "iref": "#/texts/64", "name": "list-item", @@ -72360,10 +72484,10 @@ }, { "bbox": [ - 45.744380950927734, - 526.0834350585938, - 553.5414428710938, - 548.8994140625 + 45.74, + 526.08, + 553.54, + 548.9 ], "iref": "#/texts/65", "name": "list-item", @@ -72379,10 +72503,10 @@ }, { "bbox": [ - 44.8809700012207, - 513.0443115234375, - 481.36083984375, - 523.5081787109375 + 44.88, + 513.04, + 481.36, + 523.51 ], "iref": "#/texts/66", "name": "list-item", @@ -72398,10 +72522,10 @@ }, { "bbox": [ - 46.38796615600586, - 435.0345458984375, - 553.393310546875, - 497.0226135253906 + 46.39, + 435.03, + 553.39, + 497.02 ], "iref": "#/texts/67", "name": "text", @@ -72417,10 +72541,10 @@ }, { "bbox": [ - 45.54835891723633, - 344.0406799316406, - 555.0050048828125, - 432.1236877441406 + 45.55, + 344.04, + 555.01, + 432.12 ], "iref": "#/texts/68", "name": "text", @@ -72436,10 +72560,10 @@ }, { "bbox": [ - 46.25617980957031, - 304.472900390625, - 469.55108642578125, - 315.8608093261719 + 46.26, + 304.47, + 469.55, + 315.86 ], "iref": "#/texts/69", "name": "subtitle-level-1", @@ -72455,10 +72579,10 @@ }, { "bbox": [ - 46.48820114135742, - 265.92974853515625, - 552.6448364257812, - 288.6134338378906 + 46.49, + 265.93, + 552.64, + 288.61 ], "iref": "#/texts/70", "name": "text", @@ -72474,10 +72598,10 @@ }, { "bbox": [ - 46.377140045166016, - 240.049560546875, - 429.5157165527344, - 249.76214599609375 + 46.38, + 240.05, + 429.52, + 249.76 ], "iref": "#/texts/71", "name": "list-item", @@ -72493,10 +72617,10 @@ }, { "bbox": [ - 45.62164306640625, - 227.0850830078125, - 346.3638916015625, - 237.665283203125 + 45.62, + 227.09, + 346.36, + 237.67 ], "iref": "#/texts/72", "name": "list-item", @@ -72512,10 +72636,10 @@ }, { "bbox": [ - 45.322208404541016, - 162.0574493408203, - 553.8873901367188, - 210.65191650390625 + 45.32, + 162.06, + 553.89, + 210.65 ], "iref": "#/texts/73", "name": "text", @@ -72531,10 +72655,10 @@ }, { "bbox": [ - 45.762847900390625, - 71.06684875488281, - 554.2275390625, - 158.80230712890625 + 45.76, + 71.07, + 554.23, + 158.8 ], "iref": "#/texts/74", "name": "text", @@ -72550,10 +72674,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/75", "name": "text", @@ -72569,10 +72693,10 @@ }, { "bbox": [ - 44.35243225097656, - 751.4635620117188, - 85.42164611816406, - 758.9300537109375 + 44.35, + 751.46, + 85.42, + 758.93 ], "iref": "#/page-headers/9", "name": "page-header", @@ -72588,10 +72712,10 @@ }, { "bbox": [ - 528.5497436523438, - 751.4075317382812, - 550.62109375, - 758.0504760742188 + 528.55, + 751.41, + 550.62, + 758.05 ], "iref": "#/texts/76", "name": "text", @@ -72607,10 +72731,10 @@ }, { "bbox": [ - 44.78684997558594, - 695.0850830078125, - 549.5508422851562, - 730.6725463867188 + 44.79, + 695.09, + 549.55, + 730.67 ], "iref": "#/texts/77", "name": "text", @@ -72626,10 +72750,10 @@ }, { "bbox": [ - 44.71910095214844, - 655.5153198242188, - 236.7943572998047, - 666.9031982421875 + 44.72, + 655.52, + 236.79, + 666.9 ], "iref": "#/texts/78", "name": "subtitle-level-1", @@ -72645,10 +72769,10 @@ }, { "bbox": [ - 44.78636169433594, - 578.0709838867188, - 549.254638671875, - 640.1705932617188 + 44.79, + 578.07, + 549.25, + 640.17 ], "iref": "#/texts/79", "name": "text", @@ -72664,10 +72788,10 @@ }, { "bbox": [ - 44.733577728271484, - 539.0667114257812, - 548.8603515625, - 576.5675048828125 + 44.73, + 539.07, + 548.86, + 576.57 ], "iref": "#/texts/80", "name": "text", @@ -72683,10 +72807,10 @@ }, { "bbox": [ - 214.75270080566406, - 498.5877685546875, - 548.7813110351562, - 529.3681030273438 + 214.75, + 498.59, + 548.78, + 529.37 ], "iref": "#/texts/81", "name": "formula", @@ -72702,10 +72826,10 @@ }, { "bbox": [ - 44.784271240234375, - 435.0351257324219, - 548.7523193359375, - 470.5306396484375 + 44.78, + 435.04, + 548.75, + 470.53 ], "iref": "#/texts/82", "name": "text", @@ -72721,10 +72845,10 @@ }, { "bbox": [ - 234.89254760742188, - 399.494873046875, - 549.147216796875, - 425.90399169921875 + 234.89, + 399.49, + 549.15, + 425.9 ], "iref": "#/texts/83", "name": "formula", @@ -72740,10 +72864,10 @@ }, { "bbox": [ - 44.786224365234375, - 279.0730285644531, - 549.0149536132812, - 379.8307189941406 + 44.79, + 279.07, + 549.01, + 379.83 ], "iref": "#/texts/84", "name": "text", @@ -72759,10 +72883,10 @@ }, { "bbox": [ - 44.786224365234375, - 253.05079650878906, - 549.2977294921875, - 275.7553405761719 + 44.79, + 253.05, + 549.3, + 275.76 ], "iref": "#/texts/85", "name": "text", @@ -72778,10 +72902,10 @@ }, { "bbox": [ - 43.776466369628906, - 213.4808349609375, - 380.18682861328125, - 224.8687286376953 + 43.78, + 213.48, + 380.19, + 224.87 ], "iref": "#/texts/86", "name": "subtitle-level-1", @@ -72797,10 +72921,10 @@ }, { "bbox": [ - 44.78739929199219, - 58.08219528198242, - 550.3234252929688, - 197.4915771484375 + 44.79, + 58.08, + 550.32, + 197.49 ], "iref": "#/texts/87", "name": "text", @@ -72816,10 +72940,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/88", "name": "text", @@ -72835,10 +72959,10 @@ }, { "bbox": [ - 45.74378967285156, - 751.4075317382812, - 68.55958557128906, - 758.9868774414062 + 45.74, + 751.41, + 68.56, + 758.99 ], "iref": "#/page-headers/10", "name": "page-header", @@ -72854,10 +72978,10 @@ }, { "bbox": [ - 510.634765625, - 751.4635620117188, - 550.921142578125, - 758.3907470703125 + 510.63, + 751.46, + 550.92, + 758.39 ], "iref": "#/page-headers/11", "name": "page-header", @@ -72873,10 +72997,10 @@ }, { "bbox": [ - 96.34707641601562, - 537.8071899414062, - 496.8702697753906, - 731.7752075195312 + 96.35, + 537.81, + 496.87, + 731.78 ], "iref": "#/figures/2", "name": "picture", @@ -72892,10 +73016,10 @@ }, { "bbox": [ - 46.00423812866211, - 491.7976379394531, - 543.2025756835938, - 523.7771606445312 + 46.0, + 491.8, + 543.2, + 523.78 ], "iref": "#/figures/2/captions/0", "name": "caption", @@ -72911,10 +73035,10 @@ }, { "bbox": [ - 46.486663818359375, - 370.0644836425781, - 551.9771728515625, - 457.6360168457031 + 46.49, + 370.06, + 551.98, + 457.64 ], "iref": "#/texts/89", "name": "text", @@ -72930,10 +73054,10 @@ }, { "bbox": [ - 46.486663818359375, - 239.97216796875, - 551.4871215820312, - 366.491455078125 + 46.49, + 239.97, + 551.49, + 366.49 ], "iref": "#/texts/90", "name": "text", @@ -72949,10 +73073,10 @@ }, { "bbox": [ - 45.14011764526367, - 200.4981231689453, - 333.7398986816406, - 211.88601684570312 + 45.14, + 200.5, + 333.74, + 211.89 ], "iref": "#/texts/91", "name": "subtitle-level-1", @@ -72968,10 +73092,10 @@ }, { "bbox": [ - 45.9116325378418, - 162.0589599609375, - 551.3727416992188, - 184.45217895507812 + 45.91, + 162.06, + 551.37, + 184.45 ], "iref": "#/texts/92", "name": "text", @@ -72987,10 +73111,10 @@ }, { "bbox": [ - 46.21662902832031, - 84.04818725585938, - 550.9126586914062, - 158.48593139648438 + 46.22, + 84.05, + 550.91, + 158.49 ], "iref": "#/texts/93", "name": "text", @@ -73006,10 +73130,10 @@ }, { "bbox": [ - 44.992271423339844, - 45.01641845703125, - 552.1865844726562, - 80.5264892578125 + 44.99, + 45.02, + 552.19, + 80.53 ], "iref": "#/texts/94", "name": "text", @@ -73025,10 +73149,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/95", "name": "text", @@ -73044,10 +73168,10 @@ }, { "bbox": [ - 44.34560012817383, - 751.4635620117188, - 84.67137145996094, - 758.0504760742188 + 44.35, + 751.46, + 84.67, + 758.05 ], "iref": "#/page-headers/12", "name": "page-header", @@ -73063,10 +73187,10 @@ }, { "bbox": [ - 528.5497436523438, - 751.4075317382812, - 550.62109375, - 758.0504760742188 + 528.55, + 751.41, + 550.62, + 758.05 ], "iref": "#/texts/96", "name": "text", @@ -73082,10 +73206,10 @@ }, { "bbox": [ - 116.26325988769531, - 507.8388977050781, - 473.644775390625, - 731.2719116210938 + 116.26, + 507.84, + 473.64, + 731.27 ], "iref": "#/figures/3", "name": "picture", @@ -73101,10 +73225,10 @@ }, { "bbox": [ - 44.78739929199219, - 447.43023681640625, - 541.6075439453125, - 491.6891174316406 + 44.79, + 447.43, + 541.61, + 491.69 ], "iref": "#/figures/3/captions/0", "name": "caption", @@ -73120,10 +73244,10 @@ }, { "bbox": [ - 44.418067932128906, - 395.521728515625, - 176.333251953125, - 406.9096374511719 + 44.42, + 395.52, + 176.33, + 406.91 ], "iref": "#/texts/97", "name": "subtitle-level-1", @@ -73139,10 +73263,10 @@ }, { "bbox": [ - 44.78739929199219, - 343.8106384277344, - 548.7684326171875, - 379.5713806152344 + 44.79, + 343.81, + 548.77, + 379.57 ], "iref": "#/texts/98", "name": "text", @@ -73158,10 +73282,10 @@ }, { "bbox": [ - 245.61886596679688, - 303.5643005371094, - 549.354736328125, - 334.3446350097656 + 245.62, + 303.56, + 549.35, + 334.34 ], "iref": "#/texts/99", "name": "formula", @@ -73177,10 +73301,10 @@ }, { "bbox": [ - 44.27131652832031, - 266.0909118652344, - 323.5520935058594, - 275.5295104980469 + 44.27, + 266.09, + 323.55, + 275.53 ], "iref": "#/texts/100", "name": "text", @@ -73196,10 +73320,10 @@ }, { "bbox": [ - 44.087921142578125, - 226.52023315429688, - 183.25424194335938, - 237.9081268310547 + 44.09, + 226.52, + 183.25, + 237.91 ], "iref": "#/texts/101", "name": "subtitle-level-1", @@ -73215,10 +73339,10 @@ }, { "bbox": [ - 44.12942886352539, - 149.07611083984375, - 549.1555786132812, - 210.865478515625 + 44.13, + 149.08, + 549.16, + 210.87 ], "iref": "#/texts/102", "name": "text", @@ -73234,10 +73358,10 @@ }, { "bbox": [ - 213.45111083984375, - 107.99786376953125, - 548.7833251953125, - 139.26446533203125 + 213.45, + 108.0, + 548.78, + 139.26 ], "iref": "#/texts/103", "name": "formula", @@ -73253,10 +73377,10 @@ }, { "bbox": [ - 44.78630447387695, - 45.0455436706543, - 548.7993774414062, - 80.76483154296875 + 44.79, + 45.05, + 548.8, + 80.76 ], "iref": "#/texts/104", "name": "text", @@ -73272,10 +73396,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/105", "name": "text", @@ -73291,10 +73415,10 @@ }, { "bbox": [ - 45.890689849853516, - 743.98095703125, - 143.1890869140625, - 761.30615234375 + 45.89, + 743.98, + 143.19, + 761.31 ], "iref": "#/figures/4", "name": "picture", @@ -73310,10 +73434,10 @@ }, { "bbox": [ - 510.634765625, - 751.4635620117188, - 550.8926391601562, - 758.5383911132812 + 510.63, + 751.46, + 550.89, + 758.54 ], "iref": "#/page-headers/13", "name": "page-header", @@ -73329,10 +73453,10 @@ }, { "bbox": [ - 44.981788635253906, - 720.4783935546875, - 201.29905700683594, - 731.9963989257812 + 44.98, + 720.48, + 201.3, + 732.0 ], "iref": "#/texts/106", "name": "subtitle-level-1", @@ -73348,10 +73472,10 @@ }, { "bbox": [ - 46.0963020324707, - 656.0805053710938, - 554.1248779296875, - 705.2210693359375 + 46.1, + 656.08, + 554.12, + 705.22 ], "iref": "#/texts/107", "name": "text", @@ -73367,10 +73491,10 @@ }, { "bbox": [ - 45.49040985107422, - 616.5106201171875, - 214.94256591796875, - 627.93359375 + 45.49, + 616.51, + 214.94, + 627.93 ], "iref": "#/texts/108", "name": "subtitle-level-1", @@ -73386,10 +73510,10 @@ }, { "bbox": [ - 45.356536865234375, - 578.0712890625, - 552.450927734375, - 600.5599365234375 + 45.36, + 578.07, + 552.45, + 600.56 ], "iref": "#/texts/109", "name": "text", @@ -73405,10 +73529,10 @@ }, { "bbox": [ - 46.00928497314453, - 500.0617370605469, - 551.898193359375, - 574.4982299804688 + 46.01, + 500.06, + 551.9, + 574.5 ], "iref": "#/texts/110", "name": "text", @@ -73424,10 +73548,10 @@ }, { "bbox": [ - 45.801177978515625, - 448.0732421875, - 552.126953125, - 496.556396484375 + 45.8, + 448.07, + 552.13, + 496.56 ], "iref": "#/texts/111", "name": "text", @@ -73443,10 +73567,10 @@ }, { "bbox": [ - 46.02473449707031, - 408.5044250488281, - 321.5076904296875, - 419.892333984375 + 46.02, + 408.5, + 321.51, + 419.89 ], "iref": "#/texts/112", "name": "subtitle-level-1", @@ -73462,10 +73586,10 @@ }, { "bbox": [ - 46.301429748535156, - 357.0820007324219, - 550.6118774414062, - 392.4583435058594 + 46.3, + 357.08, + 550.61, + 392.46 ], "iref": "#/texts/113", "name": "text", @@ -73481,10 +73605,10 @@ }, { "bbox": [ - 46.488189697265625, - 253.0490264892578, - 551.0360107421875, - 353.4529724121094 + 46.49, + 253.05, + 551.04, + 353.45 ], "iref": "#/texts/114", "name": "text", @@ -73500,10 +73624,10 @@ }, { "bbox": [ - 46.440311431884766, - 188.080810546875, - 551.396484375, - 249.4759979248047 + 46.44, + 188.08, + 551.4, + 249.48 ], "iref": "#/texts/115", "name": "text", @@ -73519,10 +73643,10 @@ }, { "bbox": [ - 46.27632141113281, - 136.03631591796875, - 550.9563598632812, - 184.4517822265625 + 46.28, + 136.04, + 550.96, + 184.45 ], "iref": "#/texts/116", "name": "text", @@ -73538,10 +73662,10 @@ }, { "bbox": [ - 46.42215347290039, - 58.08152389526367, - 551.0359497070312, - 132.46327209472656 + 46.42, + 58.08, + 551.04, + 132.46 ], "iref": "#/texts/117", "name": "text", @@ -73557,10 +73681,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/118", "name": "text", @@ -73576,10 +73700,10 @@ }, { "bbox": [ - 43.98883056640625, - 751.4635620117188, - 84.67137145996094, - 758.0504760742188 + 43.99, + 751.46, + 84.67, + 758.05 ], "iref": "#/page-headers/14", "name": "page-header", @@ -73595,10 +73719,10 @@ }, { "bbox": [ - 525.1477661132812, - 751.4075317382812, - 548.775146484375, - 758.0504760742188 + 525.15, + 751.41, + 548.78, + 758.05 ], "iref": "#/texts/119", "name": "text", @@ -73614,10 +73738,10 @@ }, { "bbox": [ - 48.36570739746094, - 477.8360900878906, - 548.3624267578125, - 732.3331298828125 + 48.37, + 477.84, + 548.36, + 732.33 ], "iref": "#/figures/5", "name": "picture", @@ -73633,10 +73757,10 @@ }, { "bbox": [ - 44.78739929199219, - 428.34173583984375, - 541.0477905273438, - 460.564697265625 + 44.79, + 428.34, + 541.05, + 460.56 ], "iref": "#/figures/5/captions/0", "name": "caption", @@ -73652,10 +73776,10 @@ }, { "bbox": [ - 44.78684997558594, - 331.06005859375, - 550.6510620117188, - 405.4977722167969 + 44.79, + 331.06, + 550.65, + 405.5 ], "iref": "#/texts/120", "name": "text", @@ -73671,10 +73795,10 @@ }, { "bbox": [ - 44.489322662353516, - 291.4902038574219, - 365.9893798828125, - 302.87811279296875 + 44.49, + 291.49, + 365.99, + 302.88 ], "iref": "#/texts/121", "name": "subtitle-level-1", @@ -73690,10 +73814,10 @@ }, { "bbox": [ - 44.785736083984375, - 175.04168701171875, - 549.7868041992188, - 275.5009460449219 + 44.79, + 175.04, + 549.79, + 275.5 ], "iref": "#/texts/122", "name": "text", @@ -73709,10 +73833,10 @@ }, { "bbox": [ - 44.785736083984375, - 45.043888092041016, - 549.4429931640625, - 171.5908203125 + 44.79, + 45.04, + 549.44, + 171.59 ], "iref": "#/texts/123", "name": "text", @@ -73728,10 +73852,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/124", "name": "text", @@ -73747,10 +73871,10 @@ }, { "bbox": [ - 46.48820114135742, - 751.4075317382812, - 51.251686096191406, - 758.0504760742188 + 46.49, + 751.41, + 51.25, + 758.05 ], "iref": "#/texts/125", "name": "text", @@ -73766,10 +73890,10 @@ }, { "bbox": [ - 56.12232208251953, - 751.4075317382812, - 70.11566162109375, - 758.0504760742188 + 56.12, + 751.41, + 70.12, + 758.05 ], "iref": "#/texts/126", "name": "text", @@ -73785,10 +73909,10 @@ }, { "bbox": [ - 510.634765625, - 751.4635620117188, - 550.7427368164062, - 758.252197265625 + 510.63, + 751.46, + 550.74, + 758.25 ], "iref": "#/page-headers/15", "name": "page-header", @@ -73804,10 +73928,10 @@ }, { "bbox": [ - 55.876461029052734, - 606.848876953125, - 541.853759765625, - 729.6771850585938 + 55.88, + 606.85, + 541.85, + 729.68 ], "iref": "#/figures/6", "name": "picture", @@ -73823,10 +73947,10 @@ }, { "bbox": [ - 44.766658782958984, - 585.4602661132812, - 387.12310791015625, - 593.5936279296875 + 44.77, + 585.46, + 387.12, + 593.59 ], "iref": "#/figures/6/captions/0", "name": "caption", @@ -73842,10 +73966,10 @@ }, { "bbox": [ - 45.36357116699219, - 526.083984375, - 552.5618286132812, - 548.4772338867188 + 45.36, + 526.08, + 552.56, + 548.48 ], "iref": "#/texts/127", "name": "text", @@ -73861,10 +73985,10 @@ }, { "bbox": [ - 46.48820114135742, - 448.0732421875, - 552.16748046875, - 522.4549560546875 + 46.49, + 448.07, + 552.17, + 522.45 ], "iref": "#/texts/128", "name": "text", @@ -73880,10 +74004,10 @@ }, { "bbox": [ - 46.228458404541016, - 382.8196716308594, - 552.1286010742188, - 444.5987854003906 + 46.23, + 382.82, + 552.13, + 444.6 ], "iref": "#/texts/129", "name": "text", @@ -73899,10 +74023,10 @@ }, { "bbox": [ - 46.48820114135742, - 357.0803527832031, - 309.6529846191406, - 366.4904479980469 + 46.49, + 357.08, + 309.65, + 366.49 ], "iref": "#/texts/130", "name": "list-item", @@ -73918,10 +74042,10 @@ }, { "bbox": [ - 46.48820114135742, - 344.0412292480469, - 336.8304748535156, - 353.6436767578125 + 46.49, + 344.04, + 336.83, + 353.64 ], "iref": "#/texts/131", "name": "list-item", @@ -73937,10 +74061,10 @@ }, { "bbox": [ - 45.47064971923828, - 331.05810546875, - 478.3088684082031, - 340.54962158203125 + 45.47, + 331.06, + 478.31, + 340.55 ], "iref": "#/texts/132", "name": "list-item", @@ -73956,10 +74080,10 @@ }, { "bbox": [ - 46.16604232788086, - 214.04542541503906, - 551.7832641601562, - 314.4459533691406 + 46.17, + 214.05, + 551.78, + 314.45 ], "iref": "#/texts/133", "name": "text", @@ -73975,10 +74099,10 @@ }, { "bbox": [ - 46.26358413696289, - 149.0762481689453, - 551.3743896484375, - 210.68536376953125 + 46.26, + 149.08, + 551.37, + 210.69 ], "iref": "#/texts/134", "name": "text", @@ -73994,10 +74118,10 @@ }, { "bbox": [ - 45.70681381225586, - 71.06546783447266, - 551.875732421875, - 145.5064697265625 + 45.71, + 71.07, + 551.88, + 145.51 ], "iref": "#/texts/135", "name": "text", @@ -74013,10 +74137,10 @@ }, { "bbox": [ - 46.488380432128906, - 45.0432014465332, - 551.8381958007812, - 67.6728515625 + 46.49, + 45.04, + 551.84, + 67.67 ], "iref": "#/texts/136", "name": "text", @@ -74032,10 +74156,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/137", "name": "text", @@ -74051,10 +74175,10 @@ }, { "bbox": [ - 44.31840515136719, - 751.4635620117188, - 84.67137145996094, - 758.0541381835938 + 44.32, + 751.46, + 84.67, + 758.05 ], "iref": "#/page-headers/16", "name": "page-header", @@ -74070,10 +74194,10 @@ }, { "bbox": [ - 525.1477661132812, - 751.4075317382812, - 529.9112548828125, - 758.0504760742188 + 525.15, + 751.41, + 529.91, + 758.05 ], "iref": "#/texts/138", "name": "text", @@ -74089,10 +74213,10 @@ }, { "bbox": [ - 534.7818603515625, - 751.4075317382812, - 548.775146484375, - 758.0504760742188 + 534.78, + 751.41, + 548.78, + 758.05 ], "iref": "#/texts/139", "name": "text", @@ -74108,10 +74232,10 @@ }, { "bbox": [ - 45.15538024902344, - 607.3761596679688, - 548.95361328125, - 731.4898681640625 + 45.16, + 607.38, + 548.95, + 731.49 ], "iref": "#/figures/7", "name": "picture", @@ -74127,10 +74251,10 @@ }, { "bbox": [ - 44.35472869873047, - 537.0355224609375, - 539.2632446289062, - 593.7362670898438 + 44.35, + 537.04, + 539.26, + 593.74 ], "iref": "#/figures/7/captions/0", "name": "text", @@ -74146,10 +74270,10 @@ }, { "bbox": [ - 44.49153518676758, - 441.90771484375, - 181.1155242919922, - 498.2774658203125 + 44.49, + 441.91, + 181.12, + 498.28 ], "iref": "#/tables/0/captions/0", "name": "caption", @@ -74165,10 +74289,10 @@ }, { "bbox": [ - 210.0027313232422, - 346.577880859375, - 549.0220336914062, - 499.1263427734375 + 210.0, + 346.58, + 549.02, + 499.13 ], "iref": "#/tables/0", "name": "table", @@ -74184,10 +74308,10 @@ }, { "bbox": [ - 44.78739929199219, - 292.05572509765625, - 549.0201416015625, - 314.4489440917969 + 44.79, + 292.06, + 549.02, + 314.45 ], "iref": "#/texts/140", "name": "text", @@ -74203,10 +74327,10 @@ }, { "bbox": [ - 44.786376953125, - 188.07875061035156, - 550.8748779296875, - 288.5342712402344 + 44.79, + 188.08, + 550.87, + 288.53 ], "iref": "#/texts/141", "name": "text", @@ -74222,10 +74346,10 @@ }, { "bbox": [ - 44.73537826538086, - 148.51072692871094, - 178.22747802734375, - 159.89862060546875 + 44.74, + 148.51, + 178.23, + 159.9 ], "iref": "#/texts/142", "name": "subtitle-level-1", @@ -74241,10 +74365,10 @@ }, { "bbox": [ - 44.78739929199219, - 58.0830192565918, - 549.515625, - 132.5465087890625 + 44.79, + 58.08, + 549.52, + 132.55 ], "iref": "#/texts/143", "name": "text", @@ -74260,10 +74384,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/144", "name": "text", @@ -74279,10 +74403,10 @@ }, { "bbox": [ - 46.48820114135742, - 751.4075317382812, - 70.11566162109375, - 758.0504760742188 + 46.49, + 751.41, + 70.12, + 758.05 ], "iref": "#/texts/145", "name": "text", @@ -74298,10 +74422,10 @@ }, { "bbox": [ - 510.634765625, - 751.3934326171875, - 551.0859985351562, - 759.209228515625 + 510.63, + 751.39, + 551.09, + 759.21 ], "iref": "#/page-headers/17", "name": "page-header", @@ -74317,10 +74441,10 @@ }, { "bbox": [ - 46.38566589355469, - 708.0682373046875, - 552.190673828125, - 731.0924072265625 + 46.39, + 708.07, + 552.19, + 731.09 ], "iref": "#/texts/146", "name": "text", @@ -74336,10 +74460,10 @@ }, { "bbox": [ - 45.289154052734375, - 669.0628051757812, - 553.278076171875, - 705.6804809570312 + 45.29, + 669.06, + 553.28, + 705.68 ], "iref": "#/texts/147", "name": "text", @@ -74355,10 +74479,10 @@ }, { "bbox": [ - 44.96582794189453, - 643.04052734375, - 553.867431640625, - 666.6377563476562 + 44.97, + 643.04, + 553.87, + 666.64 ], "iref": "#/texts/148", "name": "text", @@ -74374,10 +74498,10 @@ }, { "bbox": [ - 46.48820114135742, - 616.512939453125, - 242.9811553955078, - 628.0685424804688 + 46.49, + 616.51, + 242.98, + 628.07 ], "iref": "#/texts/149", "name": "subtitle-level-1", @@ -74393,10 +74517,10 @@ }, { "bbox": [ - 46.48820114135742, - 603.7968139648438, - 209.16476440429688, - 615.1295166015625 + 46.49, + 603.8, + 209.16, + 615.13 ], "iref": "#/texts/150", "name": "text", @@ -74412,10 +74536,10 @@ }, { "bbox": [ - 45.64805603027344, - 577.8392333984375, - 84.40357971191406, - 589.0214233398438 + 45.65, + 577.84, + 84.4, + 589.02 ], "iref": "#/texts/151", "name": "subtitle-level-1", @@ -74431,10 +74555,10 @@ }, { "bbox": [ - 45.716941833496094, - 539.067138671875, - 288.83966064453125, - 575.9967041015625 + 45.72, + 539.07, + 288.84, + 576.0 ], "iref": "#/texts/152", "name": "text", @@ -74450,10 +74574,10 @@ }, { "bbox": [ - 45.982421875, - 512.6180419921875, - 110.57768249511719, - 524.0657958984375 + 45.98, + 512.62, + 110.58, + 524.07 ], "iref": "#/texts/153", "name": "subtitle-level-1", @@ -74469,10 +74593,10 @@ }, { "bbox": [ - 46.48820114135742, - 498.1862487792969, - 411.1214904785156, - 507.86468505859375 + 46.49, + 498.19, + 411.12, + 507.86 ], "iref": "#/texts/154", "name": "list-item", @@ -74488,10 +74612,10 @@ }, { "bbox": [ - 46.17177200317383, - 472.4082946777344, - 552.9000854492188, - 493.8719482421875 + 46.17, + 472.41, + 552.9, + 493.87 ], "iref": "#/texts/155", "name": "list-item", @@ -74507,10 +74631,10 @@ }, { "bbox": [ - 46.39039993286133, - 457.71929931640625, - 129.30548095703125, - 468.0890197753906 + 46.39, + 457.72, + 129.31, + 468.09 ], "iref": "#/texts/156", "name": "list-item", @@ -74526,10 +74650,10 @@ }, { "bbox": [ - 45.71389389038086, - 443.1494140625, - 242.0704345703125, - 453.0476989746094 + 45.71, + 443.15, + 242.07, + 453.05 ], "iref": "#/texts/157", "name": "list-item", @@ -74545,10 +74669,10 @@ }, { "bbox": [ - 46.020606994628906, - 417.41619873046875, - 554.6400756835938, - 438.90777587890625 + 46.02, + 417.42, + 554.64, + 438.91 ], "iref": "#/texts/158", "name": "list-item", @@ -74564,10 +74688,10 @@ }, { "bbox": [ - 46.48814010620117, - 402.9024353027344, - 321.26422119140625, - 412.63861083984375 + 46.49, + 402.9, + 321.26, + 412.64 ], "iref": "#/texts/159", "name": "list-item", @@ -74583,10 +74707,10 @@ }, { "bbox": [ - 46.00100326538086, - 376.937744140625, - 554.378662109375, - 398.0555114746094 + 46.0, + 376.94, + 554.38, + 398.06 ], "iref": "#/texts/160", "name": "list-item", @@ -74602,10 +74726,10 @@ }, { "bbox": [ - 46.0579719543457, - 350.9154052734375, - 553.2630004882812, - 372.03350830078125 + 46.06, + 350.92, + 553.26, + 372.03 ], "iref": "#/texts/161", "name": "list-item", @@ -74621,10 +74745,10 @@ }, { "bbox": [ - 45.94832229614258, - 335.78765869140625, - 129.86572265625, - 346.3191833496094 + 45.95, + 335.79, + 129.87, + 346.32 ], "iref": "#/texts/162", "name": "list-item", @@ -74640,10 +74764,10 @@ }, { "bbox": [ - 45.82542419433594, - 321.9457092285156, - 234.11181640625, - 331.8630065917969 + 45.83, + 321.95, + 234.11, + 331.86 ], "iref": "#/texts/163", "name": "list-item", @@ -74659,10 +74783,10 @@ }, { "bbox": [ - 46.478782653808594, - 307.19293212890625, - 269.6688537597656, - 316.9698486328125 + 46.48, + 307.19, + 269.67, + 316.97 ], "iref": "#/texts/164", "name": "list-item", @@ -74678,10 +74802,10 @@ }, { "bbox": [ - 46.01924514770508, - 292.9189147949219, - 301.0096130371094, - 302.8531799316406 + 46.02, + 292.92, + 301.01, + 302.85 ], "iref": "#/texts/165", "name": "list-item", @@ -74697,10 +74821,10 @@ }, { "bbox": [ - 46.444217681884766, - 278.1666564941406, - 187.92904663085938, - 288.1064453125 + 46.44, + 278.17, + 187.93, + 288.11 ], "iref": "#/texts/166", "name": "list-item", @@ -74716,10 +74840,10 @@ }, { "bbox": [ - 46.00947952270508, - 263.8026123046875, - 169.3743896484375, - 274.1329345703125 + 46.01, + 263.8, + 169.37, + 274.13 ], "iref": "#/texts/167", "name": "list-item", @@ -74735,10 +74859,10 @@ }, { "bbox": [ - 46.049869537353516, - 231.931396484375, - 123.2709732055664, - 244.548095703125 + 46.05, + 231.93, + 123.27, + 244.55 ], "iref": "#/texts/168", "name": "subtitle-level-1", @@ -74754,10 +74878,10 @@ }, { "bbox": [ - 50.6671142578125, - 207.4257049560547, - 552.3800659179688, - 228.917724609375 + 50.67, + 207.43, + 552.38, + 228.92 ], "iref": "#/texts/169", "name": "list-item", @@ -74773,10 +74897,10 @@ }, { "bbox": [ - 50.74010467529297, - 184.40769958496094, - 552.61669921875, - 205.76568603515625 + 50.74, + 184.41, + 552.62, + 205.77 ], "iref": "#/texts/170", "name": "list-item", @@ -74792,10 +74916,10 @@ }, { "bbox": [ - 50.74015808105469, - 161.3896942138672, - 552.6810302734375, - 182.65234375 + 50.74, + 161.39, + 552.68, + 182.65 ], "iref": "#/texts/171", "name": "list-item", @@ -74811,10 +74935,10 @@ }, { "bbox": [ - 50.16819763183594, - 126.91963195800781, - 552.5728759765625, - 159.62261962890625 + 50.17, + 126.92, + 552.57, + 159.62 ], "iref": "#/texts/172", "name": "list-item", @@ -74830,10 +74954,10 @@ }, { "bbox": [ - 50.49177551269531, - 103.90162658691406, - 553.5820922851562, - 124.90191650390625 + 50.49, + 103.9, + 553.58, + 124.9 ], "iref": "#/texts/173", "name": "list-item", @@ -74849,10 +74973,10 @@ }, { "bbox": [ - 50.74018859863281, - 92.39262390136719, - 436.9924011230469, - 101.68670654296875 + 50.74, + 92.39, + 436.99, + 101.69 ], "iref": "#/texts/174", "name": "list-item", @@ -74868,10 +74992,10 @@ }, { "bbox": [ - 50.74017333984375, - 69.43157196044922, - 552.4933471679688, - 90.58172607421875 + 50.74, + 69.43, + 552.49, + 90.58 ], "iref": "#/texts/175", "name": "list-item", @@ -74887,10 +75011,10 @@ }, { "bbox": [ - 50.37576675415039, - 46.413570404052734, - 553.1749267578125, - 67.59844970703125 + 50.38, + 46.41, + 553.17, + 67.6 ], "iref": "#/texts/176", "name": "list-item", @@ -74906,10 +75030,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/177", "name": "text", @@ -74925,10 +75049,10 @@ }, { "bbox": [ - 44.473201751708984, - 751.4635620117188, - 84.89160919189453, - 758.80615234375 + 44.47, + 751.46, + 84.89, + 758.81 ], "iref": "#/page-headers/18", "name": "page-header", @@ -74944,10 +75068,10 @@ }, { "bbox": [ - 454.5641784667969, - 745.4571533203125, - 549.099365234375, - 761.863037109375 + 454.56, + 745.46, + 549.1, + 761.86 ], "iref": "#/figures/8", "name": "picture", @@ -74963,10 +75087,10 @@ }, { "bbox": [ - 46.63217544555664, - 722.4282836914062, - 362.7469787597656, - 731.7239990234375 + 46.63, + 722.43, + 362.75, + 731.72 ], "iref": "#/texts/178", "name": "list-item", @@ -74982,10 +75106,10 @@ }, { "bbox": [ - 44.78684997558594, - 699.5198364257812, - 549.7481689453125, - 720.4119262695312 + 44.79, + 699.52, + 549.75, + 720.41 ], "iref": "#/texts/179", "name": "list-item", @@ -75001,10 +75125,10 @@ }, { "bbox": [ - 44.7877197265625, - 688.0108642578125, - 238.66644287109375, - 697.144287109375 + 44.79, + 688.01, + 238.67, + 697.14 ], "iref": "#/texts/180", "name": "list-item", @@ -75020,10 +75144,10 @@ }, { "bbox": [ - 44.54977798461914, - 676.5018920898438, - 243.0414581298828, - 685.6976318359375 + 44.55, + 676.5, + 243.04, + 685.7 ], "iref": "#/texts/181", "name": "list-item", @@ -75039,10 +75163,10 @@ }, { "bbox": [ - 44.7877197265625, - 653.5408935546875, - 548.7638549804688, - 674.378662109375 + 44.79, + 653.54, + 548.76, + 674.38 ], "iref": "#/texts/182", "name": "list-item", @@ -75058,10 +75182,10 @@ }, { "bbox": [ - 44.7877197265625, - 630.52294921875, - 548.82861328125, - 651.5768432617188 + 44.79, + 630.52, + 548.83, + 651.58 ], "iref": "#/texts/183", "name": "list-item", @@ -75077,10 +75201,10 @@ }, { "bbox": [ - 44.787750244140625, - 607.5050048828125, - 550.8438720703125, - 628.0836181640625 + 44.79, + 607.51, + 550.84, + 628.08 ], "iref": "#/texts/184", "name": "list-item", @@ -75096,10 +75220,10 @@ }, { "bbox": [ - 44.787750244140625, - 595.9960327148438, - 474.9829406738281, - 604.6593627929688 + 44.79, + 596.0, + 474.98, + 604.66 ], "iref": "#/texts/185", "name": "list-item", @@ -75115,10 +75239,10 @@ }, { "bbox": [ - 44.786895751953125, - 573.0350341796875, - 548.8020629882812, - 592.54248046875 + 44.79, + 573.04, + 548.8, + 592.54 ], "iref": "#/texts/186", "name": "list-item", @@ -75134,10 +75258,10 @@ }, { "bbox": [ - 44.786865234375, - 550.01708984375, - 548.7230834960938, - 569.8275146484375 + 44.79, + 550.02, + 548.72, + 569.83 ], "iref": "#/texts/187", "name": "list-item", @@ -75153,10 +75277,10 @@ }, { "bbox": [ - 44.78601837158203, - 526.9991455078125, - 550.565185546875, - 546.7464599609375 + 44.79, + 527.0, + 550.57, + 546.75 ], "iref": "#/texts/188", "name": "list-item", @@ -75172,10 +75296,10 @@ }, { "bbox": [ - 57.16337966918945, - 468.5407409667969, - 529.73583984375, - 491.138916015625 + 57.16, + 468.54, + 529.74, + 491.14 ], "iref": "#/texts/189", "name": "text", @@ -75191,10 +75315,10 @@ }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], "iref": "#/texts/190", "name": "text", @@ -75517,21 +75641,29 @@ "en", 1.0 ], + [ + "language", + 2144509362215609527, + "TEXT", + "#/texts/0", + "en", + 0.41 + ], [ "semantic", 2144509362215609527, "TEXT", "#/texts/0", - "meta-data", + "reference", 1.0 ], [ "language", - 2144509362215609527, + 16672720454366774824, "TEXT", - "#/texts/0", + "#/texts/1", "en", - 0.4099999964237213 + 0.75 ], [ "semantic", @@ -75539,15 +75671,15 @@ "TEXT", "#/texts/1", "header", - 0.8999999761581421 + 0.9 ], [ "language", - 16672720454366774824, + 16781763356419781679, "TEXT", - "#/texts/1", - "en", - 0.75 + "#/texts/2", + "nl", + 0.45 ], [ "semantic", @@ -75555,15 +75687,15 @@ "TEXT", "#/texts/2", "meta-data", - 0.6100000143051147 + 0.61 ], [ "language", - 16781763356419781679, + 3352447812305581329, "TEXT", - "#/texts/2", - "nl", - 0.44999998807907104 + "#/texts/3", + "ceb", + 0.49 ], [ "semantic", @@ -75575,11 +75707,11 @@ ], [ "language", - 3352447812305581329, + 14877831450145300436, "TEXT", - "#/texts/3", - "ceb", - 0.49000000953674316 + "#/texts/4", + "it", + 0.36 ], [ "semantic", @@ -75591,11 +75723,11 @@ ], [ "language", - 14877831450145300436, + 3352447812305581329, "TEXT", - "#/texts/4", - "it", - 0.36000001430511475 + "#/texts/5", + "ceb", + 0.49 ], [ "semantic", @@ -75607,27 +75739,27 @@ ], [ "language", - 3352447812305581329, + 13336841394978214677, "TEXT", - "#/texts/5", - "ceb", - 0.49000000953674316 + "#/texts/6", + "de", + 0.57 ], [ "semantic", 13336841394978214677, "TEXT", "#/texts/6", - "meta-data", - 0.5899999737739563 + "reference", + 0.59 ], [ "language", - 13336841394978214677, + 15325526562897377208, "TEXT", - "#/texts/6", - "de", - 0.5699999928474426 + "#/texts/7", + "en", + 0.81 ], [ "semantic", @@ -75639,11 +75771,11 @@ ], [ "language", - 15325526562897377208, + 4017434568255781081, "TEXT", - "#/texts/7", + "#/texts/8", "en", - 0.8100000023841858 + 0.34 ], [ "semantic", @@ -75651,15 +75783,15 @@ "TEXT", "#/texts/8", "meta-data", - 0.9300000071525574 + 0.93 ], [ "language", - 4017434568255781081, + 8487024695951375934, "TEXT", - "#/texts/8", + "#/texts/9", "en", - 0.3400000035762787 + 0.32 ], [ "semantic", @@ -75671,11 +75803,11 @@ ], [ "language", - 8487024695951375934, + 11695737263227886476, "TEXT", - "#/texts/9", + "#/texts/10", "en", - 0.3199999928474426 + 0.93 ], [ "semantic", @@ -75683,15 +75815,15 @@ "TEXT", "#/texts/10", "text", - 0.9599999785423279 + 0.96 ], [ "language", - 11695737263227886476, + 8500733160758672230, "TEXT", - "#/texts/10", - "en", - 0.9300000071525574 + "#/texts/11", + "es", + 0.37 ], [ "semantic", @@ -75703,11 +75835,11 @@ ], [ "language", - 8500733160758672230, + 4452030907228745864, "TEXT", - "#/texts/11", - "es", - 0.3700000047683716 + "#/texts/12", + "en", + 0.62 ], [ "semantic", @@ -75715,15 +75847,15 @@ "TEXT", "#/texts/12", "text", - 0.8700000047683716 + 0.87 ], [ "language", - 4452030907228745864, + 11913688961435238004, "TEXT", - "#/texts/12", + "#/texts/13", "en", - 0.6200000047683716 + 0.64 ], [ "semantic", @@ -75735,11 +75867,11 @@ ], [ "language", - 11913688961435238004, + 9977041563469582014, "TEXT", - "#/texts/13", + "#/texts/14", "en", - 0.6399999856948853 + 0.96 ], [ "semantic", @@ -75747,15 +75879,15 @@ "TEXT", "#/texts/14", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 9977041563469582014, + 4361549266817300114, "TEXT", - "#/texts/14", + "#/texts/15", "en", - 0.9599999785423279 + 0.19 ], [ "semantic", @@ -75763,15 +75895,15 @@ "TEXT", "#/texts/15", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 4361549266817300114, + 8425126282903547933, "TEXT", - "#/texts/15", + "#/texts/16", "en", - 0.1899999976158142 + 0.93 ], [ "semantic", @@ -75779,15 +75911,15 @@ "TEXT", "#/texts/16", "text", - 0.9399999976158142 + 0.94 ], [ "language", - 8425126282903547933, + 16507313240019459642, "TEXT", - "#/texts/16", + "#/texts/17", "en", - 0.9300000071525574 + 0.91 ], [ "semantic", @@ -75795,15 +75927,15 @@ "TEXT", "#/texts/17", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 16507313240019459642, + 7900229969942228522, "TEXT", - "#/texts/17", + "#/texts/18", "en", - 0.9100000262260437 + 0.99 ], [ "semantic", @@ -75811,15 +75943,15 @@ "TEXT", "#/texts/18", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 7900229969942228522, + 10081303962589804251, "TEXT", - "#/texts/18", + "#/texts/19", "en", - 0.9900000095367432 + 0.92 ], [ "semantic", @@ -75831,11 +75963,11 @@ ], [ "language", - 10081303962589804251, + 12186698460099365002, "TEXT", - "#/texts/19", + "#/texts/20", "en", - 0.9200000166893005 + 0.51 ], [ "semantic", @@ -75843,15 +75975,15 @@ "TEXT", "#/texts/20", "header", - 0.49000000953674316 + 0.49 ], [ "language", - 12186698460099365002, + 14190244699299580163, "TEXT", - "#/texts/20", + "#/texts/21", "en", - 0.5099999904632568 + 0.63 ], [ "semantic", @@ -75859,15 +75991,15 @@ "TEXT", "#/texts/21", "text", - 0.9599999785423279 + 0.96 ], [ "language", - 14190244699299580163, + 1376279050886549305, "TEXT", - "#/texts/21", + "#/texts/22", "en", - 0.6299999952316284 + 0.47 ], [ "semantic", @@ -75875,15 +76007,15 @@ "TEXT", "#/texts/22", "header", - 0.800000011920929 + 0.8 ], [ "language", - 1376279050886549305, + 10155628801693924200, "TEXT", - "#/texts/22", + "#/texts/23", "en", - 0.4699999988079071 + 0.93 ], [ "semantic", @@ -75891,15 +76023,15 @@ "TEXT", "#/texts/23", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 10155628801693924200, + 9107499507097280105, "TEXT", - "#/texts/23", + "#/texts/24", "en", - 0.9300000071525574 + 0.93 ], [ "semantic", @@ -75907,15 +76039,15 @@ "TEXT", "#/texts/24", "text", - 0.6100000143051147 + 0.61 ], [ "language", - 9107499507097280105, + 7248467870339433322, "TEXT", - "#/texts/24", + "#/texts/25", "en", - 0.9300000071525574 + 0.94 ], [ "semantic", @@ -75927,11 +76059,11 @@ ], [ "language", - 7248467870339433322, + 13346892078888080449, "TEXT", - "#/texts/25", + "#/texts/26", "en", - 0.9399999976158142 + 0.89 ], [ "semantic", @@ -75939,15 +76071,15 @@ "TEXT", "#/texts/26", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 13346892078888080449, + 1118972765223422660, "TEXT", - "#/texts/26", + "#/texts/27", "en", - 0.8899999856948853 + 0.91 ], [ "semantic", @@ -75955,15 +76087,15 @@ "TEXT", "#/texts/27", "text", - 0.8299999833106995 + 0.83 ], [ "language", - 1118972765223422660, + 324023167304456371, "TEXT", - "#/texts/27", + "#/texts/28", "en", - 0.9100000262260437 + 0.93 ], [ "semantic", @@ -75971,15 +76103,15 @@ "TEXT", "#/texts/28", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 324023167304456371, + 4651508276868765576, "TEXT", - "#/texts/28", + "#/texts/29", "en", - 0.9300000071525574 + 0.73 ], [ "semantic", @@ -75987,15 +76119,15 @@ "TEXT", "#/texts/29", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 4651508276868765576, + 3052020526349962744, "TEXT", - "#/texts/29", + "#/texts/30", "en", - 0.7300000190734863 + 0.93 ], [ "semantic", @@ -76003,15 +76135,15 @@ "TEXT", "#/texts/30", "text", - 0.949999988079071 + 0.95 ], [ "language", - 3052020526349962744, + 6725501529910185390, "TEXT", - "#/texts/30", + "#/texts/31", "en", - 0.9300000071525574 + 0.98 ], [ "semantic", @@ -76019,15 +76151,15 @@ "TEXT", "#/texts/31", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 6725501529910185390, + 14814111183601762276, "TEXT", - "#/texts/31", + "#/texts/32", "en", - 0.9800000190734863 + 0.91 ], [ "semantic", @@ -76035,15 +76167,15 @@ "TEXT", "#/texts/32", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 14814111183601762276, + 18391264192891079539, "TEXT", - "#/texts/32", + "#/texts/33", "en", - 0.9100000262260437 + 0.78 ], [ "semantic", @@ -76051,15 +76183,15 @@ "TEXT", "#/texts/33", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 4361549266681704196, "TEXT", - "#/texts/33", + "#/texts/34", "en", - 0.7799999713897705 + 0.4 ], [ "semantic", @@ -76067,15 +76199,15 @@ "TEXT", "#/texts/34", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 4361549266681704196, + 8043608144162608258, "TEXT", - "#/texts/34", + "#/texts/35", "en", - 0.4000000059604645 + 0.94 ], [ "semantic", @@ -76083,15 +76215,15 @@ "TEXT", "#/texts/35", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 8043608144162608258, + 7159467829896778939, "TEXT", - "#/texts/35", + "#/texts/36", "en", - 0.9399999976158142 + 0.44 ], [ "semantic", @@ -76103,11 +76235,11 @@ ], [ "language", - 7159467829896778939, + 5617240156952377, "TEXT", - "#/texts/36", + "#/texts/37", "en", - 0.4399999976158142 + 0.94 ], [ "semantic", @@ -76115,15 +76247,15 @@ "TEXT", "#/texts/37", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 5617240156952377, + 3276490574487379366, "TEXT", - "#/texts/37", + "#/texts/38", "en", - 0.9399999976158142 + 0.84 ], [ "semantic", @@ -76131,15 +76263,15 @@ "TEXT", "#/texts/38", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 3276490574487379366, + 3367451956962330174, "TEXT", - "#/texts/38", + "#/texts/39", "en", - 0.8399999737739563 + 0.94 ], [ "semantic", @@ -76151,11 +76283,11 @@ ], [ "language", - 3367451956962330174, + 5509744459704235873, "TEXT", - "#/texts/39", + "#/texts/40", "en", - 0.9399999976158142 + 0.88 ], [ "semantic", @@ -76163,15 +76295,15 @@ "TEXT", "#/texts/40", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 5509744459704235873, + 18391264192891079539, "TEXT", - "#/texts/40", + "#/texts/41", "en", - 0.8799999952316284 + 0.78 ], [ "semantic", @@ -76179,15 +76311,15 @@ "TEXT", "#/texts/41", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 4361549176688508574, "TEXT", - "#/texts/41", + "#/texts/42", "en", - 0.7799999713897705 + 0.17 ], [ "semantic", @@ -76195,15 +76327,15 @@ "TEXT", "#/texts/42", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 4361549176688508574, + 12374482891052873875, "TEXT", - "#/texts/42", + "#/texts/43", "en", - 0.17000000178813934 + 0.55 ], [ "semantic", @@ -76211,15 +76343,15 @@ "TEXT", "#/texts/43", "header", - 0.5699999928474426 + 0.57 ], [ "language", - 12374482891052873875, + 2755397864153233778, "TEXT", - "#/texts/43", + "#/texts/44", "en", - 0.550000011920929 + 0.9 ], [ "semantic", @@ -76227,15 +76359,15 @@ "TEXT", "#/texts/44", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 2755397864153233778, + 4698316471746130896, "TEXT", - "#/texts/44", + "#/texts/45", "en", - 0.8999999761581421 + 0.91 ], [ "semantic", @@ -76243,15 +76375,15 @@ "TEXT", "#/texts/45", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 4698316471746130896, + 11827267218358801841, "TEXT", - "#/texts/45", + "#/texts/46", "en", - 0.9100000262260437 + 0.93 ], [ "semantic", @@ -76259,15 +76391,15 @@ "TEXT", "#/texts/46", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 11827267218358801841, + 6297710299044869343, "TEXT", - "#/texts/46", - "en", - 0.9300000071525574 + "#/texts/47", + "fr", + 0.28 ], [ "semantic", @@ -76275,15 +76407,15 @@ "TEXT", "#/texts/47", "header", - 0.8299999833106995 + 0.83 ], [ "language", - 6297710299044869343, + 7158837349769150986, "TEXT", - "#/texts/47", - "fr", - 0.2800000011920929 + "#/texts/48", + "en", + 0.88 ], [ "semantic", @@ -76295,11 +76427,11 @@ ], [ "language", - 7158837349769150986, + 1150871476689677866, "TEXT", - "#/texts/48", + "#/texts/49", "en", - 0.8799999952316284 + 0.93 ], [ "semantic", @@ -76311,11 +76443,11 @@ ], [ "language", - 1150871476689677866, + 5163702913945903725, "TEXT", - "#/texts/49", + "#/texts/50", "en", - 0.9300000071525574 + 0.96 ], [ "semantic", @@ -76323,15 +76455,15 @@ "TEXT", "#/texts/50", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 5163702913945903725, + 5462319091745771382, "TEXT", - "#/texts/50", + "#/texts/51", "en", - 0.9599999785423279 + 0.9 ], [ "semantic", @@ -76339,15 +76471,15 @@ "TEXT", "#/texts/51", "text", - 0.5899999737739563 + 0.59 ], [ "language", - 5462319091745771382, + 18391264192891079539, "TEXT", - "#/texts/51", + "#/texts/52", "en", - 0.8999999761581421 + 0.78 ], [ "semantic", @@ -76355,15 +76487,15 @@ "TEXT", "#/texts/52", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 958124839653591304, "TEXT", - "#/texts/52", + "#/texts/53", "en", - 0.7799999713897705 + 0.94 ], [ "semantic", @@ -76371,15 +76503,15 @@ "TEXT", "#/texts/53", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 958124839653591304, + 1448405324616602032, "TEXT", - "#/texts/53", + "#/texts/54", "en", - 0.9399999976158142 + 0.87 ], [ "semantic", @@ -76387,15 +76519,15 @@ "TEXT", "#/texts/54", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 1448405324616602032, + 2617775076168299948, "TEXT", - "#/texts/54", + "#/texts/55", "en", - 0.8700000047683716 + 0.79 ], [ "semantic", @@ -76403,15 +76535,15 @@ "TEXT", "#/texts/55", "header", - 0.800000011920929 + 0.8 ], [ "language", - 2617775076168299948, + 13974986056043304735, "TEXT", - "#/texts/55", + "#/texts/56", "en", - 0.7900000214576721 + 0.93 ], [ "semantic", @@ -76419,15 +76551,15 @@ "TEXT", "#/texts/56", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 13974986056043304735, + 5985285694705576020, "TEXT", - "#/texts/56", + "#/texts/57", "en", - 0.9300000071525574 + 0.84 ], [ "semantic", @@ -76435,15 +76567,15 @@ "TEXT", "#/texts/57", "header", - 0.8199999928474426 + 0.82 ], [ "language", - 5985285694705576020, + 11235296141350659290, "TEXT", - "#/texts/57", + "#/texts/58", "en", - 0.8399999737739563 + 0.96 ], [ "semantic", @@ -76451,15 +76583,15 @@ "TEXT", "#/texts/58", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 11235296141350659290, + 18391264192891079539, "TEXT", - "#/texts/58", + "#/texts/59", "en", - 0.9599999785423279 + 0.78 ], [ "semantic", @@ -76467,15 +76599,15 @@ "TEXT", "#/texts/59", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 4361549266576336732, "TEXT", - "#/texts/59", - "en", - 0.7799999713897705 + "#/texts/60", + "eu", + 0.2 ], [ "semantic", @@ -76483,15 +76615,15 @@ "TEXT", "#/texts/60", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 4361549266576336732, + 5771309285006424458, "TEXT", - "#/texts/60", - "eu", - 0.20000000298023224 + "#/texts/61", + "en", + 0.93 ], [ "semantic", @@ -76499,15 +76631,15 @@ "TEXT", "#/texts/61", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 5771309285006424458, + 5371685212527510397, "TEXT", - "#/texts/61", + "#/texts/62", "en", - 0.9300000071525574 + 0.77 ], [ "semantic", @@ -76515,15 +76647,15 @@ "TEXT", "#/texts/62", "header", - 0.949999988079071 + 0.95 ], [ "language", - 5371685212527510397, + 7817257645383866853, "TEXT", - "#/texts/62", + "#/texts/63", "en", - 0.7699999809265137 + 0.92 ], [ "semantic", @@ -76531,15 +76663,15 @@ "TEXT", "#/texts/63", "text", - 0.9399999976158142 + 0.94 ], [ "language", - 7817257645383866853, + 2929626768872004841, "TEXT", - "#/texts/63", + "#/texts/64", "en", - 0.9200000166893005 + 0.81 ], [ "semantic", @@ -76547,15 +76679,15 @@ "TEXT", "#/texts/64", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 2929626768872004841, + 15879756297712818143, "TEXT", - "#/texts/64", + "#/texts/65", "en", - 0.8100000023841858 + 0.75 ], [ "semantic", @@ -76563,15 +76695,15 @@ "TEXT", "#/texts/65", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 15879756297712818143, + 16116531546352845311, "TEXT", - "#/texts/65", + "#/texts/66", "en", - 0.75 + 0.97 ], [ "semantic", @@ -76579,15 +76711,15 @@ "TEXT", "#/texts/66", "text", - 0.8899999856948853 + 0.89 ], [ "language", - 16116531546352845311, + 9541434157786316356, "TEXT", - "#/texts/66", + "#/texts/67", "en", - 0.9700000286102295 + 0.96 ], [ "semantic", @@ -76595,15 +76727,15 @@ "TEXT", "#/texts/67", "text", - 0.9599999785423279 + 0.96 ], [ "language", - 9541434157786316356, + 997682002692959482, "TEXT", - "#/texts/67", + "#/texts/68", "en", - 0.9599999785423279 + 0.9 ], [ "semantic", @@ -76611,15 +76743,15 @@ "TEXT", "#/texts/68", "text", - 0.9599999785423279 + 0.96 ], [ "language", - 997682002692959482, + 11590138063543342276, "TEXT", - "#/texts/68", + "#/texts/69", "en", - 0.8999999761581421 + 0.51 ], [ "semantic", @@ -76627,15 +76759,15 @@ "TEXT", "#/texts/69", "header", - 0.8799999952316284 + 0.88 ], [ "language", - 11590138063543342276, + 16380310806374538602, "TEXT", - "#/texts/69", + "#/texts/70", "en", - 0.5099999904632568 + 0.87 ], [ "semantic", @@ -76643,15 +76775,15 @@ "TEXT", "#/texts/70", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 16380310806374538602, + 5393976293631695754, "TEXT", - "#/texts/70", + "#/texts/71", "en", - 0.8700000047683716 + 0.9 ], [ "semantic", @@ -76659,15 +76791,15 @@ "TEXT", "#/texts/71", "text", - 0.8799999952316284 + 0.88 ], [ "language", - 5393976293631695754, + 1988335831916069382, "TEXT", - "#/texts/71", + "#/texts/72", "en", - 0.8999999761581421 + 0.94 ], [ "semantic", @@ -76675,15 +76807,15 @@ "TEXT", "#/texts/72", "text", - 0.6200000047683716 + 0.62 ], [ "language", - 1988335831916069382, + 5147764798816678886, "TEXT", - "#/texts/72", + "#/texts/73", "en", - 0.9399999976158142 + 0.88 ], [ "semantic", @@ -76691,15 +76823,15 @@ "TEXT", "#/texts/73", "text", - 0.8600000143051147 + 0.86 ], [ "language", - 5147764798816678886, + 285583876932865368, "TEXT", - "#/texts/73", + "#/texts/74", "en", - 0.8799999952316284 + 0.97 ], [ "semantic", @@ -76707,15 +76839,15 @@ "TEXT", "#/texts/74", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 285583876932865368, + 18391264192891079539, "TEXT", - "#/texts/74", + "#/texts/75", "en", - 0.9700000286102295 + 0.78 ], [ "semantic", @@ -76723,15 +76855,15 @@ "TEXT", "#/texts/75", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 4361549257370278754, "TEXT", - "#/texts/75", - "en", - 0.7799999713897705 + "#/texts/76", + "zh", + 0.42 ], [ "semantic", @@ -76739,15 +76871,15 @@ "TEXT", "#/texts/76", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 4361549257370278754, - "TEXT", - "#/texts/76", - "zh", - 0.41999998688697815 + 13183039880198077038, + "TEXT", + "#/texts/77", + "en", + 0.92 ], [ "semantic", @@ -76755,15 +76887,15 @@ "TEXT", "#/texts/77", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 13183039880198077038, + 13428900458866068249, "TEXT", - "#/texts/77", + "#/texts/78", "en", - 0.9200000166893005 + 0.9 ], [ "semantic", @@ -76771,15 +76903,15 @@ "TEXT", "#/texts/78", "header", - 0.800000011920929 + 0.8 ], [ "language", - 13428900458866068249, + 1430911655724119030, "TEXT", - "#/texts/78", + "#/texts/79", "en", - 0.8999999761581421 + 0.93 ], [ "semantic", @@ -76787,15 +76919,15 @@ "TEXT", "#/texts/79", "text", - 0.9599999785423279 + 0.96 ], [ "language", - 1430911655724119030, + 13770706479324480755, "TEXT", - "#/texts/79", + "#/texts/80", "en", - 0.9300000071525574 + 0.93 ], [ "semantic", @@ -76803,15 +76935,15 @@ "TEXT", "#/texts/80", "text", - 0.8899999856948853 + 0.89 ], [ "language", - 13770706479324480755, + 11165481757050847950, "TEXT", - "#/texts/80", + "#/texts/81", "en", - 0.9300000071525574 + 0.12 ], [ "semantic", @@ -76823,11 +76955,11 @@ ], [ "language", - 11165481757050847950, + 9572077971492738329, "TEXT", - "#/texts/81", + "#/texts/82", "en", - 0.11999999731779099 + 0.94 ], [ "semantic", @@ -76835,15 +76967,15 @@ "TEXT", "#/texts/82", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 9572077971492738329, + 14951391138799557075, "TEXT", - "#/texts/82", - "en", - 0.9399999976158142 + "#/texts/83", + "pl", + 0.13 ], [ "semantic", @@ -76855,11 +76987,11 @@ ], [ "language", - 14951391138799557075, + 16602156009514813718, "TEXT", - "#/texts/83", - "pl", - 0.12999999523162842 + "#/texts/84", + "en", + 0.96 ], [ "semantic", @@ -76867,15 +76999,15 @@ "TEXT", "#/texts/84", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 16602156009514813718, + 7162849562576593449, "TEXT", - "#/texts/84", + "#/texts/85", "en", - 0.9599999785423279 + 0.96 ], [ "semantic", @@ -76883,15 +77015,15 @@ "TEXT", "#/texts/85", "text", - 0.7900000214576721 + 0.79 ], [ "language", - 7162849562576593449, + 15385417954505503552, "TEXT", - "#/texts/85", + "#/texts/86", "en", - 0.9599999785423279 + 0.84 ], [ "semantic", @@ -76899,15 +77031,15 @@ "TEXT", "#/texts/86", "meta-data", - 0.9900000095367432 + 0.99 ], [ "language", - 15385417954505503552, + 10815650641518265876, "TEXT", - "#/texts/86", + "#/texts/87", "en", - 0.8399999737739563 + 0.95 ], [ "semantic", @@ -76915,15 +77047,15 @@ "TEXT", "#/texts/87", "text", - 0.9100000262260437 + 0.91 ], [ "language", - 10815650641518265876, + 18391264192891079539, "TEXT", - "#/texts/87", + "#/texts/88", "en", - 0.949999988079071 + 0.78 ], [ "semantic", @@ -76931,15 +77063,15 @@ "TEXT", "#/texts/88", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 12004249365408683930, "TEXT", - "#/texts/88", + "#/texts/89", "en", - 0.7799999713897705 + 0.92 ], [ "semantic", @@ -76947,15 +77079,15 @@ "TEXT", "#/texts/89", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 12004249365408683930, + 7223381657047466215, "TEXT", - "#/texts/89", + "#/texts/90", "en", - 0.9200000166893005 + 0.93 ], [ "semantic", @@ -76963,15 +77095,15 @@ "TEXT", "#/texts/90", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 7223381657047466215, + 15132906055887224772, "TEXT", - "#/texts/90", + "#/texts/91", "en", - 0.9300000071525574 + 0.83 ], [ "semantic", @@ -76979,15 +77111,15 @@ "TEXT", "#/texts/91", "header", - 0.7099999785423279 + 0.71 ], [ "language", - 15132906055887224772, + 17129434987283608290, "TEXT", - "#/texts/91", + "#/texts/92", "en", - 0.8299999833106995 + 0.95 ], [ "semantic", @@ -76995,15 +77127,15 @@ "TEXT", "#/texts/92", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 17129434987283608290, + 10350406469077463155, "TEXT", - "#/texts/92", + "#/texts/93", "en", - 0.949999988079071 + 0.93 ], [ "semantic", @@ -77011,15 +77143,15 @@ "TEXT", "#/texts/93", "text", - 0.9300000071525574 + 0.93 ], [ "language", - 10350406469077463155, + 16949854269270315165, "TEXT", - "#/texts/93", + "#/texts/94", "en", - 0.9300000071525574 + 0.91 ], [ "semantic", @@ -77027,15 +77159,15 @@ "TEXT", "#/texts/94", "text", - 0.9599999785423279 + 0.96 ], [ "language", - 16949854269270315165, + 18391264192891079539, "TEXT", - "#/texts/94", + "#/texts/95", "en", - 0.9100000262260437 + 0.78 ], [ "semantic", @@ -77043,15 +77175,15 @@ "TEXT", "#/texts/95", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 4361549266593946746, "TEXT", - "#/texts/95", - "en", - 0.7799999713897705 + "#/texts/96", + "fr", + 0.37 ], [ "semantic", @@ -77059,15 +77191,15 @@ "TEXT", "#/texts/96", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 4361549266593946746, + 9802652237802670052, "TEXT", - "#/texts/96", - "fr", - 0.3700000047683716 + "#/texts/97", + "zh", + 0.18 ], [ "semantic", @@ -77075,15 +77207,15 @@ "TEXT", "#/texts/97", "header", - 0.7099999785423279 + 0.71 ], [ "language", - 9802652237802670052, + 5524728206729419689, "TEXT", - "#/texts/97", - "zh", - 0.18000000715255737 + "#/texts/98", + "en", + 0.91 ], [ "semantic", @@ -77091,15 +77223,15 @@ "TEXT", "#/texts/98", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 5524728206729419689, + 4043385013945968936, "TEXT", - "#/texts/98", - "en", - 0.9100000262260437 + "#/texts/99", + "sv", + 0.11 ], [ "semantic", @@ -77111,11 +77243,11 @@ ], [ "language", - 4043385013945968936, + 11778884428660217326, "TEXT", - "#/texts/99", - "sv", - 0.10999999940395355 + "#/texts/100", + "en", + 0.83 ], [ "semantic", @@ -77127,11 +77259,11 @@ ], [ "language", - 11778884428660217326, + 12875050310340408203, "TEXT", - "#/texts/100", + "#/texts/101", "en", - 0.8299999833106995 + 0.37 ], [ "semantic", @@ -77139,15 +77271,15 @@ "TEXT", "#/texts/101", "meta-data", - 0.9900000095367432 + 0.99 ], [ "language", - 12875050310340408203, + 3785875504044487339, "TEXT", - "#/texts/101", + "#/texts/102", "en", - 0.3700000047683716 + 0.93 ], [ "semantic", @@ -77155,15 +77287,15 @@ "TEXT", "#/texts/102", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 3785875504044487339, + 12105626155924658285, "TEXT", - "#/texts/102", - "en", - 0.9300000071525574 + "#/texts/103", + "ja", + 0.12 ], [ "semantic", @@ -77175,11 +77307,11 @@ ], [ "language", - 12105626155924658285, + 16265612055607243129, "TEXT", - "#/texts/103", - "ja", - 0.11999999731779099 + "#/texts/104", + "en", + 0.92 ], [ "semantic", @@ -77187,15 +77319,15 @@ "TEXT", "#/texts/104", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 16265612055607243129, + 18391264192891079539, "TEXT", - "#/texts/104", + "#/texts/105", "en", - 0.9200000166893005 + 0.78 ], [ "semantic", @@ -77203,15 +77335,15 @@ "TEXT", "#/texts/105", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 10252446451495472512, "TEXT", - "#/texts/105", + "#/texts/106", "en", - 0.7799999713897705 + 0.83 ], [ "semantic", @@ -77219,15 +77351,15 @@ "TEXT", "#/texts/106", "meta-data", - 0.9599999785423279 + 0.96 ], [ "language", - 10252446451495472512, + 17011944206067158637, "TEXT", - "#/texts/106", + "#/texts/107", "en", - 0.8299999833106995 + 0.93 ], [ "semantic", @@ -77235,15 +77367,15 @@ "TEXT", "#/texts/107", "text", - 0.9399999976158142 + 0.94 ], [ "language", - 17011944206067158637, + 16289627123982758705, "TEXT", - "#/texts/107", + "#/texts/108", "en", - 0.9300000071525574 + 0.52 ], [ "semantic", @@ -77251,15 +77383,15 @@ "TEXT", "#/texts/108", "meta-data", - 0.4399999976158142 + 0.44 ], [ "language", - 16289627123982758705, + 13969801897340997317, "TEXT", - "#/texts/108", + "#/texts/109", "en", - 0.5199999809265137 + 0.98 ], [ "semantic", @@ -77267,15 +77399,15 @@ "TEXT", "#/texts/109", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 13969801897340997317, + 105697770555684555, "TEXT", - "#/texts/109", + "#/texts/110", "en", - 0.9800000190734863 + 0.94 ], [ "semantic", @@ -77283,15 +77415,15 @@ "TEXT", "#/texts/110", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 105697770555684555, + 15938840672015995359, "TEXT", - "#/texts/110", + "#/texts/111", "en", - 0.9399999976158142 + 0.97 ], [ "semantic", @@ -77299,15 +77431,15 @@ "TEXT", "#/texts/111", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 15938840672015995359, + 16505790528099785698, "TEXT", - "#/texts/111", + "#/texts/112", "en", - 0.9700000286102295 + 0.36 ], [ "semantic", @@ -77315,15 +77447,15 @@ "TEXT", "#/texts/112", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 16505790528099785698, + 14738723905055920039, "TEXT", - "#/texts/112", + "#/texts/113", "en", - 0.36000001430511475 + 0.87 ], [ "semantic", @@ -77335,11 +77467,11 @@ ], [ "language", - 14738723905055920039, + 5699550326698755904, "TEXT", - "#/texts/113", + "#/texts/114", "en", - 0.8700000047683716 + 0.89 ], [ "semantic", @@ -77347,15 +77479,15 @@ "TEXT", "#/texts/114", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 5699550326698755904, + 11609131422778723150, "TEXT", - "#/texts/114", + "#/texts/115", "en", - 0.8899999856948853 + 0.91 ], [ "semantic", @@ -77363,15 +77495,15 @@ "TEXT", "#/texts/115", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 11609131422778723150, + 788128893109726279, "TEXT", - "#/texts/115", + "#/texts/116", "en", - 0.9100000262260437 + 0.97 ], [ "semantic", @@ -77379,15 +77511,15 @@ "TEXT", "#/texts/116", "text", - 0.9100000262260437 + 0.91 ], [ "language", - 788128893109726279, + 7029344862946908483, "TEXT", - "#/texts/116", + "#/texts/117", "en", - 0.9700000286102295 + 0.92 ], [ "semantic", @@ -77395,15 +77527,15 @@ "TEXT", "#/texts/117", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 7029344862946908483, + 18391264192891079539, "TEXT", - "#/texts/117", + "#/texts/118", "en", - 0.9200000166893005 + 0.78 ], [ "semantic", @@ -77411,15 +77543,15 @@ "TEXT", "#/texts/118", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 2144926686518491811, "TEXT", - "#/texts/118", - "en", - 0.7799999713897705 + "#/texts/119", + "fr", + 0.22 ], [ "semantic", @@ -77427,15 +77559,15 @@ "TEXT", "#/texts/119", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 2144926686518491811, + 18333396269095847693, "TEXT", - "#/texts/119", - "fr", - 0.2199999988079071 + "#/texts/120", + "en", + 0.96 ], [ "semantic", @@ -77443,15 +77575,15 @@ "TEXT", "#/texts/120", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18333396269095847693, + 4030998538427149966, "TEXT", - "#/texts/120", + "#/texts/121", "en", - 0.9599999785423279 + 0.51 ], [ "semantic", @@ -77459,15 +77591,15 @@ "TEXT", "#/texts/121", "header", - 0.7599999904632568 + 0.76 ], [ "language", - 4030998538427149966, + 10295608624766759271, "TEXT", - "#/texts/121", + "#/texts/122", "en", - 0.5099999904632568 + 0.94 ], [ "semantic", @@ -77475,15 +77607,15 @@ "TEXT", "#/texts/122", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 10295608624766759271, + 10633780781731536747, "TEXT", - "#/texts/122", + "#/texts/123", "en", - 0.9399999976158142 + 0.95 ], [ "semantic", @@ -77491,15 +77623,15 @@ "TEXT", "#/texts/123", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 10633780781731536747, + 18391264192891079539, "TEXT", - "#/texts/123", + "#/texts/124", "en", - 0.949999988079071 + 0.78 ], [ "semantic", @@ -77507,15 +77639,15 @@ "TEXT", "#/texts/124", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 1080447728722590413, "TEXT", - "#/texts/124", + "#/texts/125", "en", - 0.7799999713897705 + 0.14 ], [ "semantic", @@ -77527,11 +77659,11 @@ ], [ "language", - 1080447728722590413, + 4361549257087816853, "TEXT", - "#/texts/125", + "#/texts/126", "en", - 0.14000000059604645 + 0.95 ], [ "semantic", @@ -77539,15 +77671,15 @@ "TEXT", "#/texts/126", "text", - 0.8899999856948853 + 0.89 ], [ "language", - 4361549257087816853, + 10195664788154887804, "TEXT", - "#/texts/126", + "#/texts/127", "en", - 0.949999988079071 + 0.99 ], [ "semantic", @@ -77559,11 +77691,11 @@ ], [ "language", - 10195664788154887804, + 7538054744015619336, "TEXT", - "#/texts/127", + "#/texts/128", "en", - 0.9900000095367432 + 0.93 ], [ "semantic", @@ -77575,11 +77707,11 @@ ], [ "language", - 7538054744015619336, + 12426662601736619109, "TEXT", - "#/texts/128", + "#/texts/129", "en", - 0.9300000071525574 + 0.95 ], [ "semantic", @@ -77591,11 +77723,11 @@ ], [ "language", - 12426662601736619109, + 4162783521620221579, "TEXT", - "#/texts/129", + "#/texts/130", "en", - 0.949999988079071 + 0.87 ], [ "semantic", @@ -77603,15 +77735,15 @@ "TEXT", "#/texts/130", "header", - 0.46000000834465027 + 0.46 ], [ "language", - 4162783521620221579, + 5135259059216244866, "TEXT", - "#/texts/130", + "#/texts/131", "en", - 0.8700000047683716 + 0.94 ], [ "semantic", @@ -77619,15 +77751,15 @@ "TEXT", "#/texts/131", "text", - 0.7599999904632568 + 0.76 ], [ "language", - 5135259059216244866, + 16998817296948099535, "TEXT", - "#/texts/131", + "#/texts/132", "en", - 0.9399999976158142 + 0.97 ], [ "semantic", @@ -77635,15 +77767,15 @@ "TEXT", "#/texts/132", "text", - 0.8700000047683716 + 0.87 ], [ "language", - 16998817296948099535, + 1205649569241141618, "TEXT", - "#/texts/132", + "#/texts/133", "en", - 0.9700000286102295 + 0.94 ], [ "semantic", @@ -77651,15 +77783,15 @@ "TEXT", "#/texts/133", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 1205649569241141618, + 12257840490666828590, "TEXT", - "#/texts/133", + "#/texts/134", "en", - 0.9399999976158142 + 0.92 ], [ "semantic", @@ -77671,11 +77803,11 @@ ], [ "language", - 12257840490666828590, + 7040847965650746591, "TEXT", - "#/texts/134", + "#/texts/135", "en", - 0.9200000166893005 + 0.88 ], [ "semantic", @@ -77683,15 +77815,15 @@ "TEXT", "#/texts/135", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 7040847965650746591, + 7927601225025519287, "TEXT", - "#/texts/135", + "#/texts/136", "en", - 0.8799999952316284 + 0.89 ], [ "semantic", @@ -77703,11 +77835,11 @@ ], [ "language", - 7927601225025519287, + 18391264192891079539, "TEXT", - "#/texts/136", + "#/texts/137", "en", - 0.8899999856948853 + 0.78 ], [ "semantic", @@ -77715,15 +77847,15 @@ "TEXT", "#/texts/137", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 1080447728722590402, "TEXT", - "#/texts/137", - "en", - 0.7799999713897705 + "#/texts/138", + "ja", + 0.13 ], [ "semantic", @@ -77735,11 +77867,11 @@ ], [ "language", - 1080447728722590402, + 4361549257087816853, "TEXT", - "#/texts/138", - "ja", - 0.12999999523162842 + "#/texts/139", + "en", + 0.95 ], [ "semantic", @@ -77747,15 +77879,15 @@ "TEXT", "#/texts/139", "text", - 0.8899999856948853 + 0.89 ], [ "language", - 4361549257087816853, + 8207961846673301043, "TEXT", - "#/texts/139", + "#/texts/140", "en", - 0.949999988079071 + 0.9 ], [ "semantic", @@ -77763,15 +77895,15 @@ "TEXT", "#/texts/140", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 8207961846673301043, + 11998199584890640594, "TEXT", - "#/texts/140", + "#/texts/141", "en", - 0.8999999761581421 + 0.96 ], [ "semantic", @@ -77783,11 +77915,11 @@ ], [ "language", - 11998199584890640594, + 16446129547721407877, "TEXT", - "#/texts/141", + "#/texts/142", "en", - 0.9599999785423279 + 0.69 ], [ "semantic", @@ -77799,11 +77931,11 @@ ], [ "language", - 16446129547721407877, + 6720443978031524294, "TEXT", - "#/texts/142", + "#/texts/143", "en", - 0.6899999976158142 + 0.89 ], [ "semantic", @@ -77811,15 +77943,15 @@ "TEXT", "#/texts/143", "text", - 0.800000011920929 + 0.8 ], [ "language", - 6720443978031524294, + 18391264192891079539, "TEXT", - "#/texts/143", + "#/texts/144", "en", - 0.8899999856948853 + 0.78 ], [ "semantic", @@ -77827,15 +77959,15 @@ "TEXT", "#/texts/144", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 2144926730621142072, "TEXT", - "#/texts/144", - "en", - 0.7799999713897705 + "#/texts/145", + "pms", + 0.76 ], [ "semantic", @@ -77843,15 +77975,15 @@ "TEXT", "#/texts/145", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 2144926730621142072, + 14222671032550229818, "TEXT", - "#/texts/145", - "pms", - 0.7599999904632568 + "#/texts/146", + "en", + 0.89 ], [ "semantic", @@ -77859,15 +77991,15 @@ "TEXT", "#/texts/146", "text", - 0.6000000238418579 + 0.6 ], [ "language", - 14222671032550229818, + 17486770941839589126, "TEXT", - "#/texts/146", + "#/texts/147", "en", - 0.8899999856948853 + 0.99 ], [ "semantic", @@ -77875,15 +78007,15 @@ "TEXT", "#/texts/147", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 17486770941839589126, + 16574813224778118841, "TEXT", - "#/texts/147", + "#/texts/148", "en", - 0.9900000095367432 + 0.91 ], [ "semantic", @@ -77891,15 +78023,15 @@ "TEXT", "#/texts/148", "text", - 0.9700000286102295 + 0.97 ], [ "language", - 16574813224778118841, + 3356142343274371864, "TEXT", - "#/texts/148", + "#/texts/149", "en", - 0.9100000262260437 + 0.2 ], [ "semantic", @@ -77911,11 +78043,11 @@ ], [ "language", - 3356142343274371864, + 4778022085288441371, "TEXT", - "#/texts/149", + "#/texts/150", "en", - 0.20000000298023224 + 0.95 ], [ "semantic", @@ -77923,15 +78055,15 @@ "TEXT", "#/texts/150", "text", - 0.6299999952316284 + 0.63 ], [ "language", - 4778022085288441371, + 4361549257598904601, "TEXT", - "#/texts/150", - "en", - 0.949999988079071 + "#/texts/151", + "it", + 0.36 ], [ "semantic", @@ -77939,15 +78071,15 @@ "TEXT", "#/texts/151", "header", - 0.8500000238418579 + 0.85 ], [ "language", - 4361549257598904601, + 3523281823889115814, "TEXT", - "#/texts/151", - "it", - 0.36000001430511475 + "#/texts/152", + "en", + 0.3 ], [ "semantic", @@ -77955,15 +78087,15 @@ "TEXT", "#/texts/152", "meta-data", - 0.5799999833106995 + 0.58 ], [ "language", - 3523281823889115814, + 8500729849894221215, "TEXT", - "#/texts/152", + "#/texts/153", "en", - 0.30000001192092896 + 0.3 ], [ "semantic", @@ -77975,11 +78107,11 @@ ], [ "language", - 8500729849894221215, + 7813503946963688644, "TEXT", - "#/texts/153", + "#/texts/154", "en", - 0.30000001192092896 + 0.48 ], [ "semantic", @@ -77987,15 +78119,15 @@ "TEXT", "#/texts/154", "text", - 0.9900000095367432 + 0.99 ], [ "language", - 7813503946963688644, + 9230987401345399746, "TEXT", - "#/texts/154", + "#/texts/155", "en", - 0.47999998927116394 + 0.97 ], [ "semantic", @@ -78003,15 +78135,15 @@ "TEXT", "#/texts/155", "text", - 0.9100000262260437 + 0.91 ], [ "language", - 9230987401345399746, + 1997735398126013155, "TEXT", - "#/texts/155", + "#/texts/156", "en", - 0.9700000286102295 + 0.66 ], [ "semantic", @@ -78019,15 +78151,15 @@ "TEXT", "#/texts/156", "text", - 0.800000011920929 + 0.8 ], [ "language", - 1997735398126013155, + 13566764974477978642, "TEXT", - "#/texts/156", + "#/texts/157", "en", - 0.6600000262260437 + 0.75 ], [ "semantic", @@ -78039,11 +78171,11 @@ ], [ "language", - 13566764974477978642, + 4925537010788978399, "TEXT", - "#/texts/157", + "#/texts/158", "en", - 0.75 + 0.89 ], [ "semantic", @@ -78055,11 +78187,11 @@ ], [ "language", - 4925537010788978399, + 16552665876195410077, "TEXT", - "#/texts/158", + "#/texts/159", "en", - 0.8899999856948853 + 0.32 ], [ "semantic", @@ -78067,15 +78199,15 @@ "TEXT", "#/texts/159", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 16552665876195410077, + 17579390613842440572, "TEXT", - "#/texts/159", + "#/texts/160", "en", - 0.3199999928474426 + 0.72 ], [ "semantic", @@ -78083,15 +78215,15 @@ "TEXT", "#/texts/160", "text", - 0.800000011920929 + 0.8 ], [ "language", - 17579390613842440572, + 722212543953276862, "TEXT", - "#/texts/160", + "#/texts/161", "en", - 0.7200000286102295 + 0.94 ], [ "semantic", @@ -78099,15 +78231,15 @@ "TEXT", "#/texts/161", "text", - 0.9800000190734863 + 0.98 ], [ "language", - 722212543953276862, + 11085577343317113173, "TEXT", - "#/texts/161", + "#/texts/162", "en", - 0.9399999976158142 + 0.7 ], [ "semantic", @@ -78115,15 +78247,15 @@ "TEXT", "#/texts/162", "header", - 0.8199999928474426 + 0.82 ], [ "language", - 11085577343317113173, + 1792096630133661292, "TEXT", - "#/texts/162", - "en", - 0.699999988079071 + "#/texts/163", + "pl", + 0.19 ], [ "semantic", @@ -78131,15 +78263,15 @@ "TEXT", "#/texts/163", "reference", - 0.6000000238418579 + 0.6 ], [ "language", - 1792096630133661292, + 11462638369524745676, "TEXT", - "#/texts/163", - "pl", - 0.1899999976158142 + "#/texts/164", + "en", + 0.91 ], [ "semantic", @@ -78151,11 +78283,11 @@ ], [ "language", - 11462638369524745676, + 16611805225457383637, "TEXT", - "#/texts/164", + "#/texts/165", "en", - 0.9100000262260437 + 0.83 ], [ "semantic", @@ -78163,15 +78295,15 @@ "TEXT", "#/texts/165", "reference", - 0.4300000071525574 + 0.43 ], [ "language", - 16611805225457383637, + 1531505125666754945, "TEXT", - "#/texts/165", + "#/texts/166", "en", - 0.8299999833106995 + 0.26 ], [ "semantic", @@ -78179,15 +78311,15 @@ "TEXT", "#/texts/166", "reference", - 0.6600000262260437 + 0.66 ], [ "language", - 1531505125666754945, + 15684389308320953629, "TEXT", - "#/texts/166", + "#/texts/167", "en", - 0.25999999046325684 + 0.59 ], [ "semantic", @@ -78195,15 +78327,15 @@ "TEXT", "#/texts/167", "reference", - 0.6600000262260437 + 0.66 ], [ "language", - 15684389308320953629, + 14590754343934702701, "TEXT", - "#/texts/167", + "#/texts/168", "en", - 0.5899999737739563 + 0.33 ], [ "semantic", @@ -78215,11 +78347,11 @@ ], [ "language", - 14590754343934702701, + 10480452763767134455, "TEXT", - "#/texts/168", + "#/texts/169", "en", - 0.33000001311302185 + 0.52 ], [ "semantic", @@ -78227,15 +78359,15 @@ "TEXT", "#/texts/169", "reference", - 0.8299999833106995 + 0.83 ], [ "language", - 10480452763767134455, + 11866471329779366855, "TEXT", - "#/texts/169", + "#/texts/170", "en", - 0.5199999809265137 + 0.5 ], [ "semantic", @@ -78243,15 +78375,15 @@ "TEXT", "#/texts/170", "reference", - 0.949999988079071 + 0.95 ], [ "language", - 11866471329779366855, + 6016885898370676469, "TEXT", - "#/texts/170", + "#/texts/171", "en", - 0.5 + 0.7 ], [ "semantic", @@ -78259,15 +78391,15 @@ "TEXT", "#/texts/171", "reference", - 0.9200000166893005 + 0.92 ], [ "language", - 6016885898370676469, + 13946275785662847920, "TEXT", - "#/texts/171", + "#/texts/172", "en", - 0.699999988079071 + 0.63 ], [ "semantic", @@ -78275,15 +78407,15 @@ "TEXT", "#/texts/172", "reference", - 0.8199999928474426 + 0.82 ], [ "language", - 13946275785662847920, + 7693798302433367973, "TEXT", - "#/texts/172", + "#/texts/173", "en", - 0.6299999952316284 + 0.5 ], [ "semantic", @@ -78291,15 +78423,15 @@ "TEXT", "#/texts/173", "reference", - 0.9300000071525574 + 0.93 ], [ "language", - 7693798302433367973, + 3109792572574236398, "TEXT", - "#/texts/173", + "#/texts/174", "en", - 0.5 + 0.69 ], [ "semantic", @@ -78307,15 +78439,15 @@ "TEXT", "#/texts/174", "reference", - 0.949999988079071 + 0.95 ], [ "language", - 3109792572574236398, + 8111170387462350170, "TEXT", - "#/texts/174", + "#/texts/175", "en", - 0.6899999976158142 + 0.75 ], [ "semantic", @@ -78323,15 +78455,15 @@ "TEXT", "#/texts/175", "reference", - 0.9200000166893005 + 0.92 ], [ "language", - 8111170387462350170, + 14682702346227170925, "TEXT", - "#/texts/175", + "#/texts/176", "en", - 0.75 + 0.5 ], [ "semantic", @@ -78339,15 +78471,15 @@ "TEXT", "#/texts/176", "reference", - 0.8600000143051147 + 0.86 ], [ "language", - 14682702346227170925, + 18391264192891079539, "TEXT", - "#/texts/176", + "#/texts/177", "en", - 0.5 + 0.78 ], [ "semantic", @@ -78355,15 +78487,15 @@ "TEXT", "#/texts/177", "text", - 0.8999999761581421 + 0.9 ], [ "language", - 18391264192891079539, + 11430385775112165283, "TEXT", - "#/texts/177", + "#/texts/178", "en", - 0.7799999713897705 + 0.67 ], [ "semantic", @@ -78375,11 +78507,11 @@ ], [ "language", - 11430385775112165283, + 5825495964576843004, "TEXT", - "#/texts/178", + "#/texts/179", "en", - 0.6700000166893005 + 0.5 ], [ "semantic", @@ -78387,15 +78519,15 @@ "TEXT", "#/texts/179", "reference", - 0.699999988079071 + 0.7 ], [ "language", - 5825495964576843004, + 5698421097735371040, "TEXT", - "#/texts/179", + "#/texts/180", "en", - 0.5 + 0.31 ], [ "semantic", @@ -78403,15 +78535,15 @@ "TEXT", "#/texts/180", "text", - 0.5899999737739563 + 0.59 ], [ "language", - 5698421097735371040, + 5870535063942256428, "TEXT", - "#/texts/180", + "#/texts/181", "en", - 0.3100000023841858 + 0.45 ], [ "semantic", @@ -78419,15 +78551,15 @@ "TEXT", "#/texts/181", "reference", - 0.550000011920929 + 0.55 ], [ "language", - 5870535063942256428, + 18196767266655606709, "TEXT", - "#/texts/181", + "#/texts/182", "en", - 0.44999998807907104 + 0.69 ], [ "semantic", @@ -78435,15 +78567,15 @@ "TEXT", "#/texts/182", "reference", - 0.949999988079071 + 0.95 ], [ "language", - 18196767266655606709, + 3623403683642367845, "TEXT", - "#/texts/182", + "#/texts/183", "en", - 0.6899999976158142 + 0.45 ], [ "semantic", @@ -78451,15 +78583,15 @@ "TEXT", "#/texts/183", "reference", - 0.7799999713897705 + 0.78 ], [ "language", - 3623403683642367845, + 13936866850854297069, "TEXT", - "#/texts/183", + "#/texts/184", "en", - 0.44999998807907104 + 0.59 ], [ "semantic", @@ -78467,15 +78599,15 @@ "TEXT", "#/texts/184", "reference", - 0.9700000286102295 + 0.97 ], [ "language", - 13936866850854297069, + 8497015665124263236, "TEXT", - "#/texts/184", + "#/texts/185", "en", - 0.5899999737739563 + 0.41 ], [ "semantic", @@ -78483,15 +78615,15 @@ "TEXT", "#/texts/185", "reference", - 0.9800000190734863 + 0.98 ], [ "language", - 8497015665124263236, + 15947529491299956047, "TEXT", - "#/texts/185", + "#/texts/186", "en", - 0.4099999964237213 + 0.62 ], [ "semantic", @@ -78499,15 +78631,15 @@ "TEXT", "#/texts/186", "reference", - 0.7900000214576721 + 0.79 ], [ "language", - 15947529491299956047, + 14843401725435831033, "TEXT", - "#/texts/186", + "#/texts/187", "en", - 0.6200000047683716 + 0.63 ], [ "semantic", @@ -78515,15 +78647,15 @@ "TEXT", "#/texts/187", "reference", - 0.6600000262260437 + 0.66 ], [ "language", - 14843401725435831033, + 16676439669743530711, "TEXT", - "#/texts/187", + "#/texts/188", "en", - 0.6299999952316284 + 0.55 ], [ "semantic", @@ -78531,15 +78663,15 @@ "TEXT", "#/texts/188", "reference", - 0.8899999856948853 + 0.89 ], [ "language", - 16676439669743530711, + 2986547206451163051, "TEXT", - "#/texts/188", + "#/texts/189", "en", - 0.550000011920929 + 0.56 ], [ "semantic", @@ -78547,15 +78679,15 @@ "TEXT", "#/texts/189", "reference", - 0.699999988079071 + 0.7 ], [ "language", - 2986547206451163051, + 18391264192891079539, "TEXT", - "#/texts/189", + "#/texts/190", "en", - 0.5600000023841858 + 0.78 ], [ "semantic", @@ -78563,15 +78695,7 @@ "TEXT", "#/texts/190", "text", - 0.8999999761581421 - ], - [ - "language", - 18391264192891079539, - "TEXT", - "#/texts/190", - "en", - 0.7799999713897705 + 0.9 ] ], "headers": [ @@ -78604,16 +78728,16 @@ "type": "caption" } ], - "confidence": 0.9599999785423279, + "confidence": 0.96, "created_by": "high_conf_pred", "data": [ [ { "bbox": [ - 212.76950073242188, - 485.32318115234375, - 228.2480010986328, - 493.3896789550781 + 212.77, + 485.32, + 228.25, + 493.39 ], "col": 0, "col-header": false, @@ -78638,10 +78762,10 @@ }, { "bbox": [ - 280.4609375, - 485.32318115234375, - 315.0389404296875, - 493.3896789550781 + 280.46, + 485.32, + 315.04, + 493.39 ], "col": 1, "col-header": false, @@ -78666,10 +78790,10 @@ }, { "bbox": [ - 352.3488464355469, - 485.32318115234375, - 374.287353515625, - 493.3896789550781 + 352.35, + 485.32, + 374.29, + 493.39 ], "col": 2, "col-header": false, @@ -78694,10 +78818,10 @@ }, { "bbox": [ - 408.192138671875, - 485.32318115234375, - 430.1306457519531, - 493.3896789550781 + 408.19, + 485.32, + 430.13, + 493.39 ], "col": 3, "col-header": false, @@ -78722,10 +78846,10 @@ }, { "bbox": [ - 464.03546142578125, - 485.32318115234375, - 485.9739685058594, - 493.3896789550781 + 464.04, + 485.32, + 485.97, + 493.39 ], "col": 4, "col-header": false, @@ -78750,10 +78874,10 @@ }, { "bbox": [ - 519.8218383789062, - 485.32318115234375, - 541.7603149414062, - 493.3896789550781 + 519.82, + 485.32, + 541.76, + 493.39 ], "col": 5, "col-header": false, @@ -78780,10 +78904,10 @@ [ { "bbox": [ - 212.76950073242188, - 469.68743896484375, - 246.57400512695312, - 477.6859436035156 + 212.77, + 469.69, + 246.57, + 477.69 ], "col": 0, "col-header": false, @@ -78808,10 +78932,10 @@ }, { "bbox": [ - 280.4617919921875, - 469.68743896484375, - 294.4443054199219, - 477.6859436035156 + 280.46, + 469.69, + 294.44, + 477.69 ], "col": 1, "col-header": false, @@ -78836,10 +78960,10 @@ }, { "bbox": [ - 352.3488464355469, - 469.68743896484375, - 366.849853515625, - 477.6859436035156 + 352.35, + 469.69, + 366.85, + 477.69 ], "col": 2, "col-header": false, @@ -78864,10 +78988,10 @@ }, { "bbox": [ - 408.192138671875, - 469.68743896484375, - 422.6931457519531, - 477.6859436035156 + 408.19, + 469.69, + 422.69, + 477.69 ], "col": 3, "col-header": false, @@ -78892,10 +79016,10 @@ }, { "bbox": [ - 464.0354309082031, - 469.68743896484375, - 478.53643798828125, - 477.6859436035156 + 464.04, + 469.69, + 478.54, + 477.69 ], "col": 4, "col-header": false, @@ -78920,10 +79044,10 @@ }, { "bbox": [ - 519.82177734375, - 469.68743896484375, - 534.32275390625, - 477.6859436035156 + 519.82, + 469.69, + 534.32, + 477.69 ], "col": 5, "col-header": false, @@ -78973,10 +79097,10 @@ }, { "bbox": [ - 280.4609375, - 454.6636962890625, - 318.4134521484375, - 462.6622009277344 + 280.46, + 454.66, + 318.41, + 462.66 ], "col": 1, "col-header": false, @@ -79001,10 +79125,10 @@ }, { "bbox": [ - 352.3488464355469, - 454.6636962890625, - 366.849853515625, - 462.6622009277344 + 352.35, + 454.66, + 366.85, + 462.66 ], "col": 2, "col-header": false, @@ -79029,10 +79153,10 @@ }, { "bbox": [ - 408.192138671875, - 454.6636962890625, - 422.6931457519531, - 462.6622009277344 + 408.19, + 454.66, + 422.69, + 462.66 ], "col": 3, "col-header": false, @@ -79057,10 +79181,10 @@ }, { "bbox": [ - 464.0354309082031, - 454.6636962890625, - 478.53643798828125, - 462.6622009277344 + 464.04, + 454.66, + 478.54, + 462.66 ], "col": 4, "col-header": false, @@ -79085,10 +79209,10 @@ }, { "bbox": [ - 519.82177734375, - 454.6636962890625, - 534.32275390625, - 462.6622009277344 + 519.82, + 454.66, + 534.32, + 462.66 ], "col": 5, "col-header": false, @@ -79138,10 +79262,10 @@ }, { "bbox": [ - 280.4609375, - 439.63995361328125, - 298.6849365234375, - 447.6384582519531 + 280.46, + 439.64, + 298.68, + 447.64 ], "col": 1, "col-header": false, @@ -79166,10 +79290,10 @@ }, { "bbox": [ - 352.3479919433594, - 439.63995361328125, - 366.8489990234375, - 447.6384582519531 + 352.35, + 439.64, + 366.85, + 447.64 ], "col": 2, "col-header": false, @@ -79194,10 +79318,10 @@ }, { "bbox": [ - 408.1912841796875, - 439.63995361328125, - 422.6922912597656, - 447.6384582519531 + 408.19, + 439.64, + 422.69, + 447.64 ], "col": 3, "col-header": false, @@ -79222,10 +79346,10 @@ }, { "bbox": [ - 464.0345764160156, - 439.63995361328125, - 478.53558349609375, - 447.6384582519531 + 464.03, + 439.64, + 478.54, + 447.64 ], "col": 4, "col-header": false, @@ -79250,10 +79374,10 @@ }, { "bbox": [ - 519.8209228515625, - 439.63995361328125, - 534.3218994140625, - 447.6384582519531 + 519.82, + 439.64, + 534.32, + 447.64 ], "col": 5, "col-header": false, @@ -79280,10 +79404,10 @@ [ { "bbox": [ - 212.7694854736328, - 424.67315673828125, - 227.3724822998047, - 432.6716613769531 + 212.77, + 424.67, + 227.37, + 432.67 ], "col": 0, "col-header": false, @@ -79308,10 +79432,10 @@ }, { "bbox": [ - 280.4617919921875, - 424.67315673828125, - 294.4443054199219, - 432.6716613769531 + 280.46, + 424.67, + 294.44, + 432.67 ], "col": 1, "col-header": false, @@ -79336,10 +79460,10 @@ }, { "bbox": [ - 352.3488464355469, - 424.67315673828125, - 366.849853515625, - 432.6716613769531 + 352.35, + 424.67, + 366.85, + 432.67 ], "col": 2, "col-header": false, @@ -79364,10 +79488,10 @@ }, { "bbox": [ - 408.192138671875, - 424.67315673828125, - 422.6931457519531, - 432.6716613769531 + 408.19, + 424.67, + 422.69, + 432.67 ], "col": 3, "col-header": false, @@ -79392,10 +79516,10 @@ }, { "bbox": [ - 464.0354309082031, - 424.67315673828125, - 478.53643798828125, - 432.6716613769531 + 464.04, + 424.67, + 478.54, + 432.67 ], "col": 4, "col-header": false, @@ -79420,10 +79544,10 @@ }, { "bbox": [ - 519.82177734375, - 424.67315673828125, - 534.32275390625, - 432.6716613769531 + 519.82, + 424.67, + 534.32, + 432.67 ], "col": 5, "col-header": false, @@ -79473,10 +79597,10 @@ }, { "bbox": [ - 280.4609375, - 409.6494140625, - 318.4134521484375, - 417.6479187011719 + 280.46, + 409.65, + 318.41, + 417.65 ], "col": 1, "col-header": false, @@ -79501,10 +79625,10 @@ }, { "bbox": [ - 352.3488464355469, - 409.6494140625, - 366.849853515625, - 417.6479187011719 + 352.35, + 409.65, + 366.85, + 417.65 ], "col": 2, "col-header": false, @@ -79529,10 +79653,10 @@ }, { "bbox": [ - 408.192138671875, - 409.6494140625, - 422.6931457519531, - 417.6479187011719 + 408.19, + 409.65, + 422.69, + 417.65 ], "col": 3, "col-header": false, @@ -79557,10 +79681,10 @@ }, { "bbox": [ - 464.0354309082031, - 409.6494140625, - 478.53643798828125, - 417.6479187011719 + 464.04, + 409.65, + 478.54, + 417.65 ], "col": 4, "col-header": false, @@ -79585,10 +79709,10 @@ }, { "bbox": [ - 519.82177734375, - 409.6494140625, - 534.32275390625, - 417.6479187011719 + 519.82, + 409.65, + 534.32, + 417.65 ], "col": 5, "col-header": false, @@ -79638,10 +79762,10 @@ }, { "bbox": [ - 280.4609375, - 394.6826171875, - 298.6849365234375, - 402.6811218261719 + 280.46, + 394.68, + 298.68, + 402.68 ], "col": 1, "col-header": false, @@ -79666,10 +79790,10 @@ }, { "bbox": [ - 352.3479919433594, - 394.6826171875, - 366.8489990234375, - 402.6811218261719 + 352.35, + 394.68, + 366.85, + 402.68 ], "col": 2, "col-header": false, @@ -79694,10 +79818,10 @@ }, { "bbox": [ - 408.1912841796875, - 394.6826171875, - 422.6922912597656, - 402.6811218261719 + 408.19, + 394.68, + 422.69, + 402.68 ], "col": 3, "col-header": false, @@ -79722,10 +79846,10 @@ }, { "bbox": [ - 464.0345764160156, - 394.6826171875, - 478.53558349609375, - 402.6811218261719 + 464.03, + 394.68, + 478.54, + 402.68 ], "col": 4, "col-header": false, @@ -79750,10 +79874,10 @@ }, { "bbox": [ - 519.8209228515625, - 394.6826171875, - 534.3218994140625, - 402.6811218261719 + 519.82, + 394.68, + 534.32, + 402.68 ], "col": 5, "col-header": false, @@ -79780,10 +79904,10 @@ [ { "bbox": [ - 212.7694854736328, - 379.65887451171875, - 236.92648315429688, - 387.6573791503906 + 212.77, + 379.66, + 236.93, + 387.66 ], "col": 0, "col-header": false, @@ -79808,10 +79932,10 @@ }, { "bbox": [ - 280.4617919921875, - 379.65887451171875, - 294.4443054199219, - 387.6573791503906 + 280.46, + 379.66, + 294.44, + 387.66 ], "col": 1, "col-header": false, @@ -79836,10 +79960,10 @@ }, { "bbox": [ - 352.3488464355469, - 379.65887451171875, - 366.849853515625, - 387.6573791503906 + 352.35, + 379.66, + 366.85, + 387.66 ], "col": 2, "col-header": false, @@ -79864,10 +79988,10 @@ }, { "bbox": [ - 408.192138671875, - 379.65887451171875, - 422.6931457519531, - 387.6573791503906 + 408.19, + 379.66, + 422.69, + 387.66 ], "col": 3, "col-header": false, @@ -79892,10 +80016,10 @@ }, { "bbox": [ - 464.0354309082031, - 379.65887451171875, - 478.53643798828125, - 387.6573791503906 + 464.04, + 379.66, + 478.54, + 387.66 ], "col": 4, "col-header": false, @@ -79920,10 +80044,10 @@ }, { "bbox": [ - 519.82177734375, - 379.65887451171875, - 534.32275390625, - 387.6573791503906 + 519.82, + 379.66, + 534.32, + 387.66 ], "col": 5, "col-header": false, @@ -79973,10 +80097,10 @@ }, { "bbox": [ - 280.4609375, - 364.69207763671875, - 318.4134521484375, - 372.6905822753906 + 280.46, + 364.69, + 318.41, + 372.69 ], "col": 1, "col-header": false, @@ -80001,10 +80125,10 @@ }, { "bbox": [ - 352.3488464355469, - 364.69207763671875, - 366.849853515625, - 372.6905822753906 + 352.35, + 364.69, + 366.85, + 372.69 ], "col": 2, "col-header": false, @@ -80029,10 +80153,10 @@ }, { "bbox": [ - 408.192138671875, - 364.69207763671875, - 422.6931457519531, - 372.6905822753906 + 408.19, + 364.69, + 422.69, + 372.69 ], "col": 3, "col-header": false, @@ -80057,10 +80181,10 @@ }, { "bbox": [ - 464.0354309082031, - 364.69207763671875, - 478.53643798828125, - 372.6905822753906 + 464.04, + 364.69, + 478.54, + 372.69 ], "col": 4, "col-header": false, @@ -80085,10 +80209,10 @@ }, { "bbox": [ - 519.82177734375, - 364.69207763671875, - 534.32275390625, - 372.6905822753906 + 519.82, + 364.69, + 534.32, + 372.69 ], "col": 5, "col-header": false, @@ -80138,10 +80262,10 @@ }, { "bbox": [ - 280.4609375, - 349.6683349609375, - 298.6849365234375, - 357.6668395996094 + 280.46, + 349.67, + 298.68, + 357.67 ], "col": 1, "col-header": false, @@ -80166,10 +80290,10 @@ }, { "bbox": [ - 352.3479919433594, - 349.6683349609375, - 366.8489990234375, - 357.6668395996094 + 352.35, + 349.67, + 366.85, + 357.67 ], "col": 2, "col-header": false, @@ -80194,10 +80318,10 @@ }, { "bbox": [ - 408.1912841796875, - 349.6683349609375, - 422.6922912597656, - 357.6668395996094 + 408.19, + 349.67, + 422.69, + 357.67 ], "col": 3, "col-header": false, @@ -80222,10 +80346,10 @@ }, { "bbox": [ - 464.0345764160156, - 349.6683349609375, - 478.53558349609375, - 357.6668395996094 + 464.03, + 349.67, + 478.54, + 357.67 ], "col": 4, "col-header": false, @@ -80250,10 +80374,10 @@ }, { "bbox": [ - 519.8209228515625, - 349.6683349609375, - 534.3218994140625, - 357.6668395996094 + 519.82, + 349.67, + 534.32, + 357.67 ], "col": 5, "col-header": false, diff --git a/tests/test_nlp.py b/tests/test_nlp.py index b0224a17..f95fb710 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -3,6 +3,8 @@ import os import json +from tabulate import tabulate + from deepsearch_glm.nlp_utils import list_nlp_model_configs, init_nlp_model, \ extract_references_from_doc from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models @@ -10,7 +12,7 @@ from deepsearch_glm.nlp_train_semantic import train_semantic -GENERATE=False +GENERATE=True def round_floats(o): if isinstance(o, float): return round(o, 2) @@ -148,7 +150,6 @@ def test_03B_run_nlp_models_on_document(): check_dimensions(res["properties"]) -""" def test_03C_run_nlp_models_on_document(): model = init_nlp_model("language;semantic;sentence;term;verb;conn;geoloc;reference") @@ -184,7 +185,77 @@ def test_03C_run_nlp_models_on_document(): assert res==tdoc """ +def test_03D_run_nlp_models_on_document(): + + model_i = init_nlp_model("term") + model_j = init_nlp_model("semantic;language") + + model_ij = init_nlp_model("term;reference") + #model_ij = init_nlp_model("language;reference") + #model_ij = init_nlp_model("language;semantic") + + source = "./tests/data/docs/1806.02284.json" + target_i = "./tests/data/docs/1806.02284.nlp.i.json" + target_j = "./tests/data/docs/1806.02284.nlp.j.json" + target_ij = "./tests/data/docs/1806.02284.nlp.ij.json" + + if True: # generate the test-data + with open(source) as fr: + doc = json.load(fr) + + print("apply model_i") + res_i = model_i.apply_on_doc(doc) + #res_i = round_floats(res_i) + fw = open(target_i, "w") + fw.write(json.dumps(res_i, indent=2)+"\n") + fw.close() + + print("apply model_j") + #res_j = model_j.apply_on_doc(res_i) + res_j = model_j.apply_on_doc(doc) + res_j = round_floats(res_j) + + fw = open(target_j, "w") + fw.write(json.dumps(res_j, indent=2)+"\n") + fw.close() + + print("apply model_ij") + res_ij = model_ij.apply_on_doc(doc) + res_ij = round_floats(res_ij) + + fw = open(target_ij, "w") + fw.write(json.dumps(res_ij, indent=2)+"\n") + fw.close() + + assert len(res_j["properties"])==len(res_ij["properties"]) + assert len(res_j["instances"])==len(res_ij["instances"]) + + print(tabulate(res_j["properties"]["data"][0:10], + headers=res_j["properties"]["headers"])) + + print(tabulate(res_ij["properties"]["data"][0:10], + headers=res_ij["properties"]["headers"])) + + assert res_j["properties"]["data"]==res_ij["properties"]["data"] + + #assert res_j["instances"]==res_ij["instances"] + #assert res_j==res_ij + + else: + with open(source) as fr: + sdoc = json.load(fr) + + res = model.apply_on_doc(sdoc) + res = round_floats(res) + + with open(target) as fr: + tdoc = json.load(fr) + tdoc = round_floats(tdoc) + + assert res==tdoc +""" + def test_04A_terms(): source = "./tests/data/texts/terms.jsonl" From 74bf3f1a46b4f359fb3c131793e5529a46f9c67d Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Mon, 27 Nov 2023 14:15:16 +0100 Subject: [PATCH 15/22] fixed some of the tests Signed-off-by: Peter Staar --- src/andromeda/nlp/cls/semantic.h | 4 +- src/andromeda/nlp/ent/reference.h | 27 +++++++--- .../tooling/structs/subjects/document.h | 53 ++++++++++++++++--- tests/test_nlp.py | 7 +-- 4 files changed, 71 insertions(+), 20 deletions(-) diff --git a/src/andromeda/nlp/cls/semantic.h b/src/andromeda/nlp/cls/semantic.h index 3ce24f81..2240d234 100644 --- a/src/andromeda/nlp/cls/semantic.h +++ b/src/andromeda/nlp/cls/semantic.h @@ -69,7 +69,7 @@ namespace andromeda std::vector caption_refs; }; - const std::set nlp_model::dependencies = {LINK,NUMVAL}; + const std::set nlp_model::dependencies = {LINK, NUMVAL}; nlp_model::nlp_model(): fasttext_supervised_model(), @@ -279,6 +279,8 @@ namespace andromeda return false; //continue; // skip } + //LOG_S(INFO) << label << ", " << conf << ": " << text.substr(0, 64); + subj.properties.emplace_back(subj.get_hash(), TEXT, subj.get_self_ref(), get_name(), label, conf); subj.applied_models.insert(get_key()); diff --git a/src/andromeda/nlp/ent/reference.h b/src/andromeda/nlp/ent/reference.h index 86b72818..09296df4 100644 --- a/src/andromeda/nlp/ent/reference.h +++ b/src/andromeda/nlp/ent/reference.h @@ -45,7 +45,7 @@ namespace andromeda std::filesystem::path model_file; }; - const std::set nlp_model::dependencies = { SEMANTIC, LINK, NUMVAL}; + const std::set nlp_model::dependencies = { LINK, NUMVAL, SEMANTIC }; nlp_model::nlp_model(): model_file(get_crf_dir() / "reference/crf_reference.bin") @@ -69,6 +69,12 @@ namespace andromeda bool nlp_model::apply(subject& doc) { + if(not satisfies_dependencies(doc)) + { + return false; + } + + //LOG_S(INFO) << "#-texts: " << doc.texts.size(); for(auto& paragraph:doc.texts) { this->apply(*paragraph); @@ -79,18 +85,25 @@ namespace andromeda bool nlp_model::apply(subject& subj) { + //LOG_S(INFO) << __FILE__ << ":" << __LINE__ << "\t" << subj.get_text(); + if(not satisfies_dependencies(subj)) { + //LOG_S(WARNING) << "does not satisfy deps ... "; return false; } - + bool is_ref=false; for(auto& cls:subj.properties) - { - if((cls.get_type()==to_key(SEMANTIC)) and - (cls.is_label("reference"))) + { + if((cls.get_type()==to_key(SEMANTIC)) and (cls.is_label("reference"))) { is_ref = true; + //LOG_S(WARNING) << " => " << cls.get_type() << "\t" << cls.get_label(); + } + else + { + //LOG_S(INFO) << " => " << cls.get_type() << "\t" << cls.get_label(); } } @@ -109,7 +122,7 @@ namespace andromeda void nlp_model::run_model(subject& subj) { - //LOG_S(WARNING) << __FILE__ << ":" << __LINE__; + //LOG_S(WARNING) << __FILE__ << ":" << __LINE__ << "\t" << __FUNCTION__; std::vector crf_tokens={}; std::map ptid_to_wtid={}; @@ -251,7 +264,6 @@ namespace andromeda } // delete all non-reference instances - /* { auto itr=subj.instances.begin(); while(itr!=subj.instances.end()) @@ -266,7 +278,6 @@ namespace andromeda } } } - */ } } diff --git a/src/andromeda/tooling/structs/subjects/document.h b/src/andromeda/tooling/structs/subjects/document.h index af8af73d..32c7bae8 100644 --- a/src/andromeda/tooling/structs/subjects/document.h +++ b/src/andromeda/tooling/structs/subjects/document.h @@ -92,8 +92,11 @@ namespace andromeda void init_provs(); void show_provs(); + private: + void join_properties(); void join_instances(); + void join_applied_models(); private: @@ -280,8 +283,12 @@ namespace andromeda base_subject::from_json(doc, provs, other_lbl, other); - join_properties(); - join_instances(); + { + join_properties(); + join_instances(); + + join_applied_models(); + } return true; } @@ -628,24 +635,26 @@ namespace andromeda //} //} - LOG_S(INFO) << "#-instances: " << instances.size(); + //LOG_S(INFO) << "#-instances: " << instances.size(); std::sort(instances.begin(), instances.end()); + /* for(std::size_t l=0; l+1::join_applied_models() + { + for(auto& text:texts) + { + text->applied_models = this->applied_models; + } + + for(auto& table:tables) + { + table->applied_models = this->applied_models; + + for(auto& capt:table->captions) + { + capt->applied_models = this->applied_models; + } + } + + for(auto& figure:figures) + { + figure->applied_models = this->applied_models; + + for(auto& capt:figure->captions) + { + capt->applied_models = this->applied_models; + } + } + } } diff --git a/tests/test_nlp.py b/tests/test_nlp.py index c75606b2..001f06a1 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -35,7 +35,7 @@ def get_reduced_instances(instances): table=[] for row in instances["data"]: - if "texts" in row[4]: + if "reference" in row[0] and "texts" in row[4]: table.append([row[0], row[1], row[4], row[5], row[-2]]) return table, [headers[0], headers[1], headers[4], headers[5], headers[-2]] @@ -265,16 +265,17 @@ def test_03D(): #print(tabulate(table_j, headers=headers_j)) #print(tabulate(table_k, headers=headers_k)) + """ print("#-inst-i: ", len(table_i)) print("#-inst-j: ", len(table_j)) print("#-inst-k: ", len(table_k)) - + """ assert table_j==table_k #print("#-instances-j: ", len(res_j["instances"]["data"])) #print("#-instances-j: ", len(res_k["instances"]["data"])) - #assert len(res_j["instances"]["data"])==len(res_k["instances"]["data"]) + assert len(res_j["instances"]["data"])==len(res_k["instances"]["data"]) assert res_j["instances"]["data"]==res_k["instances"]["data"] assert res_j==res_k From cbcbb63612a3ea86efbb25120fd466b1fac5367d Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Tue, 28 Nov 2023 05:16:20 +0100 Subject: [PATCH 16/22] all nlp tests pass Signed-off-by: Peter Staar --- src/andromeda/tooling/structs/subjects/base.h | 12 +++++++++--- tests/data/docs/1806.02284.nlp.json | 1 - tests/data/docs/doc_01.nlp.json | 1 - tests/data/texts/terms.nlp.jsonl | 4 ++-- tests/data/texts/test_02A_text_01.jsonl | 2 +- tests/test_nlp.py | 2 +- 6 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/andromeda/tooling/structs/subjects/base.h b/src/andromeda/tooling/structs/subjects/base.h index d109a5ea..bf8a0cf1 100644 --- a/src/andromeda/tooling/structs/subjects/base.h +++ b/src/andromeda/tooling/structs/subjects/base.h @@ -45,6 +45,8 @@ namespace andromeda const static inline std::string confidence_lbl = "confidence"; // for tables and figures const static inline std::string created_by_lbl = "created_by"; // for tables and figures + + const static inline std::set implicit_models = {"lapos"}; public: @@ -309,6 +311,11 @@ namespace andromeda if(filters.size()==0 or filters.count(applied_models_lbl)) { + for(auto implicit_model:implicit_models) + { + applied_models.erase(implicit_model); + } + result[applied_models_lbl] = applied_models; } @@ -386,8 +393,7 @@ namespace andromeda } } - - std::set implicit_models={"lapos"}; + for(auto implicit_model:implicit_models) { applied_models.erase(implicit_model); @@ -418,7 +424,7 @@ namespace andromeda { nlohmann::json& json_vals = result[key]; json_vals = nlohmann::json::array({}); - + for(auto& val:vals) { json_vals.push_back(val->to_json(filters)); diff --git a/tests/data/docs/1806.02284.nlp.json b/tests/data/docs/1806.02284.nlp.json index 35603756..a8b18208 100644 --- a/tests/data/docs/1806.02284.nlp.json +++ b/tests/data/docs/1806.02284.nlp.json @@ -6,7 +6,6 @@ "expression", "geoloc", "language", - "lapos", "link", "name", "numval", diff --git a/tests/data/docs/doc_01.nlp.json b/tests/data/docs/doc_01.nlp.json index 186ee149..64f63d81 100644 --- a/tests/data/docs/doc_01.nlp.json +++ b/tests/data/docs/doc_01.nlp.json @@ -4,7 +4,6 @@ "cite", "expression", "language", - "lapos", "link", "name", "numval", diff --git a/tests/data/texts/terms.nlp.jsonl b/tests/data/texts/terms.nlp.jsonl index 3696bcb4..2054336e 100644 --- a/tests/data/texts/terms.nlp.jsonl +++ b/tests/data/texts/terms.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, null, null, 0, 177, 0, 164, 0, 38, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, null, null, 7, 31, 7, 26, 1, 9, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, null, null, 16, 26, 16, 23, 4, 7, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206575305750373, 3269040892355287555, null, null, 16, 25, 16, 22, 4, 6, true, "[f\u0281\u0251\u0303s", "[f\u0281\u0251\u0303s"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, null, null, 27, 30, 24, 25, 7, 8, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, null, null, 48, 63, 43, 58, 12, 14, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, null, null, 64, 122, 59, 109, 14, 24, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, null, null, 73, 95, 68, 88, 17, 19, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, null, null, 96, 106, 89, 98, 19, 21, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, null, null, 107, 121, 99, 108, 21, 23, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, null, null, 123, 127, 110, 114, 25, 29, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, null, null, 124, 126, 111, 113, 26, 28, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, null, null, 128, 130, 115, 117, 29, 30, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, null, null, 133, 140, 120, 127, 31, 32, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, null, null, 141, 158, 128, 145, 32, 34, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, null, null, 159, 161, 146, 148, 34, 35, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, null, null, 162, 176, 149, 163, 35, 37, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, null, null, 170, 176, 157, 163, 36, 37, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, null, null, 178, 375, 165, 362, 38, 74, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, null, null, 186, 194, 173, 181, 40, 41, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, null, null, 195, 211, 182, 198, 41, 43, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, null, null, 204, 227, 191, 214, 42, 45, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, null, null, 216, 227, 203, 214, 44, 45, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, null, null, 228, 234, 215, 221, 45, 47, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, null, null, 235, 243, 222, 230, 47, 48, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 820203855428083856, 16279894764651307170, null, null, 252, 280, 239, 267, 50, 55, true, "Atlantic, Pacific and Indian", "Atlantic, Pacific and Indian"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, null, null, 252, 260, 239, 247, 50, 51, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, null, null, 262, 269, 249, 256, 52, 53, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 4553045173532721202, 17291436396596241777, null, null, 274, 287, 261, 274, 54, 56, true, "Indian oceans", "Indian oceans"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, null, null, 281, 293, 268, 280, 55, 60, true, "oceans,[XII]", "oceans,[XII]"], ["parenthesis", "square brackets", 9818235231875948258, "TEXT", "#", 1.0, 329104147687597164, 12284735790511259080, null, null, 288, 293, 275, 280, 57, 60, true, "[XII]", "[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895542235, 10796895691287030884, null, null, 289, 292, 276, 279, 58, 59, true, "XII", "XII"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 14650940714797320124, 6236592394333508229, null, null, 292, 300, 279, 287, 59, 61, true, "] giving", "] giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, null, null, 308, 314, 295, 301, 63, 65, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, null, null, 315, 361, 302, 348, 65, 70, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, null, null, 362, 368, 349, 355, 70, 72, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, null, null, 369, 374, 356, 361, 72, 73, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, null, null, 376, 637, 363, 624, 74, 125, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, null, null, 376, 410, 363, 397, 74, 78, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, null, null, 389, 395, 376, 382, 75, 76, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, null, null, 411, 415, 398, 402, 78, 79, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, null, null, 416, 438, 403, 425, 79, 82, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 79, 80, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 79, 80, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 81, 82, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 81, 82, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, null, null, 439, 445, 426, 432, 82, 84, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, null, null, 446, 451, 433, 438, 84, 85, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 86, 87, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 86, 87, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, null, null, 461, 467, 448, 454, 87, 89, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 92, 93, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 92, 93, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, null, null, 492, 498, 479, 485, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, null, null, 505, 521, 492, 508, 97, 100, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 97, 98, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 97, 98, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, null, null, 515, 521, 502, 508, 99, 100, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, null, null, 522, 528, 509, 515, 100, 102, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, null, null, 541, 558, 528, 545, 105, 108, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 105, 106, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 105, 106, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 107, 108, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 107, 108, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, null, null, 559, 565, 546, 552, 108, 110, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, null, null, 566, 571, 553, 558, 110, 111, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, null, null, 579, 594, 566, 581, 114, 116, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, null, null, 595, 603, 582, 590, 116, 118, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 118, 120, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 118, 120, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, null, null, 619, 625, 606, 612, 120, 122, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, null, null, 626, 636, 613, 623, 122, 124, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, null, null, 638, 961, 625, 948, 125, 183, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, null, null, 642, 659, 629, 646, 126, 128, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, null, null, 660, 667, 647, 654, 128, 129, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, null, null, 668, 676, 655, 663, 129, 131, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, null, null, 677, 682, 664, 669, 131, 132, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, null, null, 683, 689, 670, 676, 132, 134, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 134, 136, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 134, 136, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, null, null, 709, 717, 696, 704, 137, 139, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 139, 141, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 139, 141, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, null, null, 736, 742, 723, 729, 141, 143, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 143, 145, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 143, 145, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 147, 149, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 147, 149, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, null, null, 778, 798, 765, 785, 150, 152, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, null, null, 799, 806, 786, 793, 152, 153, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, null, null, 807, 820, 794, 807, 153, 155, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, null, null, 821, 823, 808, 810, 155, 156, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, null, null, 824, 864, 811, 851, 156, 163, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 156, 158, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 156, 158, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, null, null, 839, 851, 826, 838, 159, 161, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, null, null, 856, 864, 843, 851, 162, 163, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, null, null, 865, 871, 852, 858, 163, 165, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, null, null, 872, 886, 859, 873, 165, 167, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, null, null, 892, 910, 879, 897, 169, 172, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, null, null, 916, 928, 903, 915, 174, 176, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, null, null, 929, 931, 916, 918, 176, 177, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 177, 178, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 177, 178, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 180, 182, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 180, 182, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, null, null, 962, 1384, 949, 1371, 183, 289, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, null, null, 966, 991, 953, 978, 184, 187, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, null, null, 992, 1020, 979, 1007, 187, 194, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, null, null, 998, 1000, 985, 987, 189, 190, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, null, null, 1007, 1019, 994, 1006, 191, 193, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, null, null, 1021, 1025, 1008, 1012, 194, 195, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, null, null, 1028, 1036, 1015, 1023, 196, 197, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, null, null, 1037, 1041, 1024, 1028, 197, 198, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, null, null, 1042, 1044, 1029, 1031, 198, 199, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, null, null, 1045, 1052, 1032, 1039, 199, 206, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, null, null, 1053, 1056, 1040, 1043, 206, 208, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486414, 16516410147586311652, null, null, 1053, 1055, 1040, 1042, 206, 207, true, "km", "km"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235162, 2654033242220620585, null, null, 1055, 1056, 1042, 1043, 207, 208, true, "2", "2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, null, null, 1057, 1072, 1044, 1059, 208, 219, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, null, null, 1058, 1065, 1045, 1052, 209, 216, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, null, null, 1066, 1071, 1053, 1058, 216, 218, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, null, null, 1077, 1081, 1064, 1068, 220, 221, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, null, null, 1084, 1100, 1071, 1087, 222, 224, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, null, null, 1101, 1103, 1088, 1090, 224, 225, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, null, null, 1104, 1108, 1091, 1095, 225, 226, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, null, null, 1109, 1111, 1096, 1098, 226, 228, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, null, null, 1112, 1119, 1099, 1106, 228, 229, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, null, null, 1120, 1122, 1107, 1109, 229, 230, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, null, null, 1123, 1125, 1110, 1112, 230, 231, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, null, null, 1126, 1133, 1113, 1120, 231, 232, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, null, null, 1134, 1145, 1121, 1132, 232, 243, true, "2023.[5][8]", "2023.[5][8]"], ["numval", "year", 9818235231875948258, "TEXT", "#", 1.0, 389609625548777251, 4871157181485963100, null, null, 1134, 1138, 1121, 1125, 232, 236, true, "2023", "2023"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577901, 10796892691399633238, null, null, 1139, 1142, 1126, 1129, 237, 240, true, "[5]", "[5]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235157, 2654033131002543179, null, null, 1140, 1141, 1127, 1128, 238, 239, true, "5", "5"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577838, 10796892702691935623, null, null, 1142, 1145, 1129, 1132, 240, 243, true, "[8]", "[8]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235152, 2654033132467492508, null, null, 1143, 1144, 1130, 1131, 241, 242, true, "8", "8"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, null, null, 1146, 1152, 1133, 1139, 243, 244, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, null, null, 1153, 1155, 1140, 1142, 244, 245, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14782540711164886662, 14111360077134393327, null, null, 1158, 1170, 1145, 1157, 246, 248, true, "unitary semi", "unitary semi"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, null, null, 1166, 1183, 1153, 1170, 247, 250, true, "semi-presidential", "semi-presidential"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9493572096187311884, 17586523526652496832, null, null, 1171, 1192, 1158, 1179, 249, 251, true, "presidential republic", "presidential republic"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, null, null, 1193, 1197, 1180, 1184, 251, 252, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, null, null, 1202, 1209, 1189, 1196, 253, 254, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, null, null, 1210, 1212, 1197, 1199, 254, 255, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, null, null, 1213, 1218, 1200, 1205, 255, 256, true, "Paris", "Paris"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, null, null, 1224, 1233, 1211, 1220, 258, 261, true, "countrys", "country's"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263873511, null, null, 1224, 1231, 1211, 1218, 258, 259, true, "country", "country"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13491731564569135959, 5310634626438687925, null, null, 1232, 1246, 1219, 1233, 260, 263, true, "s largest city", "s largest city"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, null, null, 1251, 1286, 1238, 1273, 264, 269, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, null, null, 1269, 1286, 1256, 1273, 267, 269, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, null, null, 1288, 1311, 1275, 1298, 270, 274, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, null, null, 1312, 1319, 1299, 1306, 274, 275, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, null, null, 1320, 1383, 1307, 1370, 275, 288, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, null, null, 1320, 1329, 1307, 1316, 275, 276, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, null, null, 1331, 1335, 1318, 1322, 277, 278, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, null, null, 1337, 1345, 1324, 1332, 279, 280, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, null, null, 1347, 1352, 1334, 1339, 281, 282, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, null, null, 1354, 1362, 1341, 1349, 283, 284, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, null, null, 1364, 1374, 1351, 1361, 285, 286, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, null, null, 1379, 1383, 1366, 1370, 287, 288, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.82]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, null, null, 0, 188, 0, 188, 0, 40, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, null, null, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, null, null, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, null, null, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301128091, null, null, 24, 33, 24, 33, 5, 6, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587921185, null, null, 34, 41, 34, 41, 6, 7, true, "pairing", "pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, null, null, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, null, null, 45, 53, 45, 53, 8, 11, true, "two-band", "two-band"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206570221100871, 2911818818181444888, null, null, 49, 55, 49, 55, 10, 12, true, "band s", "band s"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, null, null, 54, 60, 54, 60, 11, 14, true, "s-wave", "s-wave"], ["term", "enum-term-mark-2", 4522339299074192207, "TEXT", "#", 1.0, 8560127426779937860, 4026994879422986240, null, null, 56, 66, 56, 66, 13, 16, true, "wave and d", "wave and d"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625633602560, 14144633872330801396, null, null, 56, 60, 56, 60, 13, 14, true, "wave", "wave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, null, null, 65, 71, 65, 71, 15, 18, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5267005535915851615, 13852357345485708038, null, null, 67, 87, 67, 87, 17, 19, true, "wave superconductors", "wave superconductors"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, null, null, 88, 92, 88, 92, 19, 20, true, "with", "with"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, null, null, 93, 96, 93, 96, 20, 23, true, "D4h", "D4h"], ["numval", "ival", 4522339299074192207, "TEXT", "#", 1.0, 17767354399704235156, 8513040951015345484, null, null, 94, 95, 94, 95, 21, 22, true, "4", "4"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 2516792725790519961, 10765065347046652233, null, null, 95, 105, 95, 105, 22, 24, true, "h symmetry", "h symmetry"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, null, null, 106, 113, 106, 113, 24, 26, true, "in both", "in both"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, null, null, 114, 127, 114, 127, 26, 29, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183561878, null, null, 114, 118, 114, 118, 26, 27, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1366921581602115232, 15058186165846257397, null, null, 119, 137, 119, 137, 28, 30, true, "reversal invariant", "reversal invariant"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, null, null, 146, 148, 146, 148, 32, 33, true, "as", "as"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, null, null, 149, 162, 149, 162, 33, 36, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183567675, null, null, 149, 153, 149, 153, 33, 34, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16155708024079339904, 14846007814114510811, null, null, 154, 171, 154, 171, 35, 37, true, "reversal symmetry", "reversal symmetry"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, null, null, 172, 180, 172, 180, 37, 38, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, null, null, 181, 187, 181, 187, 38, 39, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, null, null, 189, 384, 189, 384, 40, 75, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, null, null, 193, 201, 193, 201, 41, 42, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, null, null, 202, 204, 202, 204, 42, 43, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, null, null, 205, 214, 205, 214, 43, 44, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, null, null, 215, 244, 215, 244, 44, 47, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, null, null, 249, 264, 249, 264, 48, 50, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, null, null, 265, 271, 265, 271, 50, 52, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, null, null, 272, 286, 272, 286, 52, 53, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, null, null, 288, 293, 288, 293, 54, 55, true, "nodes", "nodes"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, null, null, 298, 309, 298, 309, 56, 60, true, "(dis)appear", "(dis)appear"], ["parenthesis", "round brackets", 4522339299074192207, "TEXT", "#", 1.0, 329104053577713079, 7302082272979819201, null, null, 298, 303, 298, 303, 56, 59, true, "(dis)", "(dis)"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 12178341415895452094, 8713100074317547395, null, null, 299, 302, 299, 302, 57, 58, true, "dis", "dis"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 16381206574684919940, 8627590102959499799, null, null, 303, 309, 303, 309, 59, 60, true, "appear", "appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, null, null, 311, 316, 311, 316, 61, 62, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, null, null, 322, 327, 322, 327, 64, 65, true, "leave", "leave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, null, null, 328, 341, 328, 341, 65, 68, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4859188827321755536, 9887725278734779219, null, null, 333, 351, 333, 351, 67, 69, true, "symmetry locations", "symmetry locations"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, null, null, 357, 374, 357, 374, 70, 72, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, null, null, 375, 383, 375, 383, 72, 74, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, null, null, 385, 594, 385, 594, 75, 114, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, null, null, 398, 404, 398, 404, 77, 79, true, "in the", "in the"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, null, null, 405, 411, 405, 411, 79, 82, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 3545604367994270661, 11829255560935036292, null, null, 407, 416, 407, 416, 81, 83, true, "wave case", "wave case"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, null, null, 421, 425, 421, 425, 85, 86, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, null, null, 426, 430, 426, 430, 86, 87, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, null, null, 440, 454, 440, 454, 89, 91, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, null, null, 455, 475, 455, 475, 91, 93, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, null, null, 481, 490, 481, 490, 94, 95, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, null, null, 491, 498, 491, 498, 95, 96, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, null, null, 499, 508, 499, 508, 96, 97, true, "increases", "increases"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, null, null, 515, 526, 515, 526, 99, 102, true, "zero-energy", "zero-energy"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1885602650026083434, 12476719833465444023, null, null, 520, 534, 520, 534, 101, 103, true, "energy Andreev", "energy Andreev"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104159325585799, 66191664906118763, null, null, 535, 540, 535, 540, 103, 104, true, "bound", "bound"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433796974, null, null, 541, 547, 541, 547, 104, 105, true, "states", "states"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, null, null, 548, 555, 548, 555, 105, 107, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, null, null, 560, 570, 560, 570, 108, 109, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, null, null, 571, 573, 571, 573, 109, 110, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, null, null, 574, 593, 574, 593, 110, 113, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.87], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.97]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} +{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, null, null, 0, 177, 0, 164, 0, 38, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, null, null, 7, 31, 7, 26, 1, 9, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, null, null, 16, 26, 16, 23, 4, 7, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206575305750373, 3269040892355287555, null, null, 16, 25, 16, 22, 4, 6, true, "[f\u0281\u0251\u0303s", "[f\u0281\u0251\u0303s"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, null, null, 27, 30, 24, 25, 7, 8, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, null, null, 48, 63, 43, 58, 12, 14, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, null, null, 64, 122, 59, 109, 14, 24, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, null, null, 73, 95, 68, 88, 17, 19, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, null, null, 96, 106, 89, 98, 19, 21, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, null, null, 107, 121, 99, 108, 21, 23, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, null, null, 123, 127, 110, 114, 25, 29, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, null, null, 124, 126, 111, 113, 26, 28, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, null, null, 128, 130, 115, 117, 29, 30, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, null, null, 133, 140, 120, 127, 31, 32, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, null, null, 141, 158, 128, 145, 32, 34, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, null, null, 159, 161, 146, 148, 34, 35, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, null, null, 162, 176, 149, 163, 35, 37, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, null, null, 170, 176, 157, 163, 36, 37, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, null, null, 178, 375, 165, 362, 38, 74, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, null, null, 186, 194, 173, 181, 40, 41, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, null, null, 195, 211, 182, 198, 41, 43, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, null, null, 204, 227, 191, 214, 42, 45, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, null, null, 216, 227, 203, 214, 44, 45, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, null, null, 228, 234, 215, 221, 45, 47, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, null, null, 235, 243, 222, 230, 47, 48, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 820203855428083856, 16279894764651307170, null, null, 252, 280, 239, 267, 50, 55, true, "Atlantic, Pacific and Indian", "Atlantic, Pacific and Indian"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, null, null, 252, 260, 239, 247, 50, 51, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, null, null, 262, 269, 249, 256, 52, 53, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 4553045173532721202, 17291436396596241777, null, null, 274, 287, 261, 274, 54, 56, true, "Indian oceans", "Indian oceans"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, null, null, 281, 293, 268, 280, 55, 60, true, "oceans,[XII]", "oceans,[XII]"], ["parenthesis", "square brackets", 9818235231875948258, "TEXT", "#", 1.0, 329104147687597164, 12284735790511259080, null, null, 288, 293, 275, 280, 57, 60, true, "[XII]", "[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895542235, 10796895691287030884, null, null, 289, 292, 276, 279, 58, 59, true, "XII", "XII"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 14650940714797320124, 6236592394333508229, null, null, 292, 300, 279, 287, 59, 61, true, "] giving", "] giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, null, null, 308, 314, 295, 301, 63, 65, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, null, null, 315, 361, 302, 348, 65, 70, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, null, null, 362, 368, 349, 355, 70, 72, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, null, null, 369, 374, 356, 361, 72, 73, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, null, null, 376, 637, 363, 624, 74, 125, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, null, null, 376, 410, 363, 397, 74, 78, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, null, null, 389, 395, 376, 382, 75, 76, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, null, null, 411, 415, 398, 402, 78, 79, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, null, null, 416, 438, 403, 425, 79, 82, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 79, 80, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 79, 80, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 81, 82, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 81, 82, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, null, null, 439, 445, 426, 432, 82, 84, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, null, null, 446, 451, 433, 438, 84, 85, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 86, 87, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 86, 87, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, null, null, 461, 467, 448, 454, 87, 89, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 92, 93, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 92, 93, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, null, null, 492, 498, 479, 485, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, null, null, 505, 521, 492, 508, 97, 100, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 97, 98, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 97, 98, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, null, null, 515, 521, 502, 508, 99, 100, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, null, null, 522, 528, 509, 515, 100, 102, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, null, null, 541, 558, 528, 545, 105, 108, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 105, 106, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 105, 106, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 107, 108, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 107, 108, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, null, null, 559, 565, 546, 552, 108, 110, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, null, null, 566, 571, 553, 558, 110, 111, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, null, null, 579, 594, 566, 581, 114, 116, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, null, null, 595, 603, 582, 590, 116, 118, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 118, 120, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 118, 120, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, null, null, 619, 625, 606, 612, 120, 122, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, null, null, 626, 636, 613, 623, 122, 124, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, null, null, 638, 961, 625, 948, 125, 183, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, null, null, 642, 659, 629, 646, 126, 128, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, null, null, 660, 667, 647, 654, 128, 129, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, null, null, 668, 676, 655, 663, 129, 131, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, null, null, 677, 682, 664, 669, 131, 132, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, null, null, 683, 689, 670, 676, 132, 134, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 134, 136, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 134, 136, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, null, null, 709, 717, 696, 704, 137, 139, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 139, 141, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 139, 141, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, null, null, 736, 742, 723, 729, 141, 143, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 143, 145, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 143, 145, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 147, 149, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 147, 149, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, null, null, 778, 798, 765, 785, 150, 152, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, null, null, 799, 806, 786, 793, 152, 153, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, null, null, 807, 820, 794, 807, 153, 155, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, null, null, 821, 823, 808, 810, 155, 156, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, null, null, 824, 864, 811, 851, 156, 163, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 156, 158, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 156, 158, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, null, null, 839, 851, 826, 838, 159, 161, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, null, null, 856, 864, 843, 851, 162, 163, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, null, null, 865, 871, 852, 858, 163, 165, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, null, null, 872, 886, 859, 873, 165, 167, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, null, null, 892, 910, 879, 897, 169, 172, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, null, null, 916, 928, 903, 915, 174, 176, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, null, null, 929, 931, 916, 918, 176, 177, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 177, 178, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 177, 178, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 180, 182, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 180, 182, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, null, null, 962, 1384, 949, 1371, 183, 289, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, null, null, 966, 991, 953, 978, 184, 187, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, null, null, 992, 1020, 979, 1007, 187, 194, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, null, null, 998, 1000, 985, 987, 189, 190, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, null, null, 1007, 1019, 994, 1006, 191, 193, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, null, null, 1021, 1025, 1008, 1012, 194, 195, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, null, null, 1028, 1036, 1015, 1023, 196, 197, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, null, null, 1037, 1041, 1024, 1028, 197, 198, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, null, null, 1042, 1044, 1029, 1031, 198, 199, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, null, null, 1045, 1052, 1032, 1039, 199, 206, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, null, null, 1053, 1056, 1040, 1043, 206, 208, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486414, 16516410147586311652, null, null, 1053, 1055, 1040, 1042, 206, 207, true, "km", "km"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235162, 2654033242220620585, null, null, 1055, 1056, 1042, 1043, 207, 208, true, "2", "2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, null, null, 1057, 1072, 1044, 1059, 208, 219, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, null, null, 1058, 1065, 1045, 1052, 209, 216, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, null, null, 1066, 1071, 1053, 1058, 216, 218, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, null, null, 1077, 1081, 1064, 1068, 220, 221, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, null, null, 1084, 1100, 1071, 1087, 222, 224, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, null, null, 1101, 1103, 1088, 1090, 224, 225, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, null, null, 1104, 1108, 1091, 1095, 225, 226, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, null, null, 1109, 1111, 1096, 1098, 226, 228, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, null, null, 1112, 1119, 1099, 1106, 228, 229, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, null, null, 1120, 1122, 1107, 1109, 229, 230, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, null, null, 1123, 1125, 1110, 1112, 230, 231, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, null, null, 1126, 1133, 1113, 1120, 231, 232, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, null, null, 1134, 1145, 1121, 1132, 232, 243, true, "2023.[5][8]", "2023.[5][8]"], ["numval", "year", 9818235231875948258, "TEXT", "#", 1.0, 389609625548777251, 4871157181485963100, null, null, 1134, 1138, 1121, 1125, 232, 236, true, "2023", "2023"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577901, 10796892691399633238, null, null, 1139, 1142, 1126, 1129, 237, 240, true, "[5]", "[5]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235157, 2654033131002543179, null, null, 1140, 1141, 1127, 1128, 238, 239, true, "5", "5"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577838, 10796892702691935623, null, null, 1142, 1145, 1129, 1132, 240, 243, true, "[8]", "[8]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235152, 2654033132467492508, null, null, 1143, 1144, 1130, 1131, 241, 242, true, "8", "8"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, null, null, 1146, 1152, 1133, 1139, 243, 244, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, null, null, 1153, 1155, 1140, 1142, 244, 245, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14782540711164886662, 14111360077134393327, null, null, 1158, 1170, 1145, 1157, 246, 248, true, "unitary semi", "unitary semi"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, null, null, 1166, 1183, 1153, 1170, 247, 250, true, "semi-presidential", "semi-presidential"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9493572096187311884, 17586523526652496832, null, null, 1171, 1192, 1158, 1179, 249, 251, true, "presidential republic", "presidential republic"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, null, null, 1193, 1197, 1180, 1184, 251, 252, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, null, null, 1202, 1209, 1189, 1196, 253, 254, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, null, null, 1210, 1212, 1197, 1199, 254, 255, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, null, null, 1213, 1218, 1200, 1205, 255, 256, true, "Paris", "Paris"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, null, null, 1224, 1233, 1211, 1220, 258, 261, true, "countrys", "country's"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263873511, null, null, 1224, 1231, 1211, 1218, 258, 259, true, "country", "country"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13491731564569135959, 5310634626438687925, null, null, 1232, 1246, 1219, 1233, 260, 263, true, "s largest city", "s largest city"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, null, null, 1251, 1286, 1238, 1273, 264, 269, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, null, null, 1269, 1286, 1256, 1273, 267, 269, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, null, null, 1288, 1311, 1275, 1298, 270, 274, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, null, null, 1312, 1319, 1299, 1306, 274, 275, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, null, null, 1320, 1383, 1307, 1370, 275, 288, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, null, null, 1320, 1329, 1307, 1316, 275, 276, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, null, null, 1331, 1335, 1318, 1322, 277, 278, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, null, null, 1337, 1345, 1324, 1332, 279, 280, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, null, null, 1347, 1352, 1334, 1339, 281, 282, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, null, null, 1354, 1362, 1341, 1349, 283, 284, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, null, null, 1364, 1374, 1351, 1361, 285, 286, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, null, null, 1379, 1383, 1366, 1370, 287, 288, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.82]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} +{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, null, null, 0, 188, 0, 188, 0, 40, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, null, null, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, null, null, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, null, null, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301128091, null, null, 24, 33, 24, 33, 5, 6, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587921185, null, null, 34, 41, 34, 41, 6, 7, true, "pairing", "pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, null, null, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, null, null, 45, 53, 45, 53, 8, 11, true, "two-band", "two-band"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206570221100871, 2911818818181444888, null, null, 49, 55, 49, 55, 10, 12, true, "band s", "band s"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, null, null, 54, 60, 54, 60, 11, 14, true, "s-wave", "s-wave"], ["term", "enum-term-mark-2", 4522339299074192207, "TEXT", "#", 1.0, 8560127426779937860, 4026994879422986240, null, null, 56, 66, 56, 66, 13, 16, true, "wave and d", "wave and d"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625633602560, 14144633872330801396, null, null, 56, 60, 56, 60, 13, 14, true, "wave", "wave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, null, null, 65, 71, 65, 71, 15, 18, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5267005535915851615, 13852357345485708038, null, null, 67, 87, 67, 87, 17, 19, true, "wave superconductors", "wave superconductors"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, null, null, 88, 92, 88, 92, 19, 20, true, "with", "with"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, null, null, 93, 96, 93, 96, 20, 23, true, "D4h", "D4h"], ["numval", "ival", 4522339299074192207, "TEXT", "#", 1.0, 17767354399704235156, 8513040951015345484, null, null, 94, 95, 94, 95, 21, 22, true, "4", "4"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 2516792725790519961, 10765065347046652233, null, null, 95, 105, 95, 105, 22, 24, true, "h symmetry", "h symmetry"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, null, null, 106, 113, 106, 113, 24, 26, true, "in both", "in both"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, null, null, 114, 127, 114, 127, 26, 29, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183561878, null, null, 114, 118, 114, 118, 26, 27, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1366921581602115232, 15058186165846257397, null, null, 119, 137, 119, 137, 28, 30, true, "reversal invariant", "reversal invariant"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, null, null, 146, 148, 146, 148, 32, 33, true, "as", "as"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, null, null, 149, 162, 149, 162, 33, 36, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183567675, null, null, 149, 153, 149, 153, 33, 34, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16155708024079339904, 14846007814114510811, null, null, 154, 171, 154, 171, 35, 37, true, "reversal symmetry", "reversal symmetry"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, null, null, 172, 180, 172, 180, 37, 38, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, null, null, 181, 187, 181, 187, 38, 39, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, null, null, 189, 384, 189, 384, 40, 75, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, null, null, 193, 201, 193, 201, 41, 42, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, null, null, 202, 204, 202, 204, 42, 43, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, null, null, 205, 214, 205, 214, 43, 44, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, null, null, 215, 244, 215, 244, 44, 47, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, null, null, 249, 264, 249, 264, 48, 50, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, null, null, 265, 271, 265, 271, 50, 52, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, null, null, 272, 286, 272, 286, 52, 53, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, null, null, 288, 293, 288, 293, 54, 55, true, "nodes", "nodes"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, null, null, 298, 309, 298, 309, 56, 60, true, "(dis)appear", "(dis)appear"], ["parenthesis", "round brackets", 4522339299074192207, "TEXT", "#", 1.0, 329104053577713079, 7302082272979819201, null, null, 298, 303, 298, 303, 56, 59, true, "(dis)", "(dis)"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 12178341415895452094, 8713100074317547395, null, null, 299, 302, 299, 302, 57, 58, true, "dis", "dis"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 16381206574684919940, 8627590102959499799, null, null, 303, 309, 303, 309, 59, 60, true, "appear", "appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, null, null, 311, 316, 311, 316, 61, 62, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, null, null, 322, 327, 322, 327, 64, 65, true, "leave", "leave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, null, null, 328, 341, 328, 341, 65, 68, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4859188827321755536, 9887725278734779219, null, null, 333, 351, 333, 351, 67, 69, true, "symmetry locations", "symmetry locations"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, null, null, 357, 374, 357, 374, 70, 72, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, null, null, 375, 383, 375, 383, 72, 74, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, null, null, 385, 594, 385, 594, 75, 114, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, null, null, 398, 404, 398, 404, 77, 79, true, "in the", "in the"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, null, null, 405, 411, 405, 411, 79, 82, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 3545604367994270661, 11829255560935036292, null, null, 407, 416, 407, 416, 81, 83, true, "wave case", "wave case"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, null, null, 421, 425, 421, 425, 85, 86, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, null, null, 426, 430, 426, 430, 86, 87, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, null, null, 440, 454, 440, 454, 89, 91, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, null, null, 455, 475, 455, 475, 91, 93, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, null, null, 481, 490, 481, 490, 94, 95, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, null, null, 491, 498, 491, 498, 95, 96, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, null, null, 499, 508, 499, 508, 96, 97, true, "increases", "increases"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, null, null, 515, 526, 515, 526, 99, 102, true, "zero-energy", "zero-energy"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1885602650026083434, 12476719833465444023, null, null, 520, 534, 520, 534, 101, 103, true, "energy Andreev", "energy Andreev"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104159325585799, 66191664906118763, null, null, 535, 540, 535, 540, 103, 104, true, "bound", "bound"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433796974, null, null, 541, 547, 541, 547, 104, 105, true, "states", "states"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, null, null, 548, 555, 548, 555, 105, 107, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, null, null, 560, 570, 560, 570, 108, 109, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, null, null, 571, 573, 571, 573, 109, 110, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, null, null, 574, 593, 574, 593, 110, 113, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.87], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.97]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} diff --git a/tests/data/texts/test_02A_text_01.jsonl b/tests/data/texts/test_02A_text_01.jsonl index cadf9345..fd5d3ffe 100644 --- a/tests/data/texts/test_02A_text_01.jsonl +++ b/tests/data/texts/test_02A_text_01.jsonl @@ -1 +1 @@ -{"applied-models": ["cite", "expression", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "#", "hash": 253473544312511038, "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, null, null, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, null, null, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, null, null, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"applied-models": ["cite", "expression", "language", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "#", "hash": 253473544312511038, "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, null, null, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, null, null, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, null, null, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 001f06a1..cf8a406a 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -429,7 +429,7 @@ def test_04C(): assert res==data -def test_05_to_legacy(): +def test_05A(): model = init_nlp_model("reference;term") From c87de6f3fdc2119baf51de2586a12ab287a15e74 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Wed, 29 Nov 2023 10:59:04 +0100 Subject: [PATCH 17/22] updated the tokenization Signed-off-by: Peter Staar --- deepsearch_glm/nlp_train_reference.py | 34 +- .../tooling/structs/elements/text_element.h | 53 +- src/andromeda/tooling/structs/subjects/base.h | 15 +- .../tooling/structs/tokens/char_constants.h | 2 +- tests/data/docs/1806.02284.nlp.json | 3401 +++++----- tests/data/docs/doc_01.nlp.json | 5589 ++++++++--------- tests/data/texts/references.nlp.jsonl | 4 +- tests/data/texts/semantics.nlp.jsonl | 14 +- tests/data/texts/terms.nlp.jsonl | 4 +- tests/data/texts/test_02A_text_01.jsonl | 2 +- tests/data/texts/test_02B_text_01.jsonl | 2 +- tests/test_nlp.py | 6 +- 12 files changed, 4680 insertions(+), 4446 deletions(-) diff --git a/deepsearch_glm/nlp_train_reference.py b/deepsearch_glm/nlp_train_reference.py index 2630bc6c..f114b78b 100644 --- a/deepsearch_glm/nlp_train_reference.py +++ b/deepsearch_glm/nlp_train_reference.py @@ -91,15 +91,40 @@ def extract_references(filenames, ofile): except: continue - odoc = nlp_model.apply_on_doc(idoc) + if random.random()<0.9: + training_sample = True + else: + training_sample = False + odoc = nlp_model.apply_on_doc(idoc) + + props = pd.DataFrame(odoc["properties"]["data"], + columns=odoc["properties"]["headers"]) + + props_refs = props[props["label"]=="reference"] + #print(props_refs) + refs_hash = list(props_refs["subj_hash"]) + + texts = pd.DataFrame.from_records(odoc["texts"]) + #print(texts) + + refs = pd.merge(props_refs, texts, how='inner', on=['subj_hash']) + #print(refs[refs["confidence"]>0.95][["confidence", "text"]]) + + for i,ref in refs.iterrows(): + + if ref["confidence"]>0.95 and len(ref["text"])>32: + item = {"training-sample": training_sample, "text": ref["text"]} + fw.write(json.dumps(item)+"\n") + + #input("continue ...") + + """ for item in odoc["texts"]: if "properties" not in item: continue - df = pd.DataFrame(item["properties"]["data"], - columns=item["properties"]["headers"]) if (df[df["type"]=="semantic"]["label"]=="reference").bool(): #print(item["text"]) @@ -113,7 +138,8 @@ def extract_references(filenames, ofile): item = {"training-sample": training_sample, "text": item["text"]} fw.write(json.dumps(item)+"\n") - + """ + fw.close() print("#-items: ", total) diff --git a/src/andromeda/tooling/structs/elements/text_element.h b/src/andromeda/tooling/structs/elements/text_element.h index 34771c0c..232eee44 100644 --- a/src/andromeda/tooling/structs/elements/text_element.h +++ b/src/andromeda/tooling/structs/elements/text_element.h @@ -357,14 +357,15 @@ namespace andromeda { std::string tmp = char_tokens.at(j).str(); - if(constants::spaces.count(tmp) or - constants::brackets.count(tmp) or + if(constants::spaces.count(tmp) or + constants::brackets.count(tmp) or constants::punktuation.count(tmp) or - constants::numbers.count(tmp) ) - { - stop = true; - } - + constants::numbers.count(tmp)) + { + stop = true; + } + + if((not stop) or (j-i)==0) { dst += char_tokens.at(j).len(); @@ -378,6 +379,8 @@ namespace andromeda { stop = true; } + + //LOG_S(INFO) << stop << "\t" << tmp << "\t" << ss.str(); } std::string word = ss.str(); @@ -390,6 +393,42 @@ namespace andromeda char_l += dst; } + + // contract all pure numbers (0-9) into integers + auto curr = word_tokens.begin(); + auto prev = word_tokens.begin(); + while(curr != word_tokens.end()) + { + if(curr==word_tokens.begin()) + { + curr++; + } + else + { + auto prev_wrd = prev->get_word(); + auto curr_wrd = curr->get_word(); + + auto prev_char = prev_wrd.back(); + auto curr_char = curr_wrd.back(); + + if('0'<=prev_char and prev_char<='9' and + '0'<=curr_char and curr_char<='9' and + prev->get_rng(1)==curr->get_rng(0)) + { + prev_wrd += curr_wrd; + + word_token token(prev->get_rng(0), prev_wrd); + *prev = token; + + curr = word_tokens.erase(curr); + } + else + { + prev++; + curr++; + } + } + } } void text_element::contract_word_tokens() diff --git a/src/andromeda/tooling/structs/subjects/base.h b/src/andromeda/tooling/structs/subjects/base.h index bf8a0cf1..042a0086 100644 --- a/src/andromeda/tooling/structs/subjects/base.h +++ b/src/andromeda/tooling/structs/subjects/base.h @@ -24,9 +24,10 @@ namespace andromeda const static inline std::string recs_lbl = "records"; const static inline std::string prov_lbl = "prov"; - const static inline std::string hash_lbl = "hash"; - //const static inline std::string text_lbl = "text"; + const static inline std::string subj_hash_lbl = "subj_hash"; + const static inline std::string text_hash_lbl = "text_hash"; // for text + const static inline std::string dloc_lbl = "dloc"; // location in the document const static inline std::string sref_lbl = "sref"; // self-reference via path const static inline std::string jref_lbl = "$ref"; // json-ref convention @@ -34,12 +35,11 @@ namespace andromeda const static inline std::string name_lbl = "name"; const static inline std::string type_lbl = "type"; - const static inline std::string applied_models_lbl = "applied-models"; + const static inline std::string applied_models_lbl = "applied_models"; const static inline std::string text_lbl = "text"; // for text const static inline std::string orig_lbl = "orig"; // for text - const static inline std::string text_hash_lbl = "text-hash"; // for text - + const static inline std::string table_data_lbl = "data"; // for tables and figures const static inline std::string figure_data_lbl = "data"; // for tables and figures @@ -286,7 +286,7 @@ namespace andromeda nlohmann::json result = nlohmann::json::object({}); { - result[hash_lbl] = hash; + result[subj_hash_lbl] = hash; result[dloc_lbl] = dloc; result[sref_lbl] = sref; } @@ -343,7 +343,8 @@ namespace andromeda bool base_subject::_from_json(const nlohmann::json& item) { - hash = item.value(hash_lbl, hash); + hash = item.value(subj_hash_lbl, hash); + dloc = item.value(dloc_lbl, dloc); sref = item.value(sref_lbl, sref); diff --git a/src/andromeda/tooling/structs/tokens/char_constants.h b/src/andromeda/tooling/structs/tokens/char_constants.h index 3fd00215..0427e013 100644 --- a/src/andromeda/tooling/structs/tokens/char_constants.h +++ b/src/andromeda/tooling/structs/tokens/char_constants.h @@ -30,7 +30,7 @@ namespace andromeda "#"}; const std::set constants::special_words={"''"}; - + const std::set constants::abbreviations={"e.g.", "i.e.", "et al.", "etc."}; } diff --git a/tests/data/docs/1806.02284.nlp.json b/tests/data/docs/1806.02284.nlp.json index a8b18208..67011a98 100644 --- a/tests/data/docs/1806.02284.nlp.json +++ b/tests/data/docs/1806.02284.nlp.json @@ -1,6 +1,6 @@ { "_s3_data": {}, - "applied-models": [ + "applied_models": [ "cite", "conn", "expression", @@ -433,7 +433,6 @@ "captions": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/14", - "hash": 16535999405521191333, "orig": "Figure 1: A diagram of the conversion pipeline in the Corpus Conversion Service platform. It consists of 5 components: (1) Parsing of the document and its contained bitmap images, (2) Annotating the text of the parsed documents with layout semantic labels, (3) Training models based on the ground-truth acquired by the annotations, (4) Applying machine learned models on the parsed documents to determine the layout semantic label of each cell and finally (5) Assembling the document into a structured data format (e. g. JSON). The main conversion pipeline is depicted in blue and allows you to process and convert documents at scale into a structured data format. The green and orange sections can be used optionally, in order to process scanned documents (green) or train new models based on human annotation (orange).", "prov": [ { @@ -441,6 +440,7 @@ } ], "sref": "#/figures/0/captions/0", + "subj_hash": 16535999405521191333, "text": "Figure 1: A diagram of the conversion pipeline in the Corpus Conversion Service platform. It consists of 5 components: (1) Parsing of the document and its contained bitmap images, (2) Annotating the text of the parsed documents with layout semantic labels, (3) Training models based on the ground-truth acquired by the annotations, (4) Applying machine learned models on the parsed documents to determine the layout semantic label of each cell and finally (5) Assembling the document into a structured data format (e. g. JSON). The main conversion pipeline is depicted in blue and allows you to process and convert documents at scale into a structured data format. The green and orange sections can be used optionally, in order to process scanned documents (green) or train new models based on human annotation (orange).", "text-hash": 9615465947839001361, "type": "caption" @@ -450,7 +450,6 @@ "created_by": "unknown", "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/0", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -458,13 +457,13 @@ } ], "sref": "#/figures/0", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/29", - "hash": 9115121388992506886, "orig": "Figure 3: The labelled cells annotated on the title page of a poster abstract about the CCS [11]. Here, the title, authors, affiliation, subtitle, main-text, caption and picture labels are represented respectively as red, green, purple, dark-red, yellow, orange and ivory.", "prov": [ { @@ -472,6 +471,7 @@ } ], "sref": "#/figures/1/captions/0", + "subj_hash": 9115121388992506886, "text": "Figure 3: The labelled cells annotated on the title page of a poster abstract about the CCS [11]. Here, the title, authors, affiliation, subtitle, main-text, caption and picture labels are represented respectively as red, green, purple, dark-red, yellow, orange and ivory.", "text-hash": 17324714532994059892, "type": "caption" @@ -481,7 +481,6 @@ "created_by": "unknown", "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/1", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -489,13 +488,13 @@ } ], "sref": "#/figures/1", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/24", - "hash": 14775249782836392461, "orig": "Figure 2: The cells obtained for the title page of a poster abstract about the CCS [11] after the parsing stage. During the parsing, we extract all bounding boxes of the text (or cells) in such a way that they all have: (1) a maximum width, (2) are only single line and (3) split into multiple cells in case of listidentifiers, multi-columns or crossing vertical lines (such as in tables).", "prov": [ { @@ -503,6 +502,7 @@ } ], "sref": "#/figures/2/captions/0", + "subj_hash": 14775249782836392461, "text": "Figure 2: The cells obtained for the title page of a poster abstract about the CCS [11] after the parsing stage. During the parsing, we extract all bounding boxes of the text (or cells) in such a way that they all have: (1) a maximum width, (2) are only single line and (3) split into multiple cells in case of listidentifiers, multi-columns or crossing vertical lines (such as in tables).", "text-hash": 6754994759646241897, "type": "caption" @@ -512,7 +512,6 @@ "created_by": "unknown", "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/2", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -520,13 +519,13 @@ } ], "sref": "#/figures/2", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/41", - "hash": 7479698582664857938, "orig": "Figure 4: The annotation rate of pages for two different collections (Physical Review B and Elsevier papers) as a function of the number of annotated pages. As one can observe, the mean annotation rate is increasing after each training (depicted by a vertical dashed red line). After the first training, the human annotator is presented a pre-annotated page, using the predictions from the latest model. As the predictions become better with increasing size of the ground-truth, less corrections need to be made and hence more pages can be annotated in similar time intervals.", "prov": [ { @@ -534,6 +533,7 @@ } ], "sref": "#/figures/3/captions/0", + "subj_hash": 7479698582664857938, "text": "Figure 4: The annotation rate of pages for two different collections (Physical Review B and Elsevier papers) as a function of the number of annotated pages. As one can observe, the mean annotation rate is increasing after each training (depicted by a vertical dashed red line). After the first training, the human annotator is presented a pre-annotated page, using the predictions from the latest model. As the predictions become better with increasing size of the ground-truth, less corrections need to be made and hence more pages can be annotated in similar time intervals.", "text-hash": 504280783932681152, "type": "caption" @@ -543,7 +543,6 @@ "created_by": "unknown", "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/3", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -551,13 +550,13 @@ } ], "sref": "#/figures/3", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/46", - "hash": 17801697261174341699, "orig": "Figure 5: A typical image of a parsed PDF page that is fed to the default models. In red, we show the detection of the tables combined with the confidence of the model. The results displayed here originate from the YOLOv2 model.", "prov": [ { @@ -565,6 +564,7 @@ } ], "sref": "#/figures/4/captions/0", + "subj_hash": 17801697261174341699, "text": "Figure 5: A typical image of a parsed PDF page that is fed to the default models. In red, we show the detection of the tables combined with the confidence of the model. The results displayed here originate from the YOLOv2 model.", "text-hash": 8628591081653072559, "type": "caption" @@ -574,7 +574,6 @@ "created_by": "unknown", "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/4", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -582,13 +581,13 @@ } ], "sref": "#/figures/4", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/80", - "hash": 3206590615695639432, "orig": "Figure 6: Diagram of the architecture of our platform. The architecture is composed from 4 layers: an interface layer with REST-API and frontend, an orchestration layer with a message broker and results backend, a compute layer consisting out of a variable number of asynchronous workers and finally a storage layer providing a NoSQL database and an object store. The NoSQL database stores the queryable meta-data of each file that is stored in the object store.", "prov": [ { @@ -596,6 +595,7 @@ } ], "sref": "#/figures/5/captions/0", + "subj_hash": 3206590615695639432, "text": "Figure 6: Diagram of the architecture of our platform. The architecture is composed from 4 layers: an interface layer with REST-API and frontend, an orchestration layer with a message broker and results backend, a compute layer consisting out of a variable number of asynchronous workers and finally a storage layer providing a NoSQL database and an object store. The NoSQL database stores the queryable meta-data of each file that is stored in the object store.", "text-hash": 4488590919374042342, "type": "paragraph" @@ -605,7 +605,6 @@ "created_by": "unknown", "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/5", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -613,13 +612,13 @@ } ], "sref": "#/figures/5", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/85", - "hash": 6667504298804810757, "orig": "Figure 7: Evolution of number of users and number of PDF pages on the platform. The jumps in the number of pages originates from big ingestions of documents performed by some users. This proves that the CCS platform is also able to accomodate these short burst of extreme activity.", "prov": [ { @@ -627,6 +626,7 @@ } ], "sref": "#/figures/6/captions/0", + "subj_hash": 6667504298804810757, "text": "Figure 7: Evolution of number of users and number of PDF pages on the platform. The jumps in the number of pages originates from big ingestions of documents performed by some users. This proves that the CCS platform is also able to accomodate these short burst of extreme activity.", "text-hash": 14863303056159196785, "type": "caption" @@ -636,7 +636,6 @@ "created_by": "unknown", "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/6", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -644,13 +643,13 @@ } ], "sref": "#/figures/6", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/90", - "hash": 16175086861512378818, "orig": "Figure 8: Speedup in the pipeline components as a function of the number of worker nodes (each with four cores, running four local worker processes).", "prov": [ { @@ -658,6 +657,7 @@ } ], "sref": "#/figures/7/captions/0", + "subj_hash": 16175086861512378818, "text": "Figure 8: Speedup in the pipeline components as a function of the number of worker nodes (each with four cores, running four local worker processes).", "text-hash": 9976536719025941296, "type": "caption" @@ -667,7 +667,6 @@ "created_by": "unknown", "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/figures/7", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -675,6 +674,7 @@ } ], "sref": "#/figures/7", + "subj_hash": 18446744073709551615, "type": "figure" } ], @@ -733,7 +733,6 @@ "footnotes": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/0", - "hash": 13109829297289816265, "orig": "Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for components of this work owned by others than the author(s) must be honored. Abstracting with credit is permitted. To copy otherwise, or republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee. Request permissions from permissions@acm.org.", "prov": [ { @@ -741,13 +740,13 @@ } ], "sref": "#/footnotes/0", + "subj_hash": 13109829297289816265, "text": "Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for components of this work owned by others than the author(s) must be honored. Abstracting with credit is permitted. To copy otherwise, or republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee. Request permissions from permissions@acm.org.", "text-hash": 13032800243621120549, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/1", - "hash": 6056950725387475159, "orig": "KDD \u201918, August 19-23, 2018, London, United Kingdom", "prov": [ { @@ -755,13 +754,13 @@ } ], "sref": "#/footnotes/1", + "subj_hash": 6056950725387475159, "text": "KDD \u201918, August 19-23, 2018, London, United Kingdom", "text-hash": 15473297532078357059, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/2", - "hash": 82667377498161992, "orig": "\u00a9 2018 Copyright held by the owner/author(s). Publication rights licensed to ACM. ACM ISBN 978-1-4503-5552-0/18/08...$15.00", "prov": [ { @@ -769,13 +768,13 @@ } ], "sref": "#/footnotes/2", + "subj_hash": 82667377498161992, "text": "\u00a9 2018 Copyright held by the owner/author(s). Publication rights licensed to ACM. ACM ISBN 978-1-4503-5552-0/18/08...$15.00", "text-hash": 3001373187661149606, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/3", - "hash": 4157740687705909538, "orig": "https://doi.org/10.1145/3219819.3219834", "prov": [ { @@ -783,13 +782,13 @@ } ], "sref": "#/footnotes/3", + "subj_hash": 4157740687705909538, "text": "https://doi.org/10.1145/3219819.3219834", "text-hash": 3547103316902677392, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/4", - "hash": 11592315251976452419, "orig": "$^{1}$This number originates from a keynote talk by Phil Ydens, Adobe\u2019s VP Engineering for Document Cloud. A video of the presentation can be found here: https://youtu.be/ 5Axw6OGPYHw", "prov": [ { @@ -797,13 +796,13 @@ } ], "sref": "#/footnotes/4", + "subj_hash": 11592315251976452419, "text": "$^{1}$This number originates from a keynote talk by Phil Ydens, Adobe\u2019s VP Engineering for Document Cloud. A video of the presentation can be found here: https://youtu.be/ 5Axw6OGPYHw", "text-hash": 14549584251446631343, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/5", - "hash": 14606262418347792388, "orig": "$^{2}$This is clearly the case on the popular arXiv scientific online repository: https://arxiv. org/help/stats/2012_by_area/index", "prov": [ { @@ -811,13 +810,13 @@ } ], "sref": "#/footnotes/5", + "subj_hash": 14606262418347792388, "text": "$^{2}$This is clearly the case on the popular arXiv scientific online repository: https://arxiv. org/help/stats/2012_by_area/index", "text-hash": 7221931865252575858, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/6", - "hash": 7599391434737032939, "orig": "$^{3}$https://www.xpdfreader.com", "prov": [ { @@ -825,13 +824,13 @@ } ], "sref": "#/footnotes/6", + "subj_hash": 7599391434737032939, "text": "$^{3}$https://www.xpdfreader.com", "text-hash": 104933780092600391, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/7", - "hash": 9645151231942484724, "orig": "$^{4}$http://tabula.technology/", "prov": [ { @@ -839,13 +838,13 @@ } ], "sref": "#/footnotes/7", + "subj_hash": 9645151231942484724, "text": "$^{4}$http://tabula.technology/", "text-hash": 11894228156061308002, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/8", - "hash": 4601317523235901886, "orig": "$^{5}$https://www.abbyy.com/", "prov": [ { @@ -853,13 +852,13 @@ } ], "sref": "#/footnotes/8", + "subj_hash": 4601317523235901886, "text": "$^{5}$https://www.abbyy.com/", "text-hash": 3391629868238619420, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/9", - "hash": 1678429643964197526, "orig": "$^{6}$https://www.nuance.com/", "prov": [ { @@ -867,13 +866,13 @@ } ], "sref": "#/footnotes/9", + "subj_hash": 1678429643964197526, "text": "$^{6}$https://www.nuance.com/", "text-hash": 1693441792396921860, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/10", - "hash": 9599864648545137978, "orig": "$^{7}$https://www.ibm.com/us-en/marketplace/data-capture-and-imaging", "prov": [ { @@ -881,13 +880,13 @@ } ], "sref": "#/footnotes/10", + "subj_hash": 9599864648545137978, "text": "$^{7}$https://www.ibm.com/us-en/marketplace/data-capture-and-imaging", "text-hash": 11939931591922575256, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/11", - "hash": 11599600757439696813, "orig": "$^{8}$a line of text might be printed character-by-character, word-by-word or the entire text snippet.", "prov": [ { @@ -895,13 +894,13 @@ } ], "sref": "#/footnotes/11", + "subj_hash": 11599600757439696813, "text": "$^{8}$a line of text might be printed character-by-character, word-by-word or the entire text snippet.", "text-hash": 14551310605717713161, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/12", - "hash": 8672351490975826115, "orig": "$^{9}$http://qpdf.sourceforge.net/", "prov": [ { @@ -909,13 +908,13 @@ } ], "sref": "#/footnotes/12", + "subj_hash": 8672351490975826115, "text": "$^{9}$http://qpdf.sourceforge.net/", "text-hash": 17478669388996915759, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/13", - "hash": 13163501967272675186, "orig": "$^{10}$It is important to notice that there is no restriction on the number of labels nor the semantic meaning of these labels. The only limitation one has is that the set of semantic labels needs to be consistent across the dataset, but this is evidently true for any type of ML algorithm.", "prov": [ { @@ -923,13 +922,13 @@ } ], "sref": "#/footnotes/13", + "subj_hash": 13163501967272675186, "text": "$^{10}$It is important to notice that there is no restriction on the number of labels nor the semantic meaning of these labels. The only limitation one has is that the set of semantic labels needs to be consistent across the dataset, but this is evidently true for any type of ML algorithm.", "text-hash": 13266614683838167520, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/14", - "hash": 16307739621375260129, "orig": "$^{11}$All the data is coming from the bulk data download https://arxiv.org/help/bulk_data_s3", "prov": [ { @@ -937,13 +936,13 @@ } ], "sref": "#/footnotes/14", + "subj_hash": 16307739621375260129, "text": "$^{11}$All the data is coming from the bulk data download https://arxiv.org/help/bulk_data_s3", "text-hash": 10131428201408538445, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/15", - "hash": 16584453941359713372, "orig": "$^{12}$https://journals.aps.org/prb", "prov": [ { @@ -951,13 +950,13 @@ } ], "sref": "#/footnotes/15", + "subj_hash": 16584453941359713372, "text": "$^{12}$https://journals.aps.org/prb", "text-hash": 9846388834475228858, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/16", - "hash": 7152618592130781617, "orig": "$^{13}$https://www.openapis.org/", "prov": [ { @@ -965,13 +964,13 @@ } ], "sref": "#/footnotes/16", + "subj_hash": 7152618592130781617, "text": "$^{13}$https://www.openapis.org/", "text-hash": 831347610428179229, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/17", - "hash": 6593099618554401757, "orig": "$^{14}$https://www.rabbitmq.com/", "prov": [ { @@ -979,13 +978,13 @@ } ], "sref": "#/footnotes/17", + "subj_hash": 6593099618554401757, "text": "$^{14}$https://www.rabbitmq.com/", "text-hash": 15235037228412732729, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/18", - "hash": 7200807455610600839, "orig": "$^{15}$https://www.redis.io/", "prov": [ { @@ -993,13 +992,13 @@ } ], "sref": "#/footnotes/18", + "subj_hash": 7200807455610600839, "text": "$^{15}$https://www.redis.io/", "text-hash": 782710111840296691, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/19", - "hash": 1602196689966359724, "orig": "$^{16}$http://www.celeryproject.org/", "prov": [ { @@ -1007,13 +1006,13 @@ } ], "sref": "#/footnotes/19", + "subj_hash": 1602196689966359724, "text": "$^{16}$http://www.celeryproject.org/", "text-hash": 1778492971410642442, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/20", - "hash": 4503261997707320357, "orig": "$^{17}$https://www.mongodb.com/", "prov": [ { @@ -1021,13 +1020,13 @@ } ], "sref": "#/footnotes/20", + "subj_hash": 4503261997707320357, "text": "$^{17}$https://www.mongodb.com/", "text-hash": 3489272016069066385, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/21", - "hash": 2838531283607966593, "orig": "$^{18}$https://kubernetes.io/", "prov": [ { @@ -1035,13 +1034,13 @@ } ], "sref": "#/footnotes/21", + "subj_hash": 2838531283607966593, "text": "$^{18}$https://kubernetes.io/", "text-hash": 5145030134774826221, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/22", - "hash": 3398848297472714606, "orig": "$^{19}$ibm.biz/privatecloud", "prov": [ { @@ -1049,13 +1048,13 @@ } ], "sref": "#/footnotes/22", + "subj_hash": 3398848297472714606, "text": "$^{19}$ibm.biz/privatecloud", "text-hash": 4585077909629360588, "type": "footnote" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/footnotes/23", - "hash": 6724984968154270143, "orig": "$^{20}$We don\u2019t show the number of documents, since the number of pages in a document can range from 1 to well above 1000. Consequently, the number of pages is a more robust metric to measure the scaling with regard to the corpus size.", "prov": [ { @@ -1063,12 +1062,12 @@ } ], "sref": "#/footnotes/23", + "subj_hash": 6724984968154270143, "text": "$^{20}$We don\u2019t show the number of documents, since the number of pages in a document can range from 1 to well above 1000. Consequently, the number of pages is a more robust metric to measure the scaling with regard to the corpus size.", "text-hash": 14814952417700014875, "type": "footnote" } ], - "hash": 18446744073709551615, "instances": { "data": [ [ @@ -1423,7 +1422,7 @@ 92, 96, 18, - 22, + 21, true, "[11]", "[11]" @@ -1444,7 +1443,7 @@ 93, 95, 19, - 21, + 20, true, "11", "11" @@ -1486,7 +1485,7 @@ 83, 87, 17, - 21, + 20, true, "[11]", "[11]" @@ -1507,7 +1506,7 @@ 84, 86, 18, - 20, + 19, true, "11", "11" @@ -1527,8 +1526,8 @@ 185, 175, 185, - 38, - 42, + 37, + 41, true, "(or cells)", "(or cells)" @@ -1548,8 +1547,8 @@ 223, 220, 223, - 51, - 54, + 50, + 53, true, "(1)", "(1)" @@ -1569,8 +1568,8 @@ 222, 221, 222, + 51, 52, - 53, true, "1", "1" @@ -1590,8 +1589,8 @@ 244, 241, 244, - 58, - 61, + 57, + 60, true, "(2)", "(2)" @@ -1611,8 +1610,8 @@ 243, 242, 243, + 58, 59, - 60, true, "2", "2" @@ -1632,8 +1631,8 @@ 273, 270, 273, - 66, - 69, + 65, + 68, true, "(3)", "(3)" @@ -1653,8 +1652,8 @@ 272, 271, 272, + 66, 67, - 68, true, "3", "3" @@ -1674,8 +1673,8 @@ 388, 369, 388, - 85, - 91, + 84, + 90, true, "(such as in tables)", "(such as in tables)" @@ -1948,7 +1947,7 @@ 128, 133, 28, - 33, + 29, true, "25000", "25000" @@ -1968,8 +1967,8 @@ 170, 160, 170, + 34, 38, - 42, true, "(per page)", "(per page)" @@ -1989,8 +1988,8 @@ 228, 195, 223, - 45, - 54, + 41, + 50, true, "(Recall= \u211b and Precision= \ud835\udcab)", "(Recall= \u211b and Precision= \ud835\udcab)" @@ -2010,8 +2009,8 @@ 250, 241, 245, - 57, - 61, + 53, + 54, true, "5000", "5000" @@ -2031,8 +2030,8 @@ 336, 330, 331, - 76, - 77, + 69, + 70, true, "1", "1" @@ -2052,8 +2051,8 @@ 425, 419, 420, - 95, - 96, + 88, + 89, true, "8", "8" @@ -2073,8 +2072,8 @@ 456, 448, 451, - 102, - 105, + 95, + 96, true, "100", "100" @@ -2116,7 +2115,7 @@ 184, 187, 30, - 33, + 31, true, "400", "400" @@ -2128,19 +2127,19 @@ "TEXT", "#/texts/0", 1.0, - 14650440320693190822, - 16777106899184965109, + 15358376557624922247, + 16767804341034909078, null, null, 0, - 8, + 10, 0, - 8, + 10, 0, - 4, + 3, true, - "arXiv:18", - "arXiv:18" + "arXiv:1806", + "arXiv:1806" ], [ "reference", @@ -2157,12 +2156,54 @@ 22, 20, 22, - 15, - 16, + 8, + 9, true, "cs", "cs" ], + [ + "reference", + "title", + 7377574370756688828, + "TEXT", + "#/texts/0", + 1.0, + 15441160910541480776, + 218888595256728797, + null, + null, + 23, + 25, + 23, + 25, + 10, + 11, + true, + "DL", + "DL" + ], + [ + "reference", + "title", + 7377574370756688828, + "TEXT", + "#/texts/0", + 1.0, + 2633454640929888599, + 16744695038281265103, + null, + null, + 27, + 38, + 27, + 38, + 12, + 15, + true, + "24 May 2018", + "24 May 2018" + ], [ "reference", "title", @@ -4762,7 +4803,7 @@ 1197, 1398, 220, - 260, + 259, true, "This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output.", "This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output." @@ -5140,7 +5181,7 @@ 1339, 1342, 248, - 251, + 250, true, "99%", "99%" @@ -5161,7 +5202,7 @@ 1339, 1341, 248, - 250, + 249, true, "99", "99" @@ -5181,8 +5222,8 @@ 1347, 1343, 1347, + 250, 251, - 252, true, "with", "with" @@ -5202,8 +5243,8 @@ 1354, 1348, 1354, + 251, 252, - 253, true, "regard", "regard" @@ -5223,8 +5264,8 @@ 1357, 1355, 1357, + 252, 253, - 254, true, "to", "to" @@ -5244,8 +5285,8 @@ 1365, 1358, 1365, + 253, 254, - 255, true, "content", "content" @@ -5265,8 +5306,8 @@ 1376, 1366, 1376, + 254, 255, - 256, true, "conversion", "conversion" @@ -5286,8 +5327,8 @@ 1379, 1377, 1379, + 255, 256, - 257, true, "to", "to" @@ -5307,8 +5348,8 @@ 1397, 1380, 1397, - 257, - 259, + 256, + 258, true, "structured output", "structured output" @@ -5328,8 +5369,8 @@ 1554, 1399, 1554, - 260, - 286, + 259, + 283, true, "The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements." @@ -5349,8 +5390,8 @@ 1415, 1403, 1415, - 261, - 263, + 260, + 262, true, "CCS platform", "CCS platform" @@ -5370,8 +5411,8 @@ 1437, 1416, 1437, - 263, - 266, + 262, + 265, true, "is currently deployed", "is currently deployed" @@ -5391,8 +5432,8 @@ 1440, 1438, 1440, + 265, 266, - 267, true, "on", "on" @@ -5412,8 +5453,8 @@ 1468, 1441, 1468, - 267, - 270, + 266, + 269, true, "IBM internal infrastructure", "IBM internal infrastructure" @@ -5433,8 +5474,8 @@ 1480, 1473, 1480, + 270, 271, - 272, true, "serving", "serving" @@ -5454,8 +5495,8 @@ 1490, 1486, 1490, + 272, 273, - 274, true, "than", "than" @@ -5475,8 +5516,8 @@ 1494, 1491, 1494, + 273, 274, - 277, true, "250", "250" @@ -5496,8 +5537,8 @@ 1507, 1495, 1507, - 277, - 279, + 274, + 276, true, "active users", "active users" @@ -5517,8 +5558,8 @@ 1511, 1508, 1511, - 279, - 280, + 276, + 277, true, "for", "for" @@ -5538,8 +5579,8 @@ 1533, 1512, 1533, + 277, 280, - 283, true, "knowledge-engineering", "knowledge-engineering" @@ -5559,8 +5600,8 @@ 1521, 1512, 1521, - 280, - 281, + 277, + 278, true, "knowledge", "knowledge" @@ -5580,8 +5621,8 @@ 1553, 1522, 1553, + 279, 282, - 285, true, "engineering project engagements", "engineering project engagements" @@ -5672,7 +5713,7 @@ ], [ "reference", - "title", + "date", 11222145795862225841, "TEXT", "#/texts/10", @@ -5686,7 +5727,7 @@ 62, 66, 14, - 18, + 15, true, "2018", "2018" @@ -5706,8 +5747,8 @@ 151, 68, 151, - 19, - 32, + 16, + 29, true, "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale", "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale" @@ -5727,8 +5768,8 @@ 247, 154, 247, - 34, - 54, + 31, + 49, true, "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining", "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining" @@ -5740,19 +5781,19 @@ "TEXT", "#/texts/10", 1.0, - 7560959377963587448, - 13397916472410285088, + 17017808558592810577, + 1917644983122671206, null, null, 249, - 259, + 267, 249, - 259, - 55, - 59, + 267, + 50, + 56, true, - "August 19-", - "August 19-" + "August 19-23, 2018", + "August 19-23, 2018" ], [ "reference", @@ -5769,8 +5810,8 @@ 291, 269, 291, - 67, - 71, + 57, + 61, true, "London, United Kingdom", "London, United Kingdom" @@ -5790,8 +5831,8 @@ 315, 298, 315, - 74, - 80, + 64, + 70, true, "New York, NY, USA", "New York, NY, USA" @@ -5811,12 +5852,33 @@ 344, 326, 344, - 84, - 94, + 74, + 83, true, "https://doi.org/10", "https://doi.org/10" ], + [ + "reference", + "url", + 11222145795862225841, + "TEXT", + "#/texts/10", + 1.0, + 7680709109455866852, + 9531684221895358060, + null, + null, + 346, + 366, + 346, + 366, + 84, + 89, + true, + "1145/3219819.3219834", + "1145/3219819.3219834" + ], [ "sentence", "", @@ -31264,7 +31326,7 @@ 338, 445, 68, - 99, + 98, true, "Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$.", "Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$." @@ -31537,7 +31599,7 @@ 434, 444, 90, - 98, + 97, true, "etc^{10}", "etc$^{10}$" @@ -31558,7 +31620,7 @@ 440, 442, 94, - 96, + 95, true, "10", "10" @@ -31578,8 +31640,8 @@ 532, 446, 532, - 99, - 115, + 98, + 114, true, "In the annotator tool, each layout semantic label is visually represented by a colour.", "In the annotator tool, each layout semantic label is visually represented by a colour." @@ -31599,8 +31661,8 @@ 452, 446, 452, - 99, - 101, + 98, + 100, true, "In the", "In the" @@ -31620,8 +31682,8 @@ 467, 453, 467, - 101, - 103, + 100, + 102, true, "annotator tool", "annotator tool" @@ -31641,8 +31703,8 @@ 495, 474, 495, - 105, - 108, + 104, + 107, true, "layout semantic label", "layout semantic label" @@ -31662,8 +31724,8 @@ 519, 496, 519, - 108, - 111, + 107, + 110, true, "is visually represented", "is visually represented" @@ -31683,8 +31745,8 @@ 524, 520, 524, - 111, - 113, + 110, + 112, true, "by a", "by a" @@ -31704,8 +31766,8 @@ 531, 525, 531, + 112, 113, - 114, true, "colour", "colour" @@ -31725,8 +31787,8 @@ 675, 533, 675, - 115, - 145, + 114, + 144, true, "By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3.", "By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3." @@ -31746,8 +31808,8 @@ 535, 533, 535, + 114, 115, - 116, true, "By", "By" @@ -31767,8 +31829,8 @@ 545, 536, 545, + 115, 116, - 117, true, "assigning", "assigning" @@ -31788,8 +31850,8 @@ 554, 548, 554, + 117, 118, - 119, true, "colour", "colour" @@ -31809,8 +31871,8 @@ 562, 555, 562, - 119, - 121, + 118, + 120, true, "to each", "to each" @@ -31830,8 +31892,8 @@ 577, 563, 577, - 121, - 123, + 120, + 122, true, "semantic label", "semantic label" @@ -31851,8 +31913,8 @@ 587, 583, 587, + 124, 125, - 126, true, "task", "task" @@ -31872,8 +31934,8 @@ 590, 588, 590, + 125, 126, - 127, true, "of", "of" @@ -31893,8 +31955,8 @@ 610, 591, 610, - 127, - 129, + 126, + 128, true, "semantic annotation", "semantic annotation" @@ -31914,8 +31976,8 @@ 624, 611, 624, - 129, - 131, + 128, + 130, true, "is translated", "is translated" @@ -31935,8 +31997,8 @@ 631, 625, 631, - 131, - 133, + 130, + 132, true, "into a", "into a" @@ -31956,8 +32018,8 @@ 646, 632, 646, - 133, - 136, + 132, + 135, true, "colouring-task", "colouring-task" @@ -31977,8 +32039,8 @@ 641, 632, 641, + 132, 133, - 134, true, "colouring", "colouring" @@ -31998,8 +32060,8 @@ 646, 642, 646, + 134, 135, - 136, true, "task", "task" @@ -32019,8 +32081,8 @@ 650, 648, 650, + 136, 137, - 138, true, "as", "as" @@ -32040,8 +32102,8 @@ 662, 651, 662, - 138, - 141, + 137, + 140, true, "can be seen", "can be seen" @@ -32061,8 +32123,8 @@ 665, 663, 665, + 140, 141, - 142, true, "in", "in" @@ -32082,8 +32144,8 @@ 672, 666, 672, + 141, 142, - 143, true, "Figure", "Figure" @@ -32103,8 +32165,8 @@ 674, 673, 674, + 142, 143, - 144, true, "3", "3" @@ -32124,8 +32186,8 @@ 766, 676, 766, - 145, - 162, + 144, + 161, true, "Since humans are very efficient in visual recognition, this task comes very natural to us.", "Since humans are very efficient in visual recognition, this task comes very natural to us." @@ -32145,8 +32207,8 @@ 681, 676, 681, + 144, 145, - 146, true, "Since", "Since" @@ -32166,8 +32228,8 @@ 688, 682, 688, + 145, 146, - 147, true, "humans", "humans" @@ -32187,8 +32249,8 @@ 697, 689, 697, - 147, - 149, + 146, + 148, true, "are very", "are very" @@ -32208,8 +32270,8 @@ 710, 698, 710, - 149, - 151, + 148, + 150, true, "efficient in", "efficient in" @@ -32229,8 +32291,8 @@ 729, 711, 729, - 151, - 153, + 150, + 152, true, "visual recognition", "visual recognition" @@ -32250,8 +32312,8 @@ 740, 736, 740, + 154, 155, - 156, true, "task", "task" @@ -32271,8 +32333,8 @@ 751, 741, 751, - 156, - 158, + 155, + 157, true, "comes very", "comes very" @@ -32292,8 +32354,8 @@ 762, 760, 762, + 158, 159, - 160, true, "to", "to" @@ -32313,8 +32375,8 @@ 919, 767, 919, - 162, - 189, + 161, + 187, true, "The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", "The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns." @@ -32334,8 +32396,8 @@ 784, 771, 784, - 163, - 165, + 162, + 164, true, "required time", "required time" @@ -32355,8 +32417,8 @@ 802, 785, 802, - 165, - 168, + 164, + 167, true, "spent to annotate", "spent to annotate" @@ -32376,8 +32438,8 @@ 793, 791, 793, + 165, 166, - 167, true, "to", "to" @@ -32397,8 +32459,8 @@ 816, 805, 816, - 169, - 171, + 168, + 170, true, "single page", "single page" @@ -32418,8 +32480,8 @@ 825, 817, 825, + 170, 171, - 172, true, "starting", "starting" @@ -32439,8 +32501,8 @@ 834, 826, 834, - 172, - 174, + 171, + 173, true, "from the", "from the" @@ -32460,8 +32522,8 @@ 849, 835, 849, - 174, - 176, + 173, + 175, true, "parsing output", "parsing output" @@ -32481,8 +32543,8 @@ 870, 850, 870, - 176, - 180, + 175, + 179, true, "has shown to average", "has shown to average" @@ -32502,8 +32564,8 @@ 862, 860, 862, + 177, 178, - 179, true, "to", "to" @@ -32523,8 +32585,8 @@ 873, 871, 873, + 179, 180, - 181, true, "at", "at" @@ -32544,8 +32606,8 @@ 876, 874, 876, + 180, 181, - 183, true, "30", "30" @@ -32565,8 +32627,8 @@ 884, 877, 884, - 183, - 184, + 181, + 182, true, "seconds", "seconds" @@ -32586,8 +32648,8 @@ 889, 885, 889, - 184, - 185, + 182, + 183, true, "over", "over" @@ -32607,8 +32669,8 @@ 918, 890, 918, - 185, - 188, + 183, + 186, true, "various annotation campaigns", "various annotation campaigns" @@ -36388,7 +36450,7 @@ 446, 539, 85, - 106, + 105, true, "The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", "The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection." @@ -36577,7 +36639,7 @@ 508, 510, 98, - 100, + 99, true, "10", "10" @@ -36597,8 +36659,8 @@ 514, 511, 514, + 99, 100, - 101, true, "for", "for" @@ -36618,8 +36680,8 @@ 527, 515, 527, - 101, - 104, + 100, + 103, true, "ground-truth", "ground-truth" @@ -36639,8 +36701,8 @@ 521, 515, 521, + 100, 101, - 102, true, "ground", "ground" @@ -36660,8 +36722,8 @@ 538, 522, 538, - 103, - 105, + 102, + 104, true, "truth collection", "truth collection" @@ -40651,7 +40713,7 @@ 357, 566, 69, - 125, + 124, true, "Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7].", "Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]." @@ -40966,7 +41028,7 @@ 500, 510, 100, - 108, + 107, true, "[5, 6, 10]", "[5, 6, 10]" @@ -41029,7 +41091,7 @@ 507, 509, 105, - 107, + 106, true, "10", "10" @@ -41049,8 +41111,8 @@ 535, 516, 535, - 110, - 113, + 109, + 112, true, "YOLO architecture [", "YOLO architecture [" @@ -41070,8 +41132,8 @@ 540, 534, 540, - 112, - 117, + 111, + 116, true, "[8, 9]", "[8, 9]" @@ -41091,8 +41153,8 @@ 536, 535, 536, + 112, 113, - 114, true, "8", "8" @@ -41112,8 +41174,8 @@ 539, 538, 539, + 114, 115, - 116, true, "9", "9" @@ -41133,8 +41195,8 @@ 561, 549, 561, - 119, - 121, + 118, + 120, true, "SSD networks", "SSD networks" @@ -41154,8 +41216,8 @@ 565, 562, 565, - 121, - 124, + 120, + 123, true, "[7]", "[7]" @@ -41175,8 +41237,8 @@ 565, 562, 565, - 121, - 124, + 120, + 123, true, "[7]", "[7]" @@ -41196,8 +41258,8 @@ 564, 563, 564, + 121, 122, - 123, true, "7", "7" @@ -41217,8 +41279,8 @@ 715, 567, 715, - 125, - 160, + 124, + 158, true, "On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", "On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions." @@ -41238,8 +41300,8 @@ 569, 567, 569, + 124, 125, - 126, true, "On", "On" @@ -41259,8 +41321,8 @@ 582, 574, 582, + 126, 127, - 128, true, "platform", "platform" @@ -41280,8 +41342,8 @@ 591, 587, 591, + 129, 130, - 131, true, "have", "have" @@ -41301,8 +41363,8 @@ 608, 596, 608, - 132, - 137, + 131, + 136, true, "Faster-R-CNN", "Faster-R-CNN" @@ -41322,8 +41384,8 @@ 602, 596, 602, + 131, 132, - 133, true, "Faster", "Faster" @@ -41343,8 +41405,8 @@ 608, 605, 608, + 135, 136, - 137, true, "CNN", "CNN" @@ -41364,8 +41426,8 @@ 613, 609, 613, - 137, - 141, + 136, + 139, true, "[10]", "[10]" @@ -41385,8 +41447,8 @@ 613, 609, 613, - 137, - 141, + 136, + 139, true, "[10]", "[10]" @@ -41406,8 +41468,8 @@ 612, 610, 612, + 137, 138, - 140, true, "10", "10" @@ -41427,8 +41489,8 @@ 628, 622, 628, + 141, 143, - 145, true, "YOLOv2", "YOLOv2" @@ -41448,8 +41510,8 @@ 627, 622, 627, - 143, - 144, + 141, + 142, true, "YOLOv", "YOLOv" @@ -41469,8 +41531,8 @@ 628, 627, 628, - 144, - 145, + 142, + 143, true, "2", "2" @@ -41490,8 +41552,8 @@ 632, 629, 632, - 145, - 148, + 143, + 146, true, "[9]", "[9]" @@ -41511,8 +41573,8 @@ 632, 629, 632, - 145, - 148, + 143, + 146, true, "[9]", "[9]" @@ -41532,8 +41594,8 @@ 631, 630, 631, - 146, - 147, + 144, + 145, true, "9", "9" @@ -41553,8 +41615,8 @@ 654, 642, 654, + 147, 149, - 151, true, "available as", "available as" @@ -41574,8 +41636,8 @@ 679, 655, 679, + 149, 151, - 153, true, "individual microservices", "individual microservices" @@ -41595,8 +41657,8 @@ 689, 681, 689, + 152, 154, - 156, true, "both for", "both for" @@ -41616,8 +41678,8 @@ 698, 690, 698, - 156, - 157, + 154, + 155, true, "training", "training" @@ -41637,8 +41699,8 @@ 714, 703, 714, - 158, - 159, + 156, + 157, true, "predictions", "predictions" @@ -42016,7 +42078,7 @@ 0, 78, 0, - 20, + 19, true, "The networks available on our platform have been trained on arXiv data$^{11}$.", "The networks available on our platform have been trained on arXiv data$^{11}$." @@ -42163,7 +42225,7 @@ 66, 77, 11, - 19, + 18, true, "data^{11}", "data$^{11}$" @@ -42184,7 +42246,7 @@ 73, 75, 15, - 17, + 16, true, "11", "11" @@ -42204,8 +42266,8 @@ 96, 82, 96, - 21, - 23, + 20, + 22, true, "have annotated", "have annotated" @@ -42225,8 +42287,8 @@ 102, 97, 102, + 22, 23, - 28, true, "30000", "30000" @@ -42246,8 +42308,8 @@ 112, 103, 112, - 28, - 30, + 23, + 25, true, "PDF pages", "PDF pages" @@ -42267,8 +42329,8 @@ 121, 117, 121, - 31, - 32, + 26, + 27, true, "know", "know" @@ -42415,7 +42477,7 @@ 45, 156, 10, - 43, + 32, true, "From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation.", "From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation." @@ -42457,7 +42519,7 @@ 56, 61, 12, - 17, + 13, true, "30000", "30000" @@ -42477,8 +42539,8 @@ 67, 62, 67, - 17, - 18, + 13, + 14, true, "pages", "pages" @@ -42498,8 +42560,8 @@ 81, 72, 81, - 20, - 22, + 16, + 18, true, "have used", "have used" @@ -42519,8 +42581,8 @@ 87, 82, 87, - 22, - 27, + 18, + 19, true, "25000", "25000" @@ -42540,8 +42602,8 @@ 93, 88, 93, - 27, - 28, + 19, + 20, true, "pages", "pages" @@ -42561,8 +42623,8 @@ 96, 94, 96, - 28, - 29, + 20, + 21, true, "as", "as" @@ -42582,8 +42644,8 @@ 110, 97, 110, - 29, - 31, + 21, + 23, true, "training data", "training data" @@ -42603,8 +42665,8 @@ 119, 115, 119, - 32, - 33, + 24, + 25, true, "kept", "kept" @@ -42624,8 +42686,8 @@ 134, 130, 134, - 35, - 39, + 27, + 28, true, "5000", "5000" @@ -42645,8 +42707,8 @@ 140, 135, 140, - 39, - 40, + 28, + 29, true, "pages", "pages" @@ -42666,8 +42728,8 @@ 144, 141, 144, - 40, - 41, + 29, + 30, true, "for", "for" @@ -42687,8 +42749,8 @@ 155, 145, 155, - 41, - 42, + 30, + 31, true, "evaluation", "evaluation" @@ -42708,8 +42770,8 @@ 337, 157, 337, - 43, - 78, + 32, + 67, true, "Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", "Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms." @@ -42729,8 +42791,8 @@ 167, 161, 167, - 44, - 46, + 33, + 35, true, "to the", "to the" @@ -42750,8 +42812,8 @@ 178, 168, 178, - 46, - 48, + 35, + 37, true, "large size", "large size" @@ -42771,8 +42833,8 @@ 185, 179, 185, - 48, - 50, + 37, + 39, true, "of the", "of the" @@ -42792,8 +42854,8 @@ 193, 186, 193, - 50, - 51, + 39, + 40, true, "dataset", "dataset" @@ -42813,8 +42875,8 @@ 220, 198, 220, - 53, - 58, + 42, + 47, true, "did not need to employ", "did not need to employ" @@ -42834,8 +42896,8 @@ 213, 211, 213, - 56, - 57, + 45, + 46, true, "to", "to" @@ -42855,8 +42917,8 @@ 242, 225, 242, - 59, - 62, + 48, + 51, true, "data-augmentation", "data-augmentation" @@ -42876,8 +42938,8 @@ 229, 225, 229, - 59, - 60, + 48, + 49, true, "data", "data" @@ -42897,8 +42959,8 @@ 252, 230, 252, - 61, - 63, + 50, + 52, true, "augmentation technique", "augmentation technique" @@ -42918,8 +42980,8 @@ 270, 260, 270, - 65, - 67, + 54, + 56, true, "is usually", "is usually" @@ -42939,8 +43001,8 @@ 284, 271, 284, - 67, - 69, + 56, + 58, true, "necessary for", "necessary for" @@ -42960,8 +43022,8 @@ 301, 285, 301, - 69, - 72, + 58, + 61, true, "object-detection", "object-detection" @@ -42981,8 +43043,8 @@ 310, 292, 310, - 71, - 74, + 60, + 63, true, "detection or image", "detection or image" @@ -43002,8 +43064,8 @@ 301, 292, 301, - 71, - 72, + 60, + 61, true, "detection", "detection" @@ -43023,8 +43085,8 @@ 325, 305, 325, - 73, - 76, + 62, + 65, true, "image-classification", "image-classification" @@ -43044,8 +43106,8 @@ 310, 305, 310, - 73, - 74, + 62, + 63, true, "image", "image" @@ -43065,8 +43127,8 @@ 336, 311, 336, - 75, - 77, + 64, + 66, true, "classification algorithms", "classification algorithms" @@ -45586,7 +45648,7 @@ 228, 364, 45, - 84, + 76, true, "In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times.", "In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times." @@ -45733,7 +45795,7 @@ 291, 294, 57, - 60, + 58, true, "100", "100" @@ -45753,8 +45815,8 @@ 301, 295, 301, - 60, - 61, + 58, + 59, true, "epochs", "epochs" @@ -45774,8 +45836,8 @@ 307, 303, 307, - 62, - 66, + 60, + 64, true, "ie", "i.e." @@ -45795,8 +45857,8 @@ 317, 312, 317, - 67, - 72, + 65, + 66, true, "25000", "25000" @@ -45816,8 +45878,8 @@ 329, 318, 329, - 72, - 74, + 66, + 68, true, "page images", "page images" @@ -45837,8 +45899,8 @@ 341, 330, 341, - 74, - 77, + 68, + 71, true, "were fed to", "were fed to" @@ -45858,8 +45920,8 @@ 345, 339, 345, - 76, - 78, + 70, + 72, true, "to the", "to the" @@ -45879,8 +45941,8 @@ 353, 346, 353, - 78, - 79, + 72, + 73, true, "network", "network" @@ -45900,8 +45962,8 @@ 357, 354, 357, - 79, - 82, + 73, + 74, true, "100", "100" @@ -45921,8 +45983,8 @@ 363, 358, 363, - 82, - 83, + 74, + 75, true, "times", "times" @@ -45942,8 +46004,8 @@ 587, 365, 587, - 84, - 129, + 76, + 121, true, "We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied.", "We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied." @@ -45963,8 +46025,8 @@ 375, 368, 375, - 85, - 86, + 77, + 78, true, "observe", "observe" @@ -45984,8 +46046,8 @@ 384, 376, 384, - 86, - 88, + 78, + 80, true, "that the", "that the" @@ -46005,8 +46067,8 @@ 398, 385, 398, - 88, - 93, + 80, + 85, true, "out-ofthe-box", "out-ofthe-box" @@ -46026,8 +46088,8 @@ 407, 395, 407, - 92, - 95, + 84, + 87, true, "box Faster R", "box Faster R" @@ -46047,8 +46109,8 @@ 411, 406, 411, - 94, - 97, + 86, + 89, true, "R-CNN", "R-CNN" @@ -46068,8 +46130,8 @@ 411, 408, 411, - 96, - 97, + 88, + 89, true, "CNN", "CNN" @@ -46089,8 +46151,8 @@ 416, 412, 416, - 97, - 98, + 89, + 90, true, "from", "from" @@ -46110,8 +46172,8 @@ 427, 417, 427, - 98, - 99, + 90, + 91, true, "Tensorflow", "Tensorflow" @@ -46131,8 +46193,8 @@ 446, 428, 446, - 99, - 102, + 91, + 94, true, "does not implement", "does not implement" @@ -46152,8 +46214,8 @@ 459, 451, 459, - 103, - 104, + 95, + 96, true, "batching", "batching" @@ -46173,8 +46235,8 @@ 470, 460, 470, - 104, - 106, + 96, + 98, true, "during the", "during the" @@ -46194,8 +46256,8 @@ 485, 471, 485, - 106, - 108, + 98, + 100, true, "training phase", "training phase" @@ -46215,8 +46277,8 @@ 492, 487, 492, - 109, - 110, + 101, + 102, true, "while", "while" @@ -46236,8 +46298,8 @@ 499, 493, 499, - 110, - 112, + 102, + 104, true, "YOLOv2", "YOLOv2" @@ -46257,8 +46319,8 @@ 498, 493, 498, - 110, - 111, + 102, + 103, true, "YOLOv", "YOLOv" @@ -46278,8 +46340,8 @@ 499, 498, 499, - 111, - 112, + 103, + 104, true, "2", "2" @@ -46299,8 +46361,8 @@ 507, 500, 507, - 112, - 113, + 104, + 105, true, "batches", "batches" @@ -46320,8 +46382,8 @@ 509, 508, 509, - 113, - 114, + 105, + 106, true, "8", "8" @@ -46341,8 +46403,8 @@ 516, 510, 516, - 114, - 115, + 106, + 107, true, "images", "images" @@ -46362,8 +46424,8 @@ 521, 517, 521, - 115, - 117, + 107, + 109, true, "at a", "at a" @@ -46383,8 +46445,8 @@ 526, 522, 526, - 117, - 118, + 109, + 110, true, "time", "time" @@ -46404,8 +46466,8 @@ 534, 528, 534, - 119, - 120, + 111, + 112, true, "thanks", "thanks" @@ -46425,8 +46487,8 @@ 540, 535, 540, - 120, - 122, + 112, + 114, true, "to an", "to an" @@ -46446,8 +46508,8 @@ 546, 541, 546, - 122, - 123, + 114, + 115, true, "image", "image" @@ -46467,8 +46529,8 @@ 555, 547, 555, - 123, - 124, + 115, + 116, true, "resizing", "resizing" @@ -46488,8 +46550,8 @@ 586, 562, 586, - 125, - 128, + 117, + 120, true, "is automatically applied", "is automatically applied" @@ -46509,8 +46571,8 @@ 691, 588, 691, - 129, - 151, + 121, + 143, true, "We believe that this is the main origin for the discrepancy of time-to-solution for the training phase.", "We believe that this is the main origin for the discrepancy of time-to-solution for the training phase." @@ -46530,8 +46592,8 @@ 598, 591, 598, - 130, - 131, + 122, + 123, true, "believe", "believe" @@ -46551,8 +46613,8 @@ 608, 599, 608, - 131, - 133, + 123, + 125, true, "that this", "that this" @@ -46572,8 +46634,8 @@ 611, 609, 611, - 133, - 134, + 125, + 126, true, "is", "is" @@ -46593,8 +46655,8 @@ 627, 616, 627, - 135, - 137, + 127, + 129, true, "main origin", "main origin" @@ -46614,8 +46676,8 @@ 635, 628, 635, - 137, - 139, + 129, + 131, true, "for the", "for the" @@ -46635,8 +46697,8 @@ 647, 636, 647, - 139, - 140, + 131, + 132, true, "discrepancy", "discrepancy" @@ -46656,8 +46718,8 @@ 650, 648, 650, - 140, - 141, + 132, + 133, true, "of", "of" @@ -46677,8 +46739,8 @@ 667, 651, 667, - 141, - 146, + 133, + 138, true, "time-to-solution", "time-to-solution" @@ -46698,8 +46760,8 @@ 655, 651, 655, - 141, - 142, + 133, + 134, true, "time", "time" @@ -46719,8 +46781,8 @@ 658, 656, 658, - 143, - 144, + 135, + 136, true, "to", "to" @@ -46740,8 +46802,8 @@ 667, 659, 667, - 145, - 146, + 137, + 138, true, "solution", "solution" @@ -46761,8 +46823,8 @@ 675, 668, 675, - 146, - 148, + 138, + 140, true, "for the", "for the" @@ -46782,8 +46844,8 @@ 690, 676, 690, - 148, - 150, + 140, + 142, true, "training phase", "training phase" @@ -46803,8 +46865,8 @@ 731, 692, 731, + 143, 151, - 159, true, "The same holds true for the prediction.", "The same holds true for the prediction." @@ -46824,8 +46886,8 @@ 706, 701, 706, - 153, - 154, + 145, + 146, true, "holds", "holds" @@ -46845,8 +46907,8 @@ 715, 707, 715, - 154, - 156, + 146, + 148, true, "true for", "true for" @@ -46866,8 +46928,8 @@ 730, 720, 730, - 157, - 158, + 149, + 150, true, "prediction", "prediction" @@ -46887,8 +46949,8 @@ 913, 732, 911, - 159, - 200, + 151, + 191, true, "Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", "Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node)." @@ -46908,8 +46970,8 @@ 751, 743, 751, - 161, - 163, + 153, + 155, true, "from the", "from the" @@ -46929,8 +46991,8 @@ 757, 752, 757, - 163, - 164, + 155, + 156, true, "point", "point" @@ -46950,8 +47012,8 @@ 760, 758, 760, - 164, - 165, + 156, + 157, true, "of", "of" @@ -46971,8 +47033,8 @@ 765, 761, 765, - 165, - 166, + 157, + 158, true, "view", "view" @@ -46992,8 +47054,8 @@ 772, 766, 772, - 166, - 168, + 158, + 160, true, "of the", "of the" @@ -47013,8 +47075,8 @@ 781, 773, 781, - 168, - 169, + 160, + 161, true, "platform", "platform" @@ -47034,8 +47096,8 @@ 793, 787, 793, - 171, - 173, + 163, + 165, true, "YOLOv2", "YOLOv2" @@ -47055,8 +47117,8 @@ 792, 787, 792, - 171, - 172, + 163, + 164, true, "YOLOv", "YOLOv" @@ -47076,8 +47138,8 @@ 793, 792, 793, - 172, - 173, + 164, + 165, true, "2", "2" @@ -47097,8 +47159,8 @@ 806, 794, 806, - 173, - 174, + 165, + 166, true, "architecture", "architecture" @@ -47118,8 +47180,8 @@ 826, 807, 826, - 174, - 177, + 166, + 169, true, "seems better suited", "seems better suited" @@ -47139,8 +47201,8 @@ 830, 827, 830, - 177, - 178, + 169, + 170, true, "for", "for" @@ -47160,8 +47222,8 @@ 841, 831, 841, - 178, - 179, + 170, + 171, true, "deployment", "deployment" @@ -47181,8 +47243,8 @@ 845, 843, 845, - 180, - 181, + 172, + 173, true, "as", "as" @@ -47202,8 +47264,8 @@ 863, 849, 863, - 182, - 185, + 174, + 177, true, "allows to have", "allows to have" @@ -47223,8 +47285,8 @@ 858, 856, 858, - 183, - 184, + 175, + 176, true, "to", "to" @@ -47244,8 +47306,8 @@ 888, 878, 888, - 188, - 189, + 180, + 181, true, "throughput", "throughput" @@ -47265,33 +47327,12 @@ 912, 889, 910, - 189, - 199, + 181, + 190, true, "(\u2248 10 pages/sec/node)", "(\u2248 10 pages/sec/node)" ], - [ - "verb", - "single-verb", - 18259197018396996238, - "TEXT", - "#/texts/51", - 1.0, - 17767354399704339168, - 12733722225655458138, - null, - null, - 890, - 893, - 890, - 891, - 190, - 191, - true, - "\u2248", - "\u2248" - ], [ "numval", "ival", @@ -47307,8 +47348,8 @@ 896, 892, 894, - 191, - 193, + 183, + 184, true, "10", "10" @@ -47328,8 +47369,8 @@ 902, 895, 900, - 193, - 194, + 184, + 185, true, "pages", "pages" @@ -47349,8 +47390,8 @@ 911, 901, 909, - 195, - 198, + 186, + 189, true, "sec/node", "sec/node" @@ -49450,7 +49491,7 @@ 274, 445, 48, - 88, + 87, true, "For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0.", "For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0." @@ -49870,7 +49911,7 @@ 412, 417, 76, - 81, + 80, true, "98.7%", "98.7%" @@ -49891,7 +49932,7 @@ 412, 416, 76, - 80, + 79, true, "98.7", "98.7" @@ -49911,8 +49952,8 @@ 422, 418, 422, - 81, - 83, + 80, + 82, true, "at a", "at a" @@ -49932,8 +49973,8 @@ 439, 423, 439, - 83, - 85, + 82, + 84, true, "confidence level", "confidence level" @@ -49953,8 +49994,8 @@ 442, 440, 442, + 84, 85, - 86, true, "of", "of" @@ -49974,8 +50015,8 @@ 444, 443, 444, + 85, 86, - 87, true, "0", "0" @@ -49995,8 +50036,8 @@ 447, 446, 447, + 87, 88, - 89, true, "5", "5" @@ -50016,8 +50057,8 @@ 556, 449, 556, - 90, - 111, + 89, + 110, true, "The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers.", "The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers." @@ -50037,8 +50078,8 @@ 461, 453, 461, - 91, - 93, + 90, + 92, true, "Faster R", "Faster R" @@ -50058,8 +50099,8 @@ 465, 460, 465, - 92, - 95, + 91, + 94, true, "R-CNN", "R-CNN" @@ -50079,8 +50120,8 @@ 472, 462, 472, - 94, - 96, + 93, + 95, true, "CNN method", "CNN method" @@ -50100,8 +50141,8 @@ 502, 473, 502, - 96, - 101, + 95, + 100, true, "is also performing quite well", "is also performing quite well" @@ -50121,8 +50162,8 @@ 520, 508, 520, - 103, - 105, + 102, + 104, true, "has slightly", "has slightly" @@ -50142,8 +50183,8 @@ 547, 527, 547, - 106, - 109, + 105, + 108, true, "precision and recall", "precision and recall" @@ -50163,8 +50204,8 @@ 536, 527, 536, + 105, 106, - 107, true, "precision", "precision" @@ -50184,8 +50225,8 @@ 555, 541, 555, - 108, - 110, + 107, + 109, true, "recall numbers", "recall numbers" @@ -50205,8 +50246,8 @@ 667, 557, 667, - 111, - 129, + 110, + 128, true, "We believe this originates from the selective search algorithm which is used to determine regions of interest.", "We believe this originates from the selective search algorithm which is used to determine regions of interest." @@ -50226,8 +50267,8 @@ 567, 560, 567, + 111, 112, - 113, true, "believe", "believe" @@ -50247,8 +50288,8 @@ 583, 573, 583, + 113, 114, - 115, true, "originates", "originates" @@ -50268,8 +50309,8 @@ 592, 584, 592, - 115, - 117, + 114, + 116, true, "from the", "from the" @@ -50289,8 +50330,8 @@ 619, 593, 619, - 117, - 120, + 116, + 119, true, "selective search algorithm", "selective search algorithm" @@ -50310,8 +50351,8 @@ 646, 626, 646, - 121, - 125, + 120, + 124, true, "is used to determine", "is used to determine" @@ -50331,8 +50372,8 @@ 636, 634, 636, + 122, 123, - 124, true, "to", "to" @@ -50352,8 +50393,8 @@ 654, 647, 654, + 124, 125, - 126, true, "regions", "regions" @@ -50373,8 +50414,8 @@ 657, 655, 657, + 125, 126, - 127, true, "of", "of" @@ -50394,8 +50435,8 @@ 666, 658, 666, + 126, 127, - 128, true, "interest", "interest" @@ -50415,8 +50456,8 @@ 773, 668, 773, - 129, - 149, + 128, + 148, true, "The images we feed it are not typical photographic images (made with a camera) but layout visualisations.", "The images we feed it are not typical photographic images (made with a camera) but layout visualisations." @@ -50436,8 +50477,8 @@ 678, 672, 678, + 129, 130, - 131, true, "images", "images" @@ -50457,8 +50498,8 @@ 686, 682, 686, + 131, 132, - 133, true, "feed", "feed" @@ -50478,8 +50519,8 @@ 697, 690, 697, - 134, - 136, + 133, + 135, true, "are not", "are not" @@ -50499,8 +50540,8 @@ 725, 698, 725, - 136, - 139, + 135, + 138, true, "typical photographic images", "typical photographic images" @@ -50520,8 +50561,8 @@ 746, 726, 746, - 139, - 145, + 138, + 144, true, "(made with a camera)", "(made with a camera)" @@ -50541,8 +50582,8 @@ 731, 727, 731, + 139, 140, - 141, true, "made", "made" @@ -50562,8 +50603,8 @@ 738, 732, 738, - 141, - 143, + 140, + 142, true, "with a", "with a" @@ -50583,8 +50624,8 @@ 745, 739, 745, + 142, 143, - 144, true, "camera", "camera" @@ -50604,8 +50645,8 @@ 772, 751, 772, - 146, - 148, + 145, + 147, true, "layout visualisations", "layout visualisations" @@ -50625,8 +50666,8 @@ 867, 774, 867, - 149, - 168, + 148, + 167, true, "The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", "The selective search algorithm in Faster R-CNN might not be optimal for such type of objects." @@ -50646,8 +50687,8 @@ 804, 778, 804, - 150, - 153, + 149, + 152, true, "selective search algorithm", "selective search algorithm" @@ -50667,8 +50708,8 @@ 807, 805, 807, + 152, 153, - 154, true, "in", "in" @@ -50688,8 +50729,8 @@ 816, 808, 816, - 154, - 156, + 153, + 155, true, "Faster R", "Faster R" @@ -50709,8 +50750,8 @@ 820, 815, 820, - 155, - 158, + 154, + 157, true, "R-CNN", "R-CNN" @@ -50730,8 +50771,8 @@ 820, 817, 820, + 156, 157, - 158, true, "CNN", "CNN" @@ -50751,8 +50792,8 @@ 833, 831, 833, + 159, 160, - 161, true, "be", "be" @@ -50772,8 +50813,8 @@ 845, 834, 845, - 161, - 163, + 160, + 162, true, "optimal for", "optimal for" @@ -50793,8 +50834,8 @@ 855, 846, 855, - 163, - 165, + 162, + 164, true, "such type", "such type" @@ -50814,8 +50855,8 @@ 858, 856, 858, + 164, 165, - 166, true, "of", "of" @@ -50835,8 +50876,8 @@ 866, 859, 866, + 165, 166, - 167, true, "objects", "objects" @@ -52201,7 +52242,7 @@ 188, 334, 36, - 70, + 66, true, "In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation.", "In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation." @@ -52369,7 +52410,7 @@ 251, 258, 47, - 54, + 50, true, "100-400", "100-400" @@ -52389,8 +52430,8 @@ 268, 259, 268, - 54, - 55, + 50, + 51, true, "annotated", "annotated" @@ -52410,8 +52451,8 @@ 274, 269, 274, - 55, - 56, + 51, + 52, true, "pages", "pages" @@ -52431,8 +52472,8 @@ 293, 283, 293, - 59, - 60, + 55, + 56, true, "equivalent", "equivalent" @@ -52452,8 +52493,8 @@ 298, 294, 298, - 60, - 62, + 56, + 58, true, "of a", "of a" @@ -52473,8 +52514,8 @@ 305, 299, 305, - 62, - 63, + 58, + 59, true, "couple", "couple" @@ -52494,8 +52535,8 @@ 308, 306, 308, - 63, - 64, + 59, + 60, true, "of", "of" @@ -52515,8 +52556,8 @@ 318, 309, 318, - 64, - 67, + 60, + 63, true, "man-hours", "man-hours" @@ -52536,8 +52577,8 @@ 312, 309, 312, - 64, - 65, + 60, + 61, true, "man", "man" @@ -52557,8 +52598,8 @@ 318, 313, 318, - 66, - 67, + 62, + 63, true, "hours", "hours" @@ -52578,8 +52619,8 @@ 322, 319, 322, - 67, - 68, + 63, + 64, true, "for", "for" @@ -52599,8 +52640,8 @@ 333, 323, 333, - 68, - 69, + 64, + 65, true, "annotation", "annotation" @@ -52620,8 +52661,8 @@ 406, 335, 406, - 70, - 83, + 66, + 79, true, "Second it must be robust against extreme imbalance of the labeled data.", "Second it must be robust against extreme imbalance of the labeled data." @@ -52641,8 +52682,8 @@ 341, 335, 341, - 70, - 71, + 66, + 67, true, "Second", "Second" @@ -52662,8 +52703,8 @@ 352, 345, 352, - 72, - 74, + 68, + 70, true, "must be", "must be" @@ -52683,8 +52724,8 @@ 367, 353, 367, - 74, - 76, + 70, + 72, true, "robust against", "robust against" @@ -52704,8 +52745,8 @@ 385, 368, 385, - 76, - 78, + 72, + 74, true, "extreme imbalance", "extreme imbalance" @@ -52725,8 +52766,8 @@ 392, 386, 392, - 78, - 80, + 74, + 76, true, "of the", "of the" @@ -52746,8 +52787,8 @@ 400, 393, 400, - 80, - 81, + 76, + 77, true, "labeled", "labeled" @@ -52767,8 +52808,8 @@ 405, 401, 405, - 81, - 82, + 77, + 78, true, "data", "data" @@ -52788,8 +52829,8 @@ 510, 407, 510, - 83, - 105, + 79, + 101, true, "It is clear that cells of the label Title will be much more uncommon than cells with the label of Text.", "It is clear that cells of the label Title will be much more uncommon than cells with the label of Text." @@ -52809,8 +52850,8 @@ 412, 410, 412, - 84, - 85, + 80, + 81, true, "is", "is" @@ -52830,8 +52871,8 @@ 423, 413, 423, - 85, - 87, + 81, + 83, true, "clear that", "clear that" @@ -52851,8 +52892,8 @@ 429, 424, 429, - 87, - 88, + 83, + 84, true, "cells", "cells" @@ -52872,8 +52913,8 @@ 436, 430, 436, - 88, - 90, + 84, + 86, true, "of the", "of the" @@ -52893,8 +52934,8 @@ 448, 437, 448, - 90, - 92, + 86, + 88, true, "label Title", "label Title" @@ -52914,8 +52955,8 @@ 461, 449, 461, - 92, - 95, + 88, + 91, true, "will be much", "will be much" @@ -52935,8 +52976,8 @@ 480, 467, 480, - 96, - 98, + 92, + 94, true, "uncommon than", "uncommon than" @@ -52956,8 +52997,8 @@ 486, 481, 486, - 98, - 99, + 94, + 95, true, "cells", "cells" @@ -52977,8 +53018,8 @@ 495, 487, 495, - 99, - 101, + 95, + 97, true, "with the", "with the" @@ -52998,8 +53039,8 @@ 501, 496, 501, - 101, - 102, + 97, + 98, true, "label", "label" @@ -53019,8 +53060,8 @@ 504, 502, 504, - 102, - 103, + 98, + 99, true, "of", "of" @@ -53040,8 +53081,8 @@ 509, 505, 509, - 103, - 104, + 99, + 100, true, "Text", "Text" @@ -53061,8 +53102,8 @@ 635, 511, 635, - 105, - 128, + 101, + 124, true, "Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", "Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process." @@ -53082,8 +53123,8 @@ 526, 521, 526, - 108, - 109, + 104, + 105, true, "model", "model" @@ -53103,8 +53144,8 @@ 543, 527, 543, + 105, 109, - 113, true, "needs to be very", "needs to be very" @@ -53124,8 +53165,8 @@ 535, 533, 535, - 110, - 111, + 106, + 107, true, "to", "to" @@ -53145,8 +53186,8 @@ 552, 544, 552, - 113, - 115, + 109, + 111, true, "quick in", "quick in" @@ -53166,8 +53207,8 @@ 576, 553, 576, - 115, - 118, + 111, + 114, true, "training and predicting", "training and predicting" @@ -53187,8 +53228,8 @@ 561, 553, 561, - 115, - 116, + 111, + 112, true, "training", "training" @@ -53208,8 +53249,8 @@ 576, 566, 576, - 117, - 118, + 113, + 114, true, "predicting", "predicting" @@ -53229,8 +53270,8 @@ 583, 578, 583, - 119, - 120, + 115, + 116, true, "since", "since" @@ -53250,8 +53291,8 @@ 599, 587, 599, - 121, - 123, + 117, + 119, true, "will support", "will support" @@ -53271,8 +53312,8 @@ 634, 604, 634, - 124, - 127, + 120, + 123, true, "interactive annotation process", "interactive annotation process" @@ -56044,7 +56085,7 @@ 0, 126, 0, - 29, + 28, true, "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$.", "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$." @@ -56254,7 +56295,7 @@ 117, 125, 20, - 28, + 27, true, "B^{12}", "B$^{12}$" @@ -56275,7 +56316,7 @@ 121, 123, 24, - 26, + 25, true, "12", "12" @@ -56295,8 +56336,8 @@ 223, 127, 223, - 29, - 52, + 28, + 47, true, "We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels.", "We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels." @@ -56316,8 +56357,8 @@ 144, 139, 144, + 30, 31, - 32, true, "chose", "chose" @@ -56337,8 +56378,8 @@ 148, 145, 148, + 31, 32, - 35, true, "100", "100" @@ -56358,8 +56399,8 @@ 160, 149, 160, + 32, 35, - 38, true, "open-access", "open-access" @@ -56379,8 +56420,8 @@ 167, 154, 167, - 37, - 39, + 34, + 36, true, "access papers", "access papers" @@ -56400,8 +56441,8 @@ 181, 172, 181, - 40, - 41, + 37, + 38, true, "annotated", "annotated" @@ -56421,8 +56462,8 @@ 185, 182, 185, - 41, - 44, + 38, + 39, true, "400", "400" @@ -56442,8 +56483,8 @@ 191, 186, 191, - 44, - 45, + 39, + 40, true, "pages", "pages" @@ -56463,8 +56504,8 @@ 194, 192, 194, - 45, - 46, + 40, + 41, true, "of", "of" @@ -56484,8 +56525,8 @@ 204, 200, 204, - 47, - 48, + 42, + 43, true, "with", "with" @@ -56505,8 +56546,8 @@ 206, 205, 206, - 48, - 49, + 43, + 44, true, "6", "6" @@ -56526,8 +56567,8 @@ 222, 207, 222, - 49, - 51, + 44, + 46, true, "semantic labels", "semantic labels" @@ -56547,8 +56588,8 @@ 369, 224, 369, - 52, - 78, + 47, + 73, true, "Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label.", "Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label." @@ -56568,8 +56609,8 @@ 230, 224, 230, - 52, - 53, + 47, + 48, true, "Tables", "Tables" @@ -56589,8 +56630,8 @@ 232, 231, 232, - 53, - 54, + 48, + 49, true, "2", "2" @@ -56610,8 +56651,8 @@ 238, 233, 238, - 54, - 55, + 49, + 50, true, "shows", "shows" @@ -56631,8 +56672,8 @@ 259, 243, 259, - 56, - 58, + 51, + 53, true, "confusion matrix", "confusion matrix" @@ -56652,8 +56693,8 @@ 271, 260, 271, - 58, - 60, + 53, + 55, true, "between the", "between the" @@ -56673,8 +56714,8 @@ 294, 285, 294, - 63, - 64, + 58, + 59, true, "predicted", "predicted" @@ -56694,8 +56735,8 @@ 301, 295, 301, - 64, - 65, + 59, + 60, true, "labels", "labels" @@ -56715,8 +56756,8 @@ 316, 310, 316, - 67, - 69, + 62, + 64, true, "as the", "as the" @@ -56736,8 +56777,8 @@ 331, 317, 331, - 69, - 71, + 64, + 66, true, "derived recall", "derived recall" @@ -56757,8 +56798,8 @@ 345, 325, 345, - 70, - 73, + 65, + 68, true, "recall and precision", "recall and precision" @@ -56778,8 +56819,8 @@ 353, 336, 353, - 72, - 74, + 67, + 69, true, "precision metrics", "precision metrics" @@ -56799,8 +56840,8 @@ 362, 354, 362, - 74, - 76, + 69, + 71, true, "for each", "for each" @@ -56820,8 +56861,8 @@ 368, 363, 368, - 76, - 77, + 71, + 72, true, "label", "label" @@ -56841,8 +56882,8 @@ 462, 370, 462, - 78, - 98, + 73, + 92, true, "We observe that the recall and precision numbers are excellent, with most of them above 99%.", "We observe that the recall and precision numbers are excellent, with most of them above 99%." @@ -56862,8 +56903,8 @@ 380, 373, 380, - 79, - 80, + 74, + 75, true, "observe", "observe" @@ -56883,8 +56924,8 @@ 389, 381, 389, - 80, - 82, + 75, + 77, true, "that the", "that the" @@ -56904,8 +56945,8 @@ 410, 390, 410, - 82, - 85, + 77, + 80, true, "recall and precision", "recall and precision" @@ -56925,8 +56966,8 @@ 396, 390, 396, - 82, - 83, + 77, + 78, true, "recall", "recall" @@ -56946,8 +56987,8 @@ 418, 401, 418, - 84, - 86, + 79, + 81, true, "precision numbers", "precision numbers" @@ -56967,8 +57008,8 @@ 422, 419, 422, - 86, - 87, + 81, + 82, true, "are", "are" @@ -56988,8 +57029,8 @@ 438, 434, 438, - 89, - 90, + 84, + 85, true, "with", "with" @@ -57009,8 +57050,8 @@ 446, 444, 446, - 91, - 92, + 86, + 87, true, "of", "of" @@ -57030,8 +57071,8 @@ 457, 452, 457, - 93, - 94, + 88, + 89, true, "above", "above" @@ -57051,8 +57092,8 @@ 461, 458, 461, - 94, - 97, + 89, + 91, true, "99%", "99%" @@ -57072,8 +57113,8 @@ 460, 458, 460, - 94, - 96, + 89, + 90, true, "99", "99" @@ -57093,8 +57134,8 @@ 558, 463, 558, - 98, - 115, + 92, + 109, true, "This is not surprising, since we are building models that specialise for a particular template.", "This is not surprising, since we are building models that specialise for a particular template." @@ -57114,8 +57155,8 @@ 474, 468, 474, - 99, - 101, + 93, + 95, true, "is not", "is not" @@ -57135,8 +57176,8 @@ 492, 487, 492, - 103, - 104, + 97, + 98, true, "since", "since" @@ -57156,8 +57197,8 @@ 508, 496, 508, - 105, - 107, + 99, + 101, true, "are building", "are building" @@ -57177,8 +57218,8 @@ 515, 509, 515, - 107, - 108, + 101, + 102, true, "models", "models" @@ -57198,8 +57239,8 @@ 531, 521, 531, - 109, - 110, + 103, + 104, true, "specialise", "specialise" @@ -57219,8 +57260,8 @@ 537, 532, 537, - 110, - 112, + 104, + 106, true, "for a", "for a" @@ -57240,8 +57281,8 @@ 557, 538, 557, - 112, - 114, + 106, + 108, true, "particular template", "particular template" @@ -60118,7 +60159,7 @@ 364, 667, 95, - 256, + 226, true, "[{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...]", "[{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...]" @@ -60181,7 +60222,7 @@ 376, 438, 101, - 147, + 132, true, "[{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }]", "[{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }]" @@ -60244,7 +60285,7 @@ 388, 423, 107, - 139, + 124, true, "[52.304, 509.750, 168.099, 523.980]", "[52.304, 509.750, 168.099, 523.980]" @@ -60265,7 +60306,7 @@ 389, 395, 108, - 114, + 111, true, "52.304", "52.304" @@ -60285,8 +60326,8 @@ 410, 397, 404, + 112, 115, - 122, true, "509.750", "509.750" @@ -60306,8 +60347,8 @@ 419, 406, 413, - 123, - 130, + 116, + 119, true, "168.099", "168.099" @@ -60327,8 +60368,8 @@ 428, 415, 422, - 131, - 138, + 120, + 123, true, "523.980", "523.980" @@ -60348,8 +60389,8 @@ 438, 425, 432, - 140, - 143, + 125, + 128, true, "'page '", "'page '" @@ -60369,8 +60410,8 @@ 436, 426, 430, - 141, - 142, + 126, + 127, true, "page", "page" @@ -60390,8 +60431,8 @@ 441, 434, 435, - 144, - 145, + 129, + 130, true, "1", "1" @@ -60411,8 +60452,8 @@ 453, 440, 447, - 148, - 151, + 133, + 136, true, "'type '", "'type '" @@ -60432,8 +60473,8 @@ 451, 441, 445, - 149, - 150, + 134, + 135, true, "type", "type" @@ -60453,8 +60494,8 @@ 474, 449, 468, - 152, - 159, + 137, + 144, true, "'subtitle-level-1 '", "'subtitle-level-1 '" @@ -60474,8 +60515,8 @@ 472, 450, 466, - 153, - 158, + 138, + 143, true, "subtitle-level-1", "subtitle-level-1" @@ -60495,8 +60536,8 @@ 464, 450, 458, - 153, - 154, + 138, + 139, true, "subtitle", "subtitle" @@ -60516,8 +60557,8 @@ 470, 459, 464, - 155, - 156, + 140, + 141, true, "level", "level" @@ -60537,8 +60578,8 @@ 472, 464, 466, - 156, - 158, + 141, + 143, true, "-1", "-1" @@ -60558,8 +60599,8 @@ 483, 470, 477, - 160, - 163, + 145, + 148, true, "'text '", "'text '" @@ -60579,8 +60620,8 @@ 481, 471, 475, - 161, - 162, + 146, + 147, true, "text", "text" @@ -60600,8 +60641,8 @@ 487, 480, 481, - 165, - 166, + 150, + 151, true, "1", "1" @@ -60621,8 +60662,8 @@ 500, 482, 494, - 166, - 167, + 151, + 152, true, "INTRODUCTION", "INTRODUCTION" @@ -60642,8 +60683,8 @@ 515, 502, 509, - 171, - 174, + 156, + 159, true, "'prov '", "'prov '" @@ -60663,8 +60704,8 @@ 513, 503, 507, - 172, - 173, + 157, + 158, true, "prov", "prov" @@ -60684,8 +60725,8 @@ 579, 511, 573, - 175, - 221, + 160, + 191, true, "[{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }]", "[{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }]" @@ -60705,8 +60746,8 @@ 527, 514, 521, - 177, - 180, + 162, + 165, true, "'bbox '", "'bbox '" @@ -60726,8 +60767,8 @@ 525, 515, 519, - 178, - 179, + 163, + 164, true, "bbox", "bbox" @@ -60747,8 +60788,8 @@ 564, 523, 558, - 181, - 213, + 166, + 183, true, "[52.304, 337.678, 286.067, 380.475]", "[52.304, 337.678, 286.067, 380.475]" @@ -60768,8 +60809,8 @@ 536, 524, 530, - 182, - 188, + 167, + 170, true, "52.304", "52.304" @@ -60789,8 +60830,8 @@ 545, 532, 539, - 189, - 196, + 171, + 174, true, "337.678", "337.678" @@ -60810,8 +60851,8 @@ 554, 541, 548, - 197, - 204, + 175, + 178, true, "286.067", "286.067" @@ -60831,8 +60872,8 @@ 563, 550, 557, - 205, - 212, + 179, + 182, true, "380.475", "380.475" @@ -60852,8 +60893,8 @@ 573, 560, 567, - 214, - 217, + 184, + 187, true, "'page '", "'page '" @@ -60873,8 +60914,8 @@ 571, 561, 565, - 215, - 216, + 185, + 186, true, "page", "page" @@ -60894,8 +60935,8 @@ 576, 569, 570, - 218, - 219, + 188, + 189, true, "1", "1" @@ -60915,8 +60956,8 @@ 588, 575, 582, - 222, - 225, + 192, + 195, true, "'type '", "'type '" @@ -60936,8 +60977,8 @@ 586, 576, 580, - 223, - 224, + 193, + 194, true, "type", "type" @@ -60957,8 +60998,8 @@ 602, 584, 596, - 226, - 229, + 196, + 199, true, "'paragraph '", "'paragraph '" @@ -60978,8 +61019,8 @@ 600, 585, 594, - 227, - 228, + 197, + 198, true, "paragraph", "paragraph" @@ -60999,8 +61040,8 @@ 611, 598, 605, - 230, - 233, + 200, + 203, true, "'text '", "'text '" @@ -61020,8 +61061,8 @@ 609, 599, 603, - 231, - 232, + 201, + 202, true, "text", "text" @@ -61041,8 +61082,8 @@ 629, 611, 623, - 236, - 238, + 206, + 208, true, "is estimated", "is estimated" @@ -61062,8 +61103,8 @@ 634, 624, 628, - 238, - 239, + 208, + 209, true, "that", "that" @@ -61083,8 +61124,8 @@ 640, 629, 634, - 239, - 244, + 209, + 214, true, "[...]", "[...]" @@ -61104,8 +61145,8 @@ 640, 629, 634, - 239, - 244, + 209, + 214, true, "[...]", "[...]" @@ -61125,8 +61166,8 @@ 644, 635, 638, - 244, - 245, + 214, + 215, true, "put", "put" @@ -61146,8 +61187,8 @@ 655, 639, 649, - 245, - 247, + 215, + 217, true, "these into", "these into" @@ -61167,8 +61208,8 @@ 663, 650, 657, - 247, - 248, + 217, + 218, true, "context", "context" @@ -61188,8 +61229,8 @@ 672, 663, 666, - 252, - 255, + 222, + 225, true, "etc", "..." @@ -61209,8 +61250,8 @@ 684, 669, 678, - 257, - 260, + 227, + 230, true, "'tables '", "'tables '" @@ -61230,8 +61271,8 @@ 682, 670, 676, - 258, - 259, + 228, + 229, true, "tables", "tables" @@ -61251,8 +61292,8 @@ 697, 680, 691, - 261, - 272, + 231, + 242, true, "[{...},...]", "[{...},...]" @@ -61272,8 +61313,8 @@ 697, 680, 691, - 261, - 272, + 231, + 242, true, "[{...},...]", "[{...},...]" @@ -61293,8 +61334,8 @@ 708, 693, 702, - 273, - 276, + 243, + 246, true, "'images '", "'images '" @@ -61314,8 +61355,8 @@ 706, 694, 700, - 274, - 275, + 244, + 245, true, "images", "images" @@ -61335,8 +61376,8 @@ 721, 704, 715, - 277, - 288, + 247, + 258, true, "[{...},...]", "[{...},...]" @@ -61356,8 +61397,8 @@ 721, 704, 715, - 277, - 288, + 247, + 258, true, "[{...},...]", "[{...},...]" @@ -65662,7 +65703,7 @@ 205, 307, 42, - 62, + 61, true, "The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", "The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python." @@ -65809,7 +65850,7 @@ 275, 277, 54, - 56, + 55, true, "13", "13" @@ -65829,8 +65870,8 @@ 296, 282, 296, - 57, - 59, + 56, + 58, true, "is implemented", "is implemented" @@ -65850,8 +65891,8 @@ 299, 297, 299, + 58, 59, - 60, true, "in", "in" @@ -65871,8 +65912,8 @@ 306, 300, 306, + 59, 60, - 61, true, "Python", "Python" @@ -66124,7 +66165,7 @@ 123, 191, 22, - 40, + 39, true, "The task scheduling is done with the Message Broker RabbitMQ$^{14}$.", "The task scheduling is done with the Message Broker RabbitMQ$^{14}$." @@ -66229,7 +66270,7 @@ 175, 190, 31, - 39, + 38, true, "RabbitMQ^{14}", "RabbitMQ$^{14}$" @@ -66250,7 +66291,7 @@ 186, 188, 35, - 37, + 36, true, "14", "14" @@ -66270,8 +66311,8 @@ 256, 192, 256, - 40, - 60, + 39, + 58, true, "The results are stored in the in-memory data store Redis$^{15}$.", "The results are stored in the in-memory data store Redis$^{15}$." @@ -66291,8 +66332,8 @@ 203, 196, 203, + 40, 41, - 42, true, "results", "results" @@ -66312,8 +66353,8 @@ 214, 204, 214, - 42, - 44, + 41, + 43, true, "are stored", "are stored" @@ -66333,8 +66374,8 @@ 221, 215, 221, - 44, - 46, + 43, + 45, true, "in the", "in the" @@ -66354,8 +66395,8 @@ 231, 222, 231, - 46, - 49, + 45, + 48, true, "in-memory", "in-memory" @@ -66375,8 +66416,8 @@ 224, 222, 224, + 45, 46, - 47, true, "in", "in" @@ -66396,8 +66437,8 @@ 236, 224, 236, - 47, - 50, + 46, + 49, true, "-memory data", "-memory data" @@ -66417,8 +66458,8 @@ 242, 237, 242, + 49, 50, - 51, true, "store", "store" @@ -66438,8 +66479,8 @@ 255, 243, 255, - 51, - 59, + 50, + 57, true, "Redis^{15}", "Redis$^{15}$" @@ -66459,8 +66500,8 @@ 248, 243, 248, + 50, 51, - 52, true, "Redis", "Redis" @@ -66480,8 +66521,8 @@ 253, 251, 253, + 54, 55, - 57, true, "15", "15" @@ -66501,8 +66542,8 @@ 612, 257, 612, - 60, - 125, + 58, + 123, true, "In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully.", "In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully." @@ -66522,8 +66563,8 @@ 259, 257, 259, - 60, - 61, + 58, + 59, true, "In", "In" @@ -66543,8 +66584,8 @@ 265, 260, 265, - 61, - 62, + 59, + 60, true, "order", "order" @@ -66564,8 +66605,8 @@ 268, 266, 268, - 62, - 63, + 60, + 61, true, "to", "to" @@ -66585,8 +66626,8 @@ 276, 269, 276, - 63, - 64, + 61, + 62, true, "perform", "perform" @@ -66606,8 +66647,8 @@ 302, 277, 302, - 64, - 67, + 62, + 65, true, "certain consecutive tasks", "certain consecutive tasks" @@ -66627,8 +66668,8 @@ 497, 303, 497, - 67, - 106, + 65, + 104, true, "(e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images)", "(e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images)" @@ -66648,8 +66689,8 @@ 309, 304, 309, - 68, - 72, + 66, + 70, true, "eg", "e. g." @@ -66669,8 +66710,8 @@ 317, 310, 317, - 72, - 73, + 70, + 71, true, "parsing", "parsing" @@ -66690,8 +66731,8 @@ 328, 320, 328, + 72, 74, - 76, true, "PDF page", "PDF page" @@ -66711,8 +66752,8 @@ 333, 329, 333, - 76, - 77, + 74, + 75, true, "with", "with" @@ -66732,8 +66773,8 @@ 350, 334, 350, + 75, 77, - 79, true, "embedded scanned", "embedded scanned" @@ -66753,8 +66794,8 @@ 357, 351, 357, - 79, - 80, + 77, + 78, true, "images", "images" @@ -66774,8 +66815,8 @@ 372, 358, 372, + 78, 80, - 82, true, "requires first", "requires first" @@ -66795,8 +66836,8 @@ 382, 375, 382, - 83, - 84, + 81, + 82, true, "parsing", "parsing" @@ -66816,8 +66857,8 @@ 389, 383, 389, + 82, 84, - 86, true, "of the", "of the" @@ -66837,8 +66878,8 @@ 411, 390, 411, - 86, - 89, + 84, + 87, true, "programmatic PDF page", "programmatic PDF page" @@ -66858,8 +66899,8 @@ 414, 412, 414, - 89, - 90, + 87, + 88, true, "to", "to" @@ -66879,8 +66920,8 @@ 422, 415, 422, - 90, - 91, + 88, + 89, true, "extract", "extract" @@ -66900,8 +66941,8 @@ 433, 427, 433, - 92, - 93, + 90, + 91, true, "images", "images" @@ -66921,8 +66962,8 @@ 457, 446, 457, + 94, 96, - 98, true, "OCR service", "OCR service" @@ -66942,8 +66983,8 @@ 460, 458, 460, - 98, - 99, + 96, + 97, true, "to", "to" @@ -66963,8 +67004,8 @@ 468, 461, 468, - 99, - 100, + 97, + 98, true, "extract", "extract" @@ -66984,8 +67025,8 @@ 478, 473, 478, - 101, - 102, + 99, + 100, true, "cells", "cells" @@ -67005,8 +67046,8 @@ 489, 479, 489, + 100, 102, - 104, true, "from these", "from these" @@ -67026,8 +67067,8 @@ 496, 490, 496, - 104, - 105, + 102, + 103, true, "images", "images" @@ -67047,8 +67088,8 @@ 519, 514, 519, - 109, - 110, + 107, + 108, true, "chain", "chain" @@ -67068,8 +67109,8 @@ 525, 520, 525, - 110, - 111, + 108, + 109, true, "tasks", "tasks" @@ -67089,8 +67130,8 @@ 536, 527, 536, + 110, 112, - 114, true, "such that", "such that" @@ -67110,8 +67151,8 @@ 553, 537, 553, + 112, 114, - 116, true, "subsequent steps", "subsequent steps" @@ -67131,8 +67172,8 @@ 571, 554, 571, - 116, - 119, + 114, + 117, true, "are only executed", "are only executed" @@ -67152,8 +67193,8 @@ 578, 572, 578, + 117, 119, - 121, true, "if the", "if the" @@ -67173,8 +67214,8 @@ 611, 588, 611, + 120, 122, - 124, true, "terminated successfully", "terminated successfully" @@ -67194,8 +67235,8 @@ 702, 613, 702, - 125, - 142, + 123, + 140, true, "This approach allows for a very robust, fault-tolerant service with very little downtime.", "This approach allows for a very robust, fault-tolerant service with very little downtime." @@ -67215,8 +67256,8 @@ 626, 618, 626, - 126, - 127, + 124, + 125, true, "approach", "approach" @@ -67236,8 +67277,8 @@ 633, 627, 633, - 127, - 128, + 125, + 126, true, "allows", "allows" @@ -67257,8 +67298,8 @@ 639, 634, 639, + 126, 128, - 130, true, "for a", "for a" @@ -67278,8 +67319,8 @@ 667, 653, 667, - 133, - 136, + 131, + 134, true, "fault-tolerant", "fault-tolerant" @@ -67299,8 +67340,8 @@ 675, 659, 675, + 133, 135, - 137, true, "tolerant service", "tolerant service" @@ -67320,8 +67361,8 @@ 680, 676, 680, - 137, - 138, + 135, + 136, true, "with", "with" @@ -67341,8 +67382,8 @@ 701, 686, 701, + 137, 139, - 141, true, "little downtime", "little downtime" @@ -67825,7 +67866,7 @@ 202, 347, 42, - 73, + 72, true, "In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$.", "In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$." @@ -68161,7 +68202,7 @@ 332, 346, 64, - 72, + 71, true, "library^{16}", "library$^{16}$" @@ -68182,7 +68223,7 @@ 342, 344, 68, - 70, + 69, true, "16", "16" @@ -68202,8 +68243,8 @@ 503, 348, 503, - 73, - 100, + 72, + 99, true, "This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker.", "This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker." @@ -68223,8 +68264,8 @@ 359, 353, 359, + 73, 74, - 75, true, "allows", "allows" @@ -68244,8 +68285,8 @@ 365, 363, 365, + 75, 76, - 77, true, "to", "to" @@ -68265,8 +68306,8 @@ 383, 378, 383, + 77, 78, - 79, true, "scale", "scale" @@ -68286,8 +68327,8 @@ 405, 388, 405, - 80, - 82, + 79, + 81, true, "compute resources", "compute resources" @@ -68307,8 +68348,8 @@ 417, 407, 417, - 83, - 85, + 82, + 84, true, "since each", "since each" @@ -68328,8 +68369,8 @@ 424, 418, 424, + 84, 85, - 86, true, "worker", "worker" @@ -68349,8 +68390,8 @@ 453, 425, 453, - 86, - 90, + 85, + 89, true, "can be spawned automatically", "can be spawned automatically" @@ -68370,8 +68411,8 @@ 460, 454, 460, - 90, - 92, + 89, + 91, true, "on the", "on the" @@ -68391,8 +68432,8 @@ 468, 461, 468, + 91, 92, - 93, true, "cluster", "cluster" @@ -68412,8 +68453,8 @@ 481, 473, 481, + 93, 94, - 95, true, "register", "register" @@ -68433,8 +68474,8 @@ 495, 489, 495, - 96, - 98, + 95, + 97, true, "to the", "to the" @@ -68454,8 +68495,8 @@ 502, 496, 502, + 97, 98, - 99, true, "broker", "broker" @@ -68475,8 +68516,8 @@ 579, 504, 579, - 100, - 116, + 99, + 115, true, "The workers are not only consumers of tasks, but may also produce new ones.", "The workers are not only consumers of tasks, but may also produce new ones." @@ -68496,8 +68537,8 @@ 515, 508, 515, + 100, 101, - 102, true, "workers", "workers" @@ -68517,8 +68558,8 @@ 523, 516, 523, - 102, - 104, + 101, + 103, true, "are not", "are not" @@ -68538,8 +68579,8 @@ 538, 524, 538, - 104, - 106, + 103, + 105, true, "only consumers", "only consumers" @@ -68559,8 +68600,8 @@ 541, 539, 541, + 105, 106, - 107, true, "of", "of" @@ -68580,8 +68621,8 @@ 547, 542, 547, + 106, 107, - 108, true, "tasks", "tasks" @@ -68601,8 +68642,8 @@ 569, 562, 569, + 111, 112, - 113, true, "produce", "produce" @@ -68622,8 +68663,8 @@ 578, 570, 578, - 113, - 115, + 112, + 114, true, "new ones", "new ones" @@ -68643,8 +68684,8 @@ 587, 585, 587, + 116, 117, - 118, true, "is", "is" @@ -68664,8 +68705,8 @@ 596, 592, 596, + 118, 119, - 120, true, "case", "case" @@ -68685,8 +68726,8 @@ 604, 597, 604, - 120, - 122, + 119, + 121, true, "for the", "for the" @@ -68706,8 +68747,8 @@ 613, 605, 613, + 121, 122, - 123, true, "requests", "requests" @@ -70114,7 +70155,7 @@ 452, 633, 93, - 141, + 140, true, "However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", "However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer." @@ -70240,7 +70281,7 @@ 541, 576, 112, - 127, + 126, true, "(in our case we use MongoDB$^{17}$)", "(in our case we use MongoDB$^{17}$)" @@ -70345,7 +70386,7 @@ 568, 575, 119, - 126, + 125, true, "^{17}", "$^{17}$" @@ -70366,7 +70407,7 @@ 571, 573, 122, - 124, + 123, true, "17", "17" @@ -70386,8 +70427,8 @@ 579, 577, 579, + 126, 127, - 128, true, "on", "on" @@ -70407,8 +70448,8 @@ 583, 580, 583, + 127, 128, - 129, true, "top", "top" @@ -70428,8 +70469,8 @@ 586, 584, 586, + 128, 129, - 130, true, "to", "to" @@ -70449,8 +70490,8 @@ 593, 587, 593, + 129, 130, - 131, true, "manage", "manage" @@ -70470,8 +70511,8 @@ 613, 598, 613, - 132, - 135, + 131, + 134, true, "storage and act", "storage and act" @@ -70491,8 +70532,8 @@ 605, 598, 605, + 131, 132, - 133, true, "storage", "storage" @@ -70512,8 +70553,8 @@ 613, 610, 613, + 133, 134, - 135, true, "act", "act" @@ -70533,8 +70574,8 @@ 619, 614, 619, - 135, - 137, + 134, + 136, true, "as an", "as an" @@ -70554,8 +70595,8 @@ 632, 620, 632, - 137, - 140, + 136, + 139, true, "access-layer", "access-layer" @@ -70575,8 +70616,8 @@ 626, 620, 626, + 136, 137, - 138, true, "access", "access" @@ -70596,8 +70637,8 @@ 632, 627, 632, + 138, 139, - 140, true, "layer", "layer" @@ -73411,7 +73452,7 @@ 0, 171, 0, - 34, + 32, true, "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution.", "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution." @@ -73516,7 +73557,7 @@ 50, 52, 7, - 9, + 8, true, "18", "18" @@ -73536,8 +73577,8 @@ 65, 53, 65, - 9, - 11, + 8, + 10, true, "available on", "available on" @@ -73557,8 +73598,8 @@ 86, 66, 86, - 11, - 14, + 10, + 13, true, "many cloud providers", "many cloud providers" @@ -73578,8 +73619,8 @@ 105, 95, 105, - 16, - 19, + 15, + 18, true, "on-premise", "on-premise" @@ -73599,8 +73640,8 @@ 97, 95, 97, + 15, 16, - 17, true, "on", "on" @@ -73620,8 +73661,8 @@ 119, 98, 119, - 18, - 20, + 17, + 19, true, "premise installations", "premise installations" @@ -73641,8 +73682,8 @@ 126, 121, 126, - 21, - 25, + 20, + 24, true, "eg", "e. g." @@ -73662,8 +73703,8 @@ 132, 127, 132, + 24, 25, - 26, true, "using", "using" @@ -73683,8 +73724,8 @@ 154, 137, 154, - 27, - 30, + 26, + 29, true, "IBM Cloud Private", "IBM Cloud Private" @@ -73704,8 +73745,8 @@ 157, 155, 157, + 29, 30, - 32, true, "19", "19" @@ -73725,8 +73766,8 @@ 170, 158, 170, - 32, - 33, + 30, + 31, true, "distribution", "distribution" @@ -73746,8 +73787,8 @@ 302, 172, 302, - 34, - 55, + 32, + 53, true, "Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", "Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints." @@ -73767,8 +73808,8 @@ 181, 172, 181, - 34, - 35, + 32, + 33, true, "Depending", "Depending" @@ -73788,8 +73829,8 @@ 188, 182, 188, + 33, 35, - 37, true, "on the", "on the" @@ -73809,8 +73850,8 @@ 201, 189, 201, - 37, - 38, + 35, + 36, true, "requirements", "requirements" @@ -73830,8 +73871,8 @@ 223, 207, 223, + 38, 40, - 42, true, "storage services", "storage services" @@ -73851,8 +73892,8 @@ 236, 224, 236, + 40, 42, - 44, true, "are launched", "are launched" @@ -73872,8 +73913,8 @@ 247, 237, 247, + 42, 44, - 46, true, "inside the", "inside the" @@ -73893,8 +73934,8 @@ 260, 248, 260, + 44, 46, - 48, true, "same cluster", "same cluster" @@ -73914,8 +73955,8 @@ 291, 264, 291, - 49, - 53, + 47, + 51, true, "linked to externally hosted", "linked to externally hosted" @@ -73935,8 +73976,8 @@ 273, 271, 273, - 50, - 51, + 48, + 49, true, "to", "to" @@ -73956,8 +73997,8 @@ 301, 292, 301, - 53, - 54, + 51, + 52, true, "endpoints", "endpoints" @@ -77170,7 +77211,7 @@ 6, 8, 1, - 3, + 2, true, "20", "20" @@ -77190,8 +77231,8 @@ 13, 9, 13, - 3, - 5, + 2, + 4, true, "as a", "as a" @@ -77211,8 +77252,8 @@ 22, 14, 22, + 4, 5, - 6, true, "function", "function" @@ -77232,8 +77273,8 @@ 25, 23, 25, + 5, 6, - 7, true, "of", "of" @@ -77253,8 +77294,8 @@ 30, 26, 30, + 6, 7, - 8, true, "time", "time" @@ -77274,8 +77315,8 @@ 177, 32, 177, - 9, - 41, + 8, + 37, true, "As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017.", "As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017." @@ -77295,8 +77336,8 @@ 34, 32, 34, + 8, 9, - 10, true, "As", "As" @@ -77316,8 +77357,8 @@ 46, 39, 46, - 11, - 13, + 10, + 12, true, "can see", "can see" @@ -77337,8 +77378,8 @@ 58, 52, 58, + 14, 15, - 16, true, "number", "number" @@ -77358,8 +77399,8 @@ 61, 59, 61, + 15, 16, - 17, true, "of", "of" @@ -77379,8 +77420,8 @@ 67, 62, 67, + 16, 17, - 18, true, "users", "users" @@ -77400,8 +77441,8 @@ 81, 72, 81, + 18, 19, - 20, true, "processed", "processed" @@ -77421,8 +77462,8 @@ 91, 82, 91, - 20, - 22, + 19, + 21, true, "PDF pages", "PDF pages" @@ -77442,8 +77483,8 @@ 120, 92, 120, - 22, - 26, + 21, + 25, true, "has been increasing steadily", "has been increasing steadily" @@ -77463,8 +77504,8 @@ 125, 121, 125, + 25, 26, - 27, true, "over", "over" @@ -77484,8 +77525,8 @@ 130, 126, 130, + 26, 27, - 28, true, "time", "time" @@ -77505,8 +77546,8 @@ 140, 131, 140, - 28, - 30, + 27, + 29, true, "since the", "since the" @@ -77526,8 +77567,8 @@ 147, 141, 147, + 29, 30, - 31, true, "launch", "launch" @@ -77547,8 +77588,8 @@ 150, 148, 150, + 30, 31, - 32, true, "of", "of" @@ -77568,8 +77609,8 @@ 162, 155, 162, + 32, 33, - 34, true, "service", "service" @@ -77589,8 +77630,8 @@ 165, 163, 165, + 33, 34, - 35, true, "in", "in" @@ -77610,8 +77651,8 @@ 171, 166, 171, + 34, 35, - 36, true, "April", "April" @@ -77631,8 +77672,8 @@ 176, 172, 176, + 35, 36, - 40, true, "2017", "2017" @@ -77652,8 +77693,8 @@ 363, 178, 363, - 41, - 75, + 37, + 71, true, "It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time.", "It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time." @@ -77673,8 +77714,8 @@ 191, 181, 191, - 42, - 44, + 38, + 40, true, "is however", "is however" @@ -77694,8 +77735,8 @@ 206, 204, 206, - 45, - 46, + 41, + 42, true, "to", "to" @@ -77715,8 +77756,8 @@ 210, 207, 210, - 46, - 47, + 42, + 43, true, "see", "see" @@ -77736,8 +77777,8 @@ 215, 211, 215, - 47, - 48, + 43, + 44, true, "that", "that" @@ -77757,8 +77798,8 @@ 225, 222, 225, - 49, - 50, + 45, + 46, true, "are", "are" @@ -77778,8 +77819,8 @@ 237, 226, 237, - 50, - 52, + 46, + 48, true, "sharp steps", "sharp steps" @@ -77799,8 +77840,8 @@ 249, 239, 249, - 53, - 54, + 49, + 50, true, "indicating", "indicating" @@ -77820,8 +77861,8 @@ 259, 250, 259, - 54, - 56, + 50, + 52, true, "that some", "that some" @@ -77841,8 +77882,8 @@ 265, 260, 265, - 56, - 57, + 52, + 53, true, "users", "users" @@ -77862,8 +77903,8 @@ 285, 266, 285, - 57, - 60, + 53, + 56, true, "have been uploading", "have been uploading" @@ -77883,8 +77924,8 @@ 301, 286, 301, - 60, - 62, + 56, + 58, true, "massive amounts", "massive amounts" @@ -77904,8 +77945,8 @@ 304, 302, 304, - 62, - 63, + 58, + 59, true, "of", "of" @@ -77925,8 +77966,8 @@ 314, 305, 314, - 63, - 64, + 59, + 60, true, "documents", "documents" @@ -77946,8 +77987,8 @@ 323, 315, 323, - 64, - 66, + 60, + 62, true, "into the", "into the" @@ -77967,8 +78008,8 @@ 331, 324, 331, - 66, - 67, + 62, + 63, true, "service", "service" @@ -77988,8 +78029,8 @@ 336, 332, 336, - 67, - 69, + 63, + 65, true, "in a", "in a" @@ -78009,8 +78050,8 @@ 354, 342, 354, - 70, - 72, + 66, + 68, true, "small amount", "small amount" @@ -78030,8 +78071,8 @@ 357, 355, 357, - 72, - 73, + 68, + 69, true, "of", "of" @@ -78051,8 +78092,8 @@ 362, 358, 362, - 73, - 74, + 69, + 70, true, "time", "time" @@ -78072,8 +78113,8 @@ 504, 364, 504, - 75, - 103, + 71, + 99, true, "Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", "Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity." @@ -78093,8 +78134,8 @@ 370, 368, 370, - 76, - 77, + 72, + 73, true, "to", "to" @@ -78114,8 +78155,8 @@ 381, 375, 381, - 78, - 79, + 74, + 75, true, "design", "design" @@ -78135,8 +78176,8 @@ 393, 386, 393, - 81, - 83, + 77, + 79, true, "was not", "was not" @@ -78156,8 +78197,8 @@ 403, 396, 403, - 84, - 85, + 80, + 81, true, "problem", "problem" @@ -78177,8 +78218,8 @@ 406, 404, 406, - 85, - 86, + 81, + 82, true, "to", "to" @@ -78198,8 +78239,8 @@ 418, 407, 418, - 86, - 87, + 82, + 83, true, "accommodate", "accommodate" @@ -78219,8 +78260,8 @@ 430, 425, 430, - 88, - 89, + 84, + 85, true, "peaks", "peaks" @@ -78240,8 +78281,8 @@ 446, 439, 446, - 91, - 92, + 87, + 88, true, "service", "service" @@ -78261,8 +78302,8 @@ 450, 447, 450, - 92, - 93, + 88, + 89, true, "was", "was" @@ -78282,8 +78323,8 @@ 458, 456, 458, - 94, - 95, + 90, + 91, true, "to", "to" @@ -78303,8 +78344,8 @@ 465, 459, 465, - 95, - 96, + 91, + 92, true, "handle", "handle" @@ -78324,8 +78365,8 @@ 483, 472, 483, - 97, - 99, + 93, + 95, true, "short burst", "short burst" @@ -78345,8 +78386,8 @@ 486, 484, 486, - 99, - 100, + 95, + 96, true, "of", "of" @@ -78366,8 +78407,8 @@ 503, 487, 503, - 100, - 102, + 96, + 98, true, "extreme activity", "extreme activity" @@ -85066,7 +85107,7 @@ 118, 269, 28, - 78, + 66, true, "MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", "MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu)." @@ -85234,7 +85275,7 @@ 175, 179, 39, - 43, + 40, true, "2020", "2020" @@ -85254,8 +85295,8 @@ 185, 180, 185, - 43, - 44, + 40, + 41, true, "under", "under" @@ -85275,8 +85316,8 @@ 198, 186, 198, - 44, - 53, + 41, + 46, true, "NMBP-23-2016", "NMBP-23-2016" @@ -85296,8 +85337,8 @@ 190, 186, 190, - 44, - 45, + 41, + 42, true, "NMBP", "NMBP" @@ -85317,8 +85358,8 @@ 193, 190, 193, - 45, - 48, + 42, + 44, true, "-23", "-23" @@ -85338,8 +85379,8 @@ 198, 194, 198, - 49, - 53, + 45, + 46, true, "2016", "2016" @@ -85359,8 +85400,8 @@ 203, 199, 203, - 53, - 54, + 46, + 47, true, "call", "call" @@ -85380,8 +85421,8 @@ 208, 204, 208, - 54, - 55, + 47, + 48, true, "with", "with" @@ -85401,8 +85442,8 @@ 231, 209, 231, - 55, - 58, + 48, + 51, true, "Grant agreement number", "Grant agreement number" @@ -85422,8 +85463,8 @@ 238, 232, 238, - 58, - 64, + 51, + 52, true, "721027", "721027" @@ -85443,8 +85484,8 @@ 268, 239, 268, - 64, - 77, + 52, + 65, true, "(http://the-force-project.eu)", "(http://the-force-project.eu)" @@ -85464,8 +85505,8 @@ 267, 240, 267, - 65, - 76, + 53, + 64, true, "http://the-force-project.eu", "http://the-force-project.eu" @@ -85598,7 +85639,7 @@ ], [ "reference", - "title", + "date", 1712774266196702392, "TEXT", "#/texts/100", @@ -85612,32 +85653,11 @@ 74, 78, 20, - 24, + 21, true, "2015", "2015" ], - [ - "reference", - "journal", - 1712774266196702392, - "TEXT", - "#/texts/100", - 1.0, - 329104161878859757, - 14602804737313863528, - null, - null, - 80, - 85, - 80, - 85, - 25, - 26, - true, - "ICDAR", - "ICDAR" - ], [ "reference", "title", @@ -85645,19 +85665,19 @@ "TEXT", "#/texts/100", 1.0, - 1396556335874361340, - 396648570188163249, + 17804212744220731295, + 13329383501201933373, null, null, - 90, + 80, 159, - 90, + 80, 159, - 30, - 44, + 22, + 35, true, - "Competition on Recognition of Documents with Complex Layouts-RDCL2015", - "Competition on Recognition of Documents with Complex Layouts-RDCL2015" + "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015", + "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015" ], [ "reference", @@ -85674,8 +85694,8 @@ 249, 161, 249, - 45, - 59, + 36, + 49, true, "In Proceedings of the 13th International Conference on Document Analysis and Recognition", "In Proceedings of the 13th International Conference on Document Analysis and Recognition" @@ -85695,8 +85715,8 @@ 260, 251, 260, - 60, - 65, + 50, + 52, true, "ICDAR2015", "ICDAR2015" @@ -85716,12 +85736,33 @@ 268, 263, 268, - 67, - 68, + 54, + 55, true, "Nancy", "Nancy" ], + [ + "reference", + "date", + 1712774266196702392, + "TEXT", + "#/texts/100", + 1.0, + 10303630957638511768, + 3815340683710445282, + null, + null, + 270, + 279, + 270, + 279, + 56, + 59, + true, + "1151-1155", + "1151-1155" + ], [ "reference", "citation-number", @@ -85766,7 +85807,7 @@ ], [ "reference", - "title", + "date", 14718288547983000340, "TEXT", "#/texts/101", @@ -85780,31 +85821,73 @@ 17, 21, 6, - 10, + 7, true, "2001", "2001" ], [ "reference", - "journal", + "title", 14718288547983000340, "TEXT", "#/texts/101", 1.0, - 6555746984855475051, - 13858240110390271203, + 2109081024677782429, + 14560503901773287747, null, null, 23, - 55, + 37, 23, + 37, + 8, + 10, + true, + "Random Forests", + "Random Forests" + ], + [ + "reference", + "journal", + 14718288547983000340, + "TEXT", + "#/texts/101", + 1.0, + 13278563109182224937, + 9894237306486099503, + null, + null, + 39, + 55, + 39, 55, 11, - 16, + 13, + true, + "Machine Learning", + "Machine Learning" + ], + [ + "reference", + "date", + 14718288547983000340, + "TEXT", + "#/texts/101", + 1.0, + 10551073428908397011, + 16087676618282063646, + null, + null, + 63, + 74, + 63, + 74, + 17, + 20, true, - "Random Forests. Machine Learning", - "Random Forests. Machine Learning" + "01 Oct 2001", + "01 Oct 2001" ], [ "reference", @@ -85813,19 +85896,19 @@ "TEXT", "#/texts/101", 1.0, - 3534146179424153776, - 1305701981546673535, + 1225079762841478321, + 13531790532415888950, null, null, 83, - 101, + 122, 83, - 101, - 35, - 45, + 122, + 26, + 41, true, - "https://doi.org/10", - "https://doi.org/10" + "https://doi.org/10.1023/A:1010933404324", + "https://doi.org/10.1023/A:1010933404324" ], [ "reference", @@ -85932,6 +86015,27 @@ "C. M. Modena.", "C. M. Modena." ], + [ + "reference", + "date", + 16943780574244090186, + "TEXT", + "#/texts/102", + 1.0, + 389609625536085742, + 14383425253514843049, + null, + null, + 60, + 64, + 60, + 64, + 22, + 23, + true, + "1998", + "1998" + ], [ "reference", "title", @@ -85947,8 +86051,8 @@ 145, 66, 145, - 27, - 38, + 24, + 35, true, "Geometric layout analysis techniques for document image understanding: a review", "Geometric layout analysis techniques for document image understanding: a review" @@ -86123,7 +86227,7 @@ ], [ "reference", - "title", + "date", 8004985786049140169, "TEXT", "#/texts/103", @@ -86137,14 +86241,14 @@ 129, 133, 29, - 33, + 30, true, "2005", "2005" ], [ "reference", - "journal", + "title", 8004985786049140169, "TEXT", "#/texts/103", @@ -86157,8 +86261,8 @@ 190, 135, 187, - 34, - 43, + 31, + 40, true, "From Legacy Documents to XML: A Conversion Framework", "From Legacy Documents to XML: A Conversion Framework" @@ -86178,8 +86282,8 @@ 238, 198, 235, - 45, - 51, + 42, + 48, true, "Berlin Heidelberg, Berlin, Heidelberg", "Berlin Heidelberg, Berlin, Heidelberg" @@ -86199,12 +86303,33 @@ 266, 245, 263, - 59, - 69, + 53, + 62, true, "https://doi.org/10", "https://doi.org/10" ], + [ + "reference", + "doi", + 8004985786049140169, + "TEXT", + "#/texts/103", + 1.0, + 17297012968265468209, + 10114419193417306093, + null, + null, + 267, + 282, + 264, + 279, + 63, + 68, + true, + "1007/11551362_9", + "1007/11551362_9" + ], [ "reference", "citation-number", @@ -86249,24 +86374,45 @@ ], [ "reference", - "title", + "date", 12744546813104546377, "TEXT", "#/texts/104", 1.0, - 15801153477410803962, - 9542358830180887135, + 389609625548777059, + 1587769393776818040, null, null, 19, - 35, + 23, 19, - 35, + 23, 6, - 15, + 7, + true, + "2015", + "2015" + ], + [ + "reference", + "title", + 12744546813104546377, + "TEXT", + "#/texts/104", + 1.0, + 15491004285883184028, + 17483261521377705764, + null, + null, + 25, + 35, + 25, + 35, + 8, + 12, true, - "2015. Fast R-CNN", - "2015. Fast R-CNN" + "Fast R-CNN", + "Fast R-CNN" ], [ "reference", @@ -86283,8 +86429,8 @@ 112, 37, 112, - 16, - 30, + 13, + 24, true, "In Proceedings of the 2015 IEEE International Conference on Computer Vision", "In Proceedings of the 2015 IEEE International Conference on Computer Vision" @@ -86304,8 +86450,8 @@ 118, 114, 118, - 31, - 32, + 25, + 26, true, "ICCV", "ICCV" @@ -86325,8 +86471,8 @@ 129, 121, 129, - 34, - 38, + 28, + 31, true, "ICCV '15", "ICCV '15" @@ -86346,12 +86492,33 @@ 174, 155, 174, - 44, - 49, + 37, + 42, true, "Washington, DC, USA", "Washington, DC, USA" ], + [ + "reference", + "date", + 12744546813104546377, + "TEXT", + "#/texts/104", + 1.0, + 10303975503395430788, + 13846363068497305469, + null, + null, + 176, + 185, + 176, + 185, + 43, + 46, + true, + "1440-1448", + "1440-1448" + ], [ "reference", "url", @@ -86359,19 +86526,40 @@ "TEXT", "#/texts/104", 1.0, - 3534146179424153776, - 7670413446641027165, + 3301781339572596013, + 17531137372088121631, null, null, 187, - 205, + 215, 187, - 205, + 215, + 47, 60, - 70, true, - "https://doi.org/10", - "https://doi.org/10" + "https://doi.org/10.1109/ICCV", + "https://doi.org/10.1109/ICCV" + ], + [ + "reference", + "date", + 12744546813104546377, + "TEXT", + "#/texts/104", + 1.0, + 389609625548777059, + 1587769393776757579, + null, + null, + 216, + 220, + 216, + 220, + 61, + 62, + true, + "2015", + "2015" ], [ "reference", @@ -86478,6 +86666,27 @@ "Jitendra Malik.", "Jitendra Malik." ], + [ + "reference", + "date", + 16061746189176848219, + "TEXT", + "#/texts/105", + 1.0, + 389609625548777061, + 894814354396885943, + null, + null, + 72, + 76, + 72, + 76, + 18, + 19, + true, + "2013", + "2013" + ], [ "reference", "title", @@ -86493,8 +86702,8 @@ 158, 78, 158, - 23, - 33, + 20, + 30, true, "Rich feature hierarchies for accurate object detection and semantic segmentation", "Rich feature hierarchies for accurate object detection and semantic segmentation" @@ -86506,19 +86715,61 @@ "TEXT", "#/texts/105", 1.0, - 14650447850827428066, - 6622884905494035113, + 389609625536419383, + 889446752040326567, null, null, 160, - 168, + 164, 160, - 168, - 34, - 36, + 164, + 31, + 32, + true, + "CoRR", + "CoRR" + ], + [ + "reference", + "date", + 16061746189176848219, + "TEXT", + "#/texts/105", + 1.0, + 3979843797462439752, + 2449824314382216916, + null, + null, + 165, + 178, + 165, + 178, + 32, + 37, true, - "CoRR abs", - "CoRR abs" + "abs/1311.2524", + "abs/1311.2524" + ], + [ + "reference", + "date", + 16061746189176848219, + "TEXT", + "#/texts/105", + 1.0, + 389609625548777061, + 894814354396890826, + null, + null, + 180, + 184, + 180, + 184, + 38, + 39, + true, + "2013", + "2013" ], [ "reference", @@ -86688,6 +86939,48 @@ "Alexander C. Berg.", "Alexander C. Berg." ], + [ + "reference", + "date", + 11872392946390819176, + "TEXT", + "#/texts/106", + 1.0, + 389609625548777056, + 12418382060406794776, + null, + null, + 116, + 120, + 116, + 120, + 29, + 30, + true, + "2016", + "2016" + ], + [ + "reference", + "title", + 11872392946390819176, + "TEXT", + "#/texts/106", + 1.0, + 10201684882899222639, + 16463858842282873959, + null, + null, + 122, + 156, + 122, + 156, + 31, + 37, + true, + "SSD: Single Shot MultiBox Detector", + "SSD: Single Shot MultiBox Detector" + ], [ "reference", "location", @@ -86703,8 +86996,8 @@ 197, 193, 197, - 45, - 46, + 42, + 43, true, "Cham", "Cham" @@ -86724,8 +87017,8 @@ 224, 206, 224, - 53, - 63, + 48, + 57, true, "https://doi.org/10", "https://doi.org/10" @@ -86837,7 +87130,7 @@ ], [ "reference", - "title", + "date", 2956849475535726296, "TEXT", "#/texts/107", @@ -86851,32 +87144,11 @@ 77, 81, 19, - 23, + 20, true, "2016", "2016" ], - [ - "reference", - "journal", - 2956849475535726296, - "TEXT", - "#/texts/107", - 1.0, - 2660903465505862355, - 9582938942600580090, - null, - null, - 83, - 110, - 83, - 110, - 24, - 30, - true, - "You Only Look Once: Unified", - "You Only Look Once: Unified" - ], [ "reference", "title", @@ -86884,19 +87156,19 @@ "TEXT", "#/texts/107", 1.0, - 2467798556476435147, - 10856601719252715708, + 5895818558987270699, + 2974553673873283962, null, null, - 112, + 83, 138, - 112, + 83, 138, - 31, - 36, + 21, + 33, true, - "Real-Time Object Detection", - "Real-Time Object Detection" + "You Only Look Once: Unified, Real-Time Object Detection", + "You Only Look Once: Unified, Real-Time Object Detection" ], [ "reference", @@ -86905,19 +87177,19 @@ "TEXT", "#/texts/107", 1.0, - 7885547286604337816, - 14176715156115962749, + 17631274803144515959, + 18105892991402137032, null, null, - 145, + 140, 203, - 145, + 140, 203, - 41, - 49, + 34, + 43, true, - "IEEE Conference on Computer Vision and Pattern Recognition", - "IEEE Conference on Computer Vision and Pattern Recognition" + "2016 IEEE Conference on Computer Vision and Pattern Recognition", + "2016 IEEE Conference on Computer Vision and Pattern Recognition" ], [ "reference", @@ -86934,12 +87206,33 @@ 209, 205, 209, - 50, - 51, + 44, + 45, true, "CVPR", "CVPR" ], + [ + "reference", + "date", + 2956849475535726296, + "TEXT", + "#/texts/107", + 1.0, + 389609625548777056, + 17837801987031982734, + null, + null, + 212, + 216, + 212, + 216, + 47, + 48, + true, + "2016", + "2016" + ], [ "reference", "citation-number", @@ -87005,7 +87298,7 @@ ], [ "reference", - "title", + "date", 6623297047995432604, "TEXT", "#/texts/108", @@ -87019,7 +87312,28 @@ 35, 39, 9, - 13, + 10, + true, + "2016", + "2016" + ], + [ + "reference", + "date", + 6623297047995432604, + "TEXT", + "#/texts/108", + 1.0, + 389609625548777056, + 2625243571990783197, + null, + null, + 110, + 114, + 110, + 114, + 28, + 29, true, "2016", "2016" @@ -87040,7 +87354,7 @@ 1, 3, 1, - 3, + 2, true, "10", "10" @@ -87060,8 +87374,8 @@ 17, 5, 17, - 4, - 6, + 3, + 5, true, "Shaoqing Ren", "Shaoqing Ren" @@ -87081,8 +87395,8 @@ 29, 19, 29, - 7, - 9, + 6, + 8, true, "Kaiming He", "Kaiming He" @@ -87102,8 +87416,8 @@ 44, 31, 44, - 10, - 12, + 9, + 11, true, "Ross Girshick", "Ross Girshick" @@ -87123,32 +87437,53 @@ 59, 50, 59, - 14, - 17, + 13, + 16, true, "Jian Sun.", "Jian Sun." ], [ "reference", - "title", + "date", 2507285765516108280, "TEXT", "#/texts/109", 1.0, - 18278696908508259685, - 2858836887653185589, + 389609625548777059, + 1924763351573441882, null, null, 60, - 144, + 64, 60, - 144, + 64, + 16, 17, - 37, true, - "2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks", - "2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" + "2015", + "2015" + ], + [ + "reference", + "title", + 2507285765516108280, + "TEXT", + "#/texts/109", + 1.0, + 695901516261617265, + 14331097264748910677, + null, + null, + 66, + 144, + 66, + 144, + 18, + 33, + true, + "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks", + "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" ], [ "reference", @@ -87165,8 +87500,8 @@ 201, 146, 201, - 38, - 47, + 34, + 42, true, "In Advances in Neural Information Processing Systems 28", "In Advances in Neural Information Processing Systems 28" @@ -87178,40 +87513,19 @@ "TEXT", "#/texts/109", 1.0, - 12944083688050190195, - 12953170403535611737, + 3374974501831695503, + 17450904193872703176, null, null, 309, - 337, + 420, 309, - 337, - 89, - 101, - true, - "http://papers.nips.cc/paper/", - "http://papers.nips.cc/paper/" - ], - [ - "reference", - "doi", - 2507285765516108280, - "TEXT", - "#/texts/109", - 1.0, - 12178341415895634440, - 93706065194188109, - null, - null, - 422, - 425, - 422, - 425, - 130, - 131, + 420, + 82, + 119, true, - "pdf", - "pdf" + "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks", + "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks" ], [ "reference", @@ -87229,7 +87543,7 @@ 1, 3, 1, - 3, + 2, true, "11", "11" @@ -87249,8 +87563,8 @@ 20, 5, 20, - 4, - 8, + 3, + 7, true, "Peter W J Staar", "Peter W J Staar" @@ -87270,8 +87584,8 @@ 35, 22, 35, - 9, - 11, + 8, + 10, true, "Michele Dolfi", "Michele Dolfi" @@ -87291,8 +87605,8 @@ 51, 37, 51, - 12, - 14, + 11, + 13, true, "Christoph Auer", "Christoph Auer" @@ -87312,15 +87626,15 @@ 70, 57, 70, - 16, - 19, + 15, + 18, true, "Costas Bekas.", "Costas Bekas." ], [ "reference", - "title", + "date", 14905276480471286920, "TEXT", "#/texts/110", @@ -87333,8 +87647,8 @@ 75, 71, 75, + 18, 19, - 23, true, "2018", "2018" @@ -87354,8 +87668,8 @@ 133, 77, 133, - 24, - 32, + 20, + 28, true, "Corpus Conversion Service poster at the SysML conference", "Corpus Conversion Service poster at the SysML conference" @@ -87375,8 +87689,8 @@ 166, 135, 166, - 33, - 49, + 29, + 44, true, "http://www.sysml.cc/doc/ 76.pdf", "http://www.sysml.cc/doc/ 76.pdf" @@ -87397,7 +87711,7 @@ 0, 2, 0, - 2, + 1, true, "72", "72" @@ -87439,7 +87753,7 @@ 0, 4, 0, - 4, + 3, true, "0.97", "0.97" @@ -87460,7 +87774,7 @@ 0, 4, 0, - 4, + 3, true, "0.98", "0.98" @@ -87565,7 +87879,7 @@ 4, 6, 2, - 4, + 3, true, "99", "99" @@ -87607,7 +87921,7 @@ 4, 6, 2, - 4, + 3, true, "98", "98" @@ -87628,7 +87942,7 @@ 0, 2, 0, - 2, + 1, true, "75", "75" @@ -87775,7 +88089,7 @@ 0, 3, 0, - 3, + 1, true, "670", "670" @@ -87922,7 +88236,7 @@ 0, 3, 0, - 3, + 1, true, "325", "325" @@ -88027,7 +88341,7 @@ 0, 2, 0, - 2, + 1, true, "17", "17" @@ -88069,7 +88383,7 @@ 0, 5, 0, - 5, + 1, true, "56460", "56460" @@ -88090,7 +88404,7 @@ 0, 2, 0, - 2, + 1, true, "14", "14" @@ -88216,7 +88530,7 @@ 0, 4, 0, - 4, + 1, true, "4223", "4223" @@ -88237,7 +88551,7 @@ 0, 2, 0, - 2, + 1, true, "26", "26" @@ -88363,7 +88677,7 @@ 0, 4, 0, - 4, + 1, true, "3418", "3418" @@ -88384,7 +88698,7 @@ 0, 3, 0, - 3, + 1, true, "100", "100" @@ -88405,7 +88719,7 @@ 0, 5, 0, - 5, + 3, true, "99.85", "99.85" @@ -88426,7 +88740,7 @@ 0, 3, 0, - 3, + 1, true, "100", "100" @@ -88447,7 +88761,7 @@ 0, 5, 0, - 5, + 3, true, "99.94", "99.94" @@ -88468,7 +88782,7 @@ 0, 5, 0, - 5, + 3, true, "99.24", "99.24" @@ -88489,7 +88803,7 @@ 0, 5, 0, - 5, + 3, true, "99.97", "99.97" @@ -88510,7 +88824,7 @@ 0, 5, 0, - 5, + 3, true, "97.40", "97.40" @@ -88531,7 +88845,7 @@ 0, 5, 0, - 5, + 3, true, "97.52", "97.52" @@ -88552,7 +88866,7 @@ 0, 3, 0, - 3, + 1, true, "100", "100" @@ -88573,7 +88887,7 @@ 0, 5, 0, - 5, + 3, true, "99.99", "99.99" @@ -88594,7 +88908,7 @@ 0, 5, 0, - 5, + 3, true, "99.64", "99.64" @@ -88615,7 +88929,7 @@ 0, 5, 0, - 5, + 3, true, "99.24", "99.24" @@ -88636,7 +88950,7 @@ 0, 2, 0, - 2, + 1, true, "98", "98" @@ -88656,8 +88970,8 @@ 7, 5, 7, + 2, 3, - 5, true, "96", "96" @@ -88678,7 +88992,7 @@ 0, 2, 0, - 2, + 1, true, "99", "99" @@ -88698,8 +89012,8 @@ 7, 5, 7, + 2, 3, - 5, true, "83", "83" @@ -88720,7 +89034,7 @@ 0, 2, 0, - 2, + 1, true, "99", "99" @@ -88740,8 +89054,8 @@ 7, 5, 7, + 2, 3, - 5, true, "46", "46" @@ -88762,7 +89076,7 @@ 0, 2, 0, - 2, + 1, true, "99", "99" @@ -88782,8 +89096,8 @@ 7, 5, 7, + 2, 3, - 5, true, "58", "58" @@ -93723,6 +94037,7 @@ ] }, "sref": "#", + "subj_hash": 18446744073709551615, "tables": [ { "#-cols": 5, @@ -93730,7 +94045,6 @@ "captions": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/54", - "hash": 9160199179916979172, "orig": "Table 1: Time-to-solution and performance results for the Faster RCNN and YOLOv2 models. The training of the models was done on 25000 PDF pages. The prediction (per page) and performance numbers (Recall= \u211b and Precision= \ud835\udcab) were obtained on 5000 page images, where the prediction confidence cutoff was tuned to yield the maximum F1 score for each. All time-to-solution measurements for training were obtained on a POWER8 node with a single Pascal P100 GPU.", "prov": [ { @@ -93738,6 +94052,7 @@ } ], "sref": "#/tables/0/captions/0", + "subj_hash": 9160199179916979172, "text": "Table 1: Time-to-solution and performance results for the Faster RCNN and YOLOv2 models. The training of the models was done on 25000 PDF pages. The prediction (per page) and performance numbers (Recall= \u211b and Precision= \ud835\udcab) were obtained on 5000 page images, where the prediction confidence cutoff was tuned to yield the maximum F1 score for each. All time-to-solution measurements for training were obtained on a POWER8 node with a single Pascal P100 GPU.", "text-hash": 17279509228359814482, "type": "paragraph" @@ -94323,7 +94638,6 @@ ], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/tables/0", "footnotes": [], - "hash": 16709517892596982787, "mentions": [], "prov": [ { @@ -94331,6 +94645,7 @@ } ], "sref": "#/tables/0", + "subj_hash": 16709517892596982787, "type": "table" }, { @@ -94339,7 +94654,6 @@ "captions": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/64", - "hash": 18354136439820865774, "orig": "Table 3: Comparison for two different journal templates showing the aggregated precision and recall averaged over all labels. Each model has been independently trained on a dataset of 400 pages each. The results show that the ML algorithm proves to perform very well for the multiple document templates, simply by providing a different dataset to train on.", "prov": [ { @@ -94347,6 +94661,7 @@ } ], "sref": "#/tables/1/captions/0", + "subj_hash": 18354136439820865774, "text": "Table 3: Comparison for two different journal templates showing the aggregated precision and recall averaged over all labels. Each model has been independently trained on a dataset of 400 pages each. The results show that the ML algorithm proves to perform very well for the multiple document templates, simply by providing a different dataset to train on.", "text-hash": 8085176655901164108, "type": "paragraph" @@ -96907,7 +97222,6 @@ ], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/tables/1", "footnotes": [], - "hash": 16041588621504517180, "mentions": [], "prov": [ { @@ -96915,6 +97229,7 @@ } ], "sref": "#/tables/1", + "subj_hash": 16041588621504517180, "type": "table" }, { @@ -97185,7 +97500,6 @@ ], "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/tables/2", "footnotes": [], - "hash": 14817357053216629605, "mentions": [], "prov": [ { @@ -97193,13 +97507,13 @@ } ], "sref": "#/tables/2", + "subj_hash": 14817357053216629605, "type": "table" } ], "texts": [ { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/0", - "hash": 7377574370756688828, "orig": "arXiv:1806.02284v1 [cs.DL] 24 May 2018", "prov": [ { @@ -97207,13 +97521,13 @@ } ], "sref": "#/texts/0", + "subj_hash": 7377574370756688828, "text": "arXiv:1806.02284v1 [cs.DL] 24 May 2018", "text-hash": 605943372629925146, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/1", - "hash": 10227328696767902037, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "prov": [ { @@ -97221,13 +97535,13 @@ } ], "sref": "#/texts/1", + "subj_hash": 10227328696767902037, "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "title" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/2", - "hash": 8770494724746327817, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "prov": [ { @@ -97235,13 +97549,13 @@ } ], "sref": "#/texts/2", + "subj_hash": 8770494724746327817, "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/3", - "hash": 18258237174351515285, "orig": "taa,dol,cau,bek@zurich.ibm.com", "prov": [ { @@ -97249,13 +97563,13 @@ } ], "sref": "#/texts/3", + "subj_hash": 18258237174351515285, "text": "taa,dol,cau,bek@zurich.ibm.com", "text-hash": 7883794643982446593, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/4", - "hash": 5704354110496947297, "orig": "IBM Research", "prov": [ { @@ -97263,13 +97577,13 @@ } ], "sref": "#/texts/4", + "subj_hash": 5704354110496947297, "text": "IBM Research", "text-hash": 16114797969310195405, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/5", - "hash": 11056873211244709904, "orig": "Rueschlikon, Switzerland", "prov": [ { @@ -97277,13 +97591,13 @@ } ], "sref": "#/texts/5", + "subj_hash": 11056873211244709904, "text": "Rueschlikon, Switzerland", "text-hash": 10483037511456664190, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/6", - "hash": 11788868678004267702, "orig": "ABSTRACT", "prov": [ { @@ -97291,13 +97605,13 @@ } ], "sref": "#/texts/6", + "subj_hash": 11788868678004267702, "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/7", - "hash": 3624246356859711021, "orig": "1 INTRODUCTION", "prov": [ { @@ -97305,13 +97619,13 @@ } ], "sref": "#/texts/7", + "subj_hash": 3624246356859711021, "text": "1 INTRODUCTION", "text-hash": 4359834464932974729, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/8", - "hash": 17999848460847860039, "orig": "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size. Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable. Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging. In this paper, we present a modular, cloud-based platform to ingest documents at scale. This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format. We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "prov": [ { @@ -97319,13 +97633,13 @@ } ], "sref": "#/texts/8", + "subj_hash": 17999848460847860039, "text": "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size. Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable. Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging. In this paper, we present a modular, cloud-based platform to ingest documents at scale. This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format. We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 8142196169563728819, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/9", - "hash": 14387482728083328702, "orig": "ACM Reference Format:", "prov": [ { @@ -97333,13 +97647,13 @@ } ], "sref": "#/texts/9", + "subj_hash": 14387482728083328702, "text": "ACM Reference Format:", "text-hash": 7430992009485070364, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/10", - "hash": 11222145795862225841, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas. 2018. Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.. In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom. ACM, New York, NY, USA, 9 pages. https://doi.org/10. 1145/3219819.3219834", "prov": [ { @@ -97347,13 +97661,13 @@ } ], "sref": "#/texts/10", + "subj_hash": 11222145795862225841, "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas. 2018. Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.. In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom. ACM, New York, NY, USA, 9 pages. https://doi.org/10. 1145/3219819.3219834", "text-hash": 10605881125688857885, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/11", - "hash": 16923207262044929933, "orig": "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$. These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery. It is needless to say that valuable qualitative and quantitative information is contained in many of them. However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout. Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery. In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$. This poses a real problem, since more and more information published in the PDF documents is going dark. In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components. First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML. Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", "prov": [ { @@ -97361,13 +97675,13 @@ } ], "sref": "#/texts/11", + "subj_hash": 16923207262044929933, "text": "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$. These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery. It is needless to say that valuable qualitative and quantitative information is contained in many of them. However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout. Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery. In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$. This poses a real problem, since more and more information published in the PDF documents is going dark. In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components. First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML. Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", "text-hash": 9516638039579926761, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/12", - "hash": 3749305213430885773, "orig": "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files. The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms. This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components. Each of these microservices can be consumed by its own REST API. This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform. In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", "prov": [ { @@ -97375,13 +97689,13 @@ } ], "sref": "#/texts/12", + "subj_hash": 3749305213430885773, "text": "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files. The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms. This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components. Each of these microservices can be consumed by its own REST API. This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform. In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", "text-hash": 3945867624210419433, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/13", - "hash": 3409470577915009676, "orig": "To obtain a thorough understanding of what our platform can do and how well it performs, we have structured this paper as follows: In Section 2, we briefly review the current state-of-the-art document", "prov": [ { @@ -97389,13 +97703,13 @@ } ], "sref": "#/texts/13", + "subj_hash": 3409470577915009676, "text": "To obtain a thorough understanding of what our platform can do and how well it performs, we have structured this paper as follows: In Section 2, we briefly review the current state-of-the-art document", "text-hash": 4583103017707584490, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/15", - "hash": 17187299362680072378, "orig": "processing solutions. In Section 3, we present the design of the platform and its components. In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively. Finally, in Section 5, we discuss the open questions w.r.t. research and possible next steps in the development of the platform.", "prov": [ { @@ -97403,13 +97717,13 @@ } ], "sref": "#/texts/14", + "subj_hash": 17187299362680072378, "text": "processing solutions. In Section 3, we present the design of the platform and its components. In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively. Finally, in Section 5, we discuss the open questions w.r.t. research and possible next steps in the development of the platform.", "text-hash": 9243393324994873880, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/16", - "hash": 697648145931166262, "orig": "2 STATE OF THE ART", "prov": [ { @@ -97417,13 +97731,13 @@ } ], "sref": "#/texts/15", + "subj_hash": 697648145931166262, "text": "2 STATE OF THE ART", "text-hash": 2385816824895853732, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/17", - "hash": 7935233310532930917, "orig": "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]. Broadly speaking, there are two types of approaches to this problem. In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document. This can be done through a conversion from PDF towards HTML or MS Word for example. The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format. For example, this could be a JSON/XML file with a particular schema. Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", "prov": [ { @@ -97431,13 +97745,13 @@ } ], "sref": "#/texts/16", + "subj_hash": 7935233310532930917, "text": "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]. Broadly speaking, there are two types of approaches to this problem. In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document. This can be done through a conversion from PDF towards HTML or MS Word for example. The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format. For example, this could be a JSON/XML file with a particular schema. Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", "text-hash": 57757550267838417, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/18", - "hash": 2762070725424637531, "orig": "Many solutions have already been developed that tackle the problem of document conversion. There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$. There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$. In contrast to the open-source solutions, all three proprietary solutions support also", "prov": [ { @@ -97445,13 +97759,13 @@ } ], "sref": "#/texts/17", + "subj_hash": 2762070725424637531, "text": "Many solutions have already been developed that tackle the problem of document conversion. There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$. There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$. In contrast to the open-source solutions, all three proprietary solutions support also", "text-hash": 5230489225511983287, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/19", - "hash": 7536915191196259776, "orig": "extraction from scanned documents. Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries. For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref. [1] and previous editions.", "prov": [ { @@ -97459,13 +97773,13 @@ } ], "sref": "#/texts/18", + "subj_hash": 7536915191196259776, "text": "extraction from scanned documents. Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries. For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref. [1] and previous editions.", "text-hash": 167221319977518894, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/20", - "hash": 11495493007651807568, "orig": "3 PLATFORM DESIGN", "prov": [ { @@ -97473,13 +97787,13 @@ } ], "sref": "#/texts/19", + "subj_hash": 11495493007651807568, "text": "3 PLATFORM DESIGN", "text-hash": 10322960049580053438, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/21", - "hash": 7650015170039242996, "orig": "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way.", "prov": [ { @@ -97487,13 +97801,13 @@ } ], "sref": "#/texts/20", + "subj_hash": 7650015170039242996, "text": "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way.", "text-hash": 333520156392116834, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/22", - "hash": 14959508657858158650, "orig": "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation. This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms. This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased.", "prov": [ { @@ -97501,13 +97815,13 @@ } ], "sref": "#/texts/21", + "subj_hash": 14959508657858158650, "text": "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation. This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms. This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased.", "text-hash": 6868109665737773720, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/23", - "hash": 10379300903412882972, "orig": "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design. First of all, one can not think anymore at the level of a single document. Rather, one should think at the level of a collection of documents (or a corpus of documents). A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is. This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format. Our solution can ingest an entire collection of documents and build machine learned models on top of that. Of course, once the the model is trained, one can convert documents one at a time, too.", "prov": [ { @@ -97515,13 +97829,13 @@ } ], "sref": "#/texts/22", + "subj_hash": 10379300903412882972, "text": "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design. First of all, one can not think anymore at the level of a single document. Rather, one should think at the level of a collection of documents (or a corpus of documents). A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is. This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format. Our solution can ingest an entire collection of documents and build machine learned models on top of that. Of course, once the the model is trained, one can convert documents one at a time, too.", "text-hash": 11150916691880738938, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/25", - "hash": 4994395008195818594, "orig": "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it. Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way. These annotations are then used as ground-truth data to train models. It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents. For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application. It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way.", "prov": [ { @@ -97529,13 +97843,13 @@ } ], "sref": "#/texts/23", + "subj_hash": 4994395008195818594, "text": "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it. Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way. These annotations are then used as ground-truth data to train models. It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents. For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application. It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way.", "text-hash": 16536368219630364368, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/26", - "hash": 4203835122307823579, "orig": "3.1 Components", "prov": [ { @@ -97543,13 +97857,13 @@ } ], "sref": "#/texts/24", + "subj_hash": 4203835122307823579, "text": "3.1 Components", "text-hash": 3789103236857293111, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/27", - "hash": 13520362244078084911, "orig": "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML).", "prov": [ { @@ -97557,13 +97871,13 @@ } ], "sref": "#/texts/25", + "subj_hash": 13520362244078084911, "text": "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML).", "text-hash": 12910497814715733387, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/28", - "hash": 1749622367305947670, "orig": "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format. If a trained model is available, only components 1, 4 and 5 are needed to convert the documents. If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models. It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional.", "prov": [ { @@ -97571,13 +97885,13 @@ } ], "sref": "#/texts/26", + "subj_hash": 1749622367305947670, "text": "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format. If a trained model is available, only components 1, 4 and 5 are needed to convert the documents. If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models. It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional.", "text-hash": 1334541935326461060, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/30", - "hash": 11083736481641202939, "orig": "Let us now elaborate on what each of the five components deliver in the rest of this section.", "prov": [ { @@ -97585,13 +97899,13 @@ } ], "sref": "#/texts/27", + "subj_hash": 11083736481641202939, "text": "Let us now elaborate on what each of the five components deliver in the rest of this section.", "text-hash": 10456209429844276823, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/31", - "hash": 15403141463083979171, "orig": "3.2 Parsing of Documents", "prov": [ { @@ -97599,13 +97913,13 @@ } ], "sref": "#/texts/28", + "subj_hash": 15403141463083979171, "text": "3.2 Parsing of Documents", "text-hash": 6127225399482532623, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/32", - "hash": 12234429517419341922, "orig": "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page. For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper. There are two reasons why we are interested in these cells. First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label. Second, the concept of a cell can be easily transferred to scanned documents. In Figure 2, we show the cells obtained from an example PDF page after the parsing stage.", "prov": [ { @@ -97613,13 +97927,13 @@ } ], "sref": "#/texts/29", + "subj_hash": 12234429517419341922, "text": "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page. For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper. There are two reasons why we are interested in these cells. First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label. Second, the concept of a cell can be easily transferred to scanned documents. In Figure 2, we show the cells obtained from an example PDF page after the parsing stage.", "text-hash": 13908173772261346000, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/33", - "hash": 16957857111665886816, "orig": "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells. This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs. Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines. This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models. As a consequence, we reduce the variability of the geometric features as much as possible. The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions.", "prov": [ { @@ -97627,13 +97941,13 @@ } ], "sref": "#/texts/30", + "subj_hash": 16957857111665886816, "text": "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells. This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs. Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines. This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models. As a consequence, we reduce the variability of the geometric features as much as possible. The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions.", "text-hash": 9481411723883903182, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/34", - "hash": 10390915169360946497, "orig": "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document. This operation relies on the iterators provided by the QPDF library$^{9}$.", "prov": [ { @@ -97641,13 +97955,13 @@ } ], "sref": "#/texts/31", + "subj_hash": 10390915169360946497, "text": "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document. This operation relies on the iterators provided by the QPDF library$^{9}$.", "text-hash": 11149022357700220845, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/35", - "hash": 15254383206256494278, "orig": "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content. Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document. From this point, all further processing does not need to distinguish between scanned or programmatic sources.", "prov": [ { @@ -97655,13 +97969,13 @@ } ], "sref": "#/texts/32", + "subj_hash": 15254383206256494278, "text": "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content. Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document. From this point, all further processing does not need to distinguish between scanned or programmatic sources.", "text-hash": 6573226034038831156, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/36", - "hash": 17759618186065566858, "orig": "3.3 Ground-truth gathering through human-annotation", "prov": [ { @@ -97669,13 +97983,13 @@ } ], "sref": "#/texts/33", + "subj_hash": 17759618186065566858, "text": "3.3 Ground-truth gathering through human-annotation", "text-hash": 8679681341332585960, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/37", - "hash": 11638821473906997927, "orig": "In this component, we collect ground-truth for the custom machine learning models to be trained on. Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision. Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents. As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning. The purpose of these annotators is two-fold.", "prov": [ { @@ -97683,13 +97997,13 @@ } ], "sref": "#/texts/34", + "subj_hash": 11638821473906997927, "text": "In this component, we collect ground-truth for the custom machine learning models to be trained on. Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision. Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents. As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning. The purpose of these annotators is two-fold.", "text-hash": 14503768930839698451, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/38", - "hash": 13020065077657899116, "orig": "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach. In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2). We then ask the (human) annotator to assign each cell a layout semantic label. Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$. In the annotator tool, each layout semantic label is visually represented by a colour. By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3. Since humans are very efficient in visual recognition, this task comes very natural to us. The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", "prov": [ { @@ -97697,13 +98011,13 @@ } ], "sref": "#/texts/35", + "subj_hash": 13020065077657899116, "text": "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach. In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2). We then ask the (human) annotator to assign each cell a layout semantic label. Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$. In the annotator tool, each layout semantic label is visually represented by a colour. By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3. Since humans are very efficient in visual recognition, this task comes very natural to us. The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", "text-hash": 13130850271187616458, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/39", - "hash": 10103841011442966464, "orig": "The second purpose of the annotators is to visually inspect the quality of our machine learned models. The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell. Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page. This allows the users to directly inspect the results of the models on unseen pages. A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels. Of course, as the models become better over time, the number of corrections needed to be made become less and less. This allows us to significantly reduce the annotation time per document. Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering.", "prov": [ { @@ -97711,13 +98025,13 @@ } ], "sref": "#/texts/36", + "subj_hash": 10103841011442966464, "text": "The second purpose of the annotators is to visually inspect the quality of our machine learned models. The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell. Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page. This allows the users to directly inspect the results of the models on unseen pages. A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels. Of course, as the models become better over time, the number of corrections needed to be made become less and less. This allows us to significantly reduce the annotation time per document. Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering.", "text-hash": 11435379797753757998, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/40", - "hash": 10982401368140758581, "orig": "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute. The vertical red lines indicate that a training was performed on the annotated pages, and a new, improved model is", "prov": [ { @@ -97725,13 +98039,13 @@ } ], "sref": "#/texts/37", + "subj_hash": 10982401368140758581, "text": "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute. The vertical red lines indicate that a training was performed on the annotated pages, and a new, improved model is", "text-hash": 10548529097098469537, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/42", - "hash": 887751753527930563, "orig": "used from that point to predict the labels. Since the corrections become less and less, the rate of annotation goes up. It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice. The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", "prov": [ { @@ -97739,13 +98053,13 @@ } ], "sref": "#/texts/38", + "subj_hash": 887751753527930563, "text": "used from that point to predict the labels. Since the corrections become less and less, the rate of annotation goes up. It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice. The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", "text-hash": 2205427981859754031, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/43", - "hash": 4695688617288377564, "orig": "3.4 Machine Learning: Training models & Applying models", "prov": [ { @@ -97753,13 +98067,13 @@ } ], "sref": "#/texts/39", + "subj_hash": 4695688617288377564, "text": "3.4 Machine Learning: Training models & Applying models", "text-hash": 16834670239362291258, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/44", - "hash": 3275001812318455279, "orig": "In the CCS, there are essentially two types of machine-learning models. On the one hand, we have default models, which are designed to be layout independent. They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall. They will classify each cell in the page with regard to their layout semantic label.", "prov": [ { @@ -97767,13 +98081,13 @@ } ], "sref": "#/texts/40", + "subj_hash": 3275001812318455279, "text": "In the CCS, there are essentially two types of machine-learning models. On the one hand, we have default models, which are designed to be layout independent. They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall. They will classify each cell in the page with regard to their layout semantic label.", "text-hash": 4429706140044408651, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/45", - "hash": 15354930767839681193, "orig": "3.4.1 Metrics. Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results. The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label. The correctness of this label is what we aim to measure with the recall and precision metrics. The second observation is that we deal with a", "prov": [ { @@ -97781,13 +98095,13 @@ } ], "sref": "#/texts/41", + "subj_hash": 15354930767839681193, "text": "3.4.1 Metrics. Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results. The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label. The correctness of this label is what we aim to measure with the recall and precision metrics. The second observation is that we deal with a", "text-hash": 6184852591532473349, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/47", - "hash": 6337233386759158728, "orig": "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label.", "prov": [ { @@ -97795,13 +98109,13 @@ } ], "sref": "#/texts/42", + "subj_hash": 6337233386759158728, "text": "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label.", "text-hash": 15490331838172880166, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/48", - "hash": 2249972239307071508, "orig": "The recall (= \u211b) and precision (= \ud835\udcab) for a given label on a page is defined by the standard formulas", "prov": [ { @@ -97809,13 +98123,13 @@ } ], "sref": "#/texts/43", + "subj_hash": 2249972239307071508, "text": "The recall (= \u211b) and precision (= \ud835\udcab) for a given label on a page is defined by the standard formulas", "text-hash": 1131271437908497026, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/49", - "hash": 12383805870947794174, "orig": "\u211b = t$_{p}$ t$_{p}$ + f$_{p}$ , \ud835\udcab = t$_{p}$ t$_{p}$ + f$_{n}$ , (1)", "prov": [ { @@ -97823,13 +98137,13 @@ } ], "sref": "#/texts/44", + "subj_hash": 12383805870947794174, "text": "\u211b = t$_{p}$ t$_{p}$ + f$_{p}$ , \ud835\udcab = t$_{p}$ t$_{p}$ + f$_{n}$ , (1)", "text-hash": 14055366495763095132, "type": "equation" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/50", - "hash": 7053654953998543393, "orig": "where t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels.", "prov": [ { @@ -97837,13 +98151,13 @@ } ], "sref": "#/texts/45", + "subj_hash": 7053654953998543393, "text": "where t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels.", "text-hash": 642098605774556301, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/51", - "hash": 15921044595687116426, "orig": "3.4.2 Default Models. The aim of the default models is to identify specific, ubiquitous objects in documents. Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods. Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]. On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", "prov": [ { @@ -97851,13 +98165,13 @@ } ], "sref": "#/texts/46", + "subj_hash": 15921044595687116426, "text": "3.4.2 Default Models. The aim of the default models is to identify specific, ubiquitous objects in documents. Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods. Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]. On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", "text-hash": 5618307884355612648, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/52", - "hash": 12234068400463628788, "orig": "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects.", "prov": [ { @@ -97865,13 +98179,13 @@ } ], "sref": "#/texts/47", + "subj_hash": 12234068400463628788, "text": "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects.", "text-hash": 13907813772802190178, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/53", - "hash": 4628466594790006384, "orig": "The networks available on our platform have been trained on arXiv data$^{11}$. We have annotated 30000 PDF pages and know the", "prov": [ { @@ -97879,13 +98193,13 @@ } ], "sref": "#/texts/48", + "subj_hash": 4628466594790006384, "text": "The networks available on our platform have been trained on arXiv data$^{11}$. We have annotated 30000 PDF pages and know the", "text-hash": 16911352314006995166, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/55", - "hash": 9651706913678711778, "orig": "location of at least one table on each page. From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation. Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", "prov": [ { @@ -97893,13 +98207,13 @@ } ], "sref": "#/texts/49", + "subj_hash": 9651706913678711778, "text": "location of at least one table on each page. From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation. Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", "text-hash": 11888191065829014864, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/56", - "hash": 1363251178266051349, "orig": "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes. The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks. An example of such an image can be seen in Figure 5. The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts. Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition. This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet. We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page.", "prov": [ { @@ -97907,13 +98221,13 @@ } ], "sref": "#/texts/50", + "subj_hash": 1363251178266051349, "text": "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes. The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks. An example of such an image can be seen in Figure 5. The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts. Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition. This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet. We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page.", "text-hash": 2009046567395259777, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/57", - "hash": 18259197018396996238, "orig": "Let us now discuss both deep neural network training microservices on the platform. In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision. In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times. We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied. We believe that this is the main origin for the discrepancy of time-to-solution for the training phase. The same holds true for the prediction. Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", "prov": [ { @@ -97921,13 +98235,13 @@ } ], "sref": "#/texts/51", + "subj_hash": 18259197018396996238, "text": "Let us now discuss both deep neural network training microservices on the platform. In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision. In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times. We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied. We believe that this is the main origin for the discrepancy of time-to-solution for the training phase. The same holds true for the prediction. Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", "text-hash": 7883278994224882668, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/58", - "hash": 14663676516964431047, "orig": "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously. The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1. We use these bounding boxes to associate with each cell a label, which is in this particular case either Table or Not-Table, depending on whether they overlap", "prov": [ { @@ -97935,13 +98249,13 @@ } ], "sref": "#/texts/52", + "subj_hash": 14663676516964431047, "text": "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously. The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1. We use these bounding boxes to associate with each cell a label, which is in this particular case either Table or Not-Table, depending on whether they overlap", "text-hash": 7164504172498806323, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/59", - "hash": 4577067829072175096, "orig": "Table 2: Performance results for the template specific model of the Physical Review B journals. The confusion matrix highlights the huge imbalance between the number of text cells with different labels. The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", "prov": [ { @@ -97949,13 +98263,13 @@ } ], "sref": "#/texts/53", + "subj_hash": 4577067829072175096, "text": "Table 2: Performance results for the template specific model of the Physical Review B journals. The confusion matrix highlights the huge imbalance between the number of text cells with different labels. The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", "text-hash": 3406859306294395222, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/60", - "hash": 2569392033451362672, "orig": "with the predicted bounding box. The corresponding recall and precision are then computed for this dual-class classification problem. In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence. For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0. 5. The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers. We believe this originates from the selective search algorithm which is used to determine regions of interest. The images we feed it are not typical photographic images (made with a camera) but layout visualisations. The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", "prov": [ { @@ -97963,13 +98277,13 @@ } ], "sref": "#/texts/54", + "subj_hash": 2569392033451362672, "text": "with the predicted bounding box. The corresponding recall and precision are then computed for this dual-class classification problem. In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence. For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0. 5. The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers. We believe this originates from the selective search algorithm which is used to determine regions of interest. The images we feed it are not typical photographic images (made with a camera) but layout visualisations. The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", "text-hash": 5414143675771382750, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/61", - "hash": 14539041145469267811, "orig": "3.4.3 Template specific Models. The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template. This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance. Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", "prov": [ { @@ -97977,13 +98291,13 @@ } ], "sref": "#/texts/55", + "subj_hash": 14539041145469267811, "text": "3.4.3 Template specific Models. The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template. This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance. Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", "text-hash": 6991735551340401103, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/62", - "hash": 8607014065143641201, "orig": "For an algorithm to fit in the interactive platform design we identified a few key requirements. First, it is crucial that the model can generate good results with a limited set of pages. In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation. Second it must be robust against extreme imbalance of the labeled data. It is clear that cells of the label Title will be much more uncommon than cells with the label of Text. Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", "prov": [ { @@ -97991,13 +98305,13 @@ } ], "sref": "#/texts/56", + "subj_hash": 8607014065143641201, "text": "For an algorithm to fit in the interactive platform design we identified a few key requirements. First, it is crucial that the model can generate good results with a limited set of pages. In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation. Second it must be robust against extreme imbalance of the labeled data. It is clear that cells of the label Title will be much more uncommon than cells with the label of Text. Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", "text-hash": 17832237182951286493, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/63", - "hash": 1994904537764312371, "orig": "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models. Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data. In our case,", "prov": [ { @@ -98005,13 +98319,13 @@ } ], "sref": "#/texts/57", + "subj_hash": 1994904537764312371, "text": "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models. Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data. In our case,", "text-hash": 1377511684573734815, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/65", - "hash": 7742256726079628058, "orig": "this structure originates of course from the template. Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements. As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", "prov": [ { @@ -98019,13 +98333,13 @@ } ], "sref": "#/texts/58", + "subj_hash": 7742256726079628058, "text": "this structure originates of course from the template. Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements. As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", "text-hash": 250119056806139256, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/66", - "hash": 8810233123818174294, "orig": "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties. For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells. Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters. We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell.", "prov": [ { @@ -98033,13 +98347,13 @@ } ], "sref": "#/texts/59", + "subj_hash": 8810233123818174294, "text": "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties. For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells. Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters. We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell.", "text-hash": 17619932035192809924, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/67", - "hash": 16446711449286912460, "orig": "It is important to realize that almost all of these features are purely geometrical. This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents.", "prov": [ { @@ -98047,13 +98361,13 @@ } ], "sref": "#/texts/60", + "subj_hash": 16446711449286912460, "text": "It is important to realize that almost all of these features are purely geometrical. This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents.", "text-hash": 9704353849744984874, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/68", - "hash": 9558434107504657973, "orig": "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$. We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels. Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label. We observe that the recall and precision numbers are excellent, with most of them above 99%. This is not surprising, since we are building models that specialise for a particular template.", "prov": [ { @@ -98061,13 +98375,13 @@ } ], "sref": "#/texts/61", + "subj_hash": 9558434107504657973, "text": "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$. We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels. Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label. We observe that the recall and precision numbers are excellent, with most of them above 99%. This is not surprising, since we are building models that specialise for a particular template.", "text-hash": 11971893452237256865, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/69", - "hash": 18349896906192842040, "orig": "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on. The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform. We do not need to define rules and heuristics or update code in order to deal with new types of documents. We only need to gather more data.", "prov": [ { @@ -98075,13 +98389,13 @@ } ], "sref": "#/texts/62", + "subj_hash": 18349896906192842040, "text": "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on. The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform. We do not need to define rules and heuristics or update code in order to deal with new types of documents. We only need to gather more data.", "text-hash": 8080940474762743702, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/70", - "hash": 10082834006373808153, "orig": "3.5 Assembly", "prov": [ { @@ -98089,13 +98403,13 @@ } ], "sref": "#/texts/63", + "subj_hash": 10082834006373808153, "text": "3.5 Assembly", "text-hash": 11736313095563614837, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/71", - "hash": 15253541252152665681, "orig": "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics. This structured data file is constructed by assembling all the cells from the parsed file", "prov": [ { @@ -98103,13 +98417,13 @@ } ], "sref": "#/texts/64", + "subj_hash": 15253541252152665681, "text": "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics. This structured data file is constructed by assembling all the cells from the parsed file", "text-hash": 6565628665194191037, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/72", - "hash": 3904142170608486950, "orig": "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper.", "prov": [ { @@ -98117,13 +98431,13 @@ } ], "sref": "#/texts/65", + "subj_hash": 3904142170608486950, "text": "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper.", "text-hash": 4079383948124449940, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/73", - "hash": 6410818076508661508, "orig": "{ 'description ': { 'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale. ',, \u2192 'abstract ': 'Over the past few decades, the amount of scientific articles [...] ',, \u2192 'affiliations ': 'IBM Research Rueschlikon, Switzerland ', 'authors ': 'Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas ', \u2192 }, 'main-text ': [{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...], 'tables ': [{...},...], 'images ': [{...},...] }", "prov": [ { @@ -98131,13 +98445,13 @@ } ], "sref": "#/texts/66", + "subj_hash": 6410818076508661508, "text": "{ 'description ': { 'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale. ',, \u2192 'abstract ': 'Over the past few decades, the amount of scientific articles [...] ',, \u2192 'affiliations ': 'IBM Research Rueschlikon, Switzerland ', 'authors ': 'Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas ', \u2192 }, 'main-text ': [{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...], 'tables ': [{...},...], 'images ': [{...},...] }", "text-hash": 15129105844666734962, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/74", - "hash": 12813875992986832439, "orig": "in combination with their associated predicted (or human-annotated) layout semantic labels. It should be noted that no machine learning is used in this component. It is purely rule based and therefore completely deterministic.", "prov": [ { @@ -98145,13 +98459,13 @@ } ], "sref": "#/texts/67", + "subj_hash": 12813875992986832439, "text": "in combination with their associated predicted (or human-annotated) layout semantic labels. It should be noted that no machine learning is used in this component. It is purely rule based and therefore completely deterministic.", "text-hash": 13337022012432085155, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/75", - "hash": 11030869010407626539, "orig": "The assembly phase is a two step process. First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order. Then, the text of all cells that have the same label is contracted into a temporary document objects. Third, we build the internal structure of the temporary document objects, based on the information provided by the models. The latter is only applicable for internally structured objects, such as tables. An example of the generated JSON output is shown in Listing 1.", "prov": [ { @@ -98159,13 +98473,13 @@ } ], "sref": "#/texts/68", + "subj_hash": 11030869010407626539, "text": "The assembly phase is a two step process. First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order. Then, the text of all cells that have the same label is contracted into a temporary document objects. Third, we build the internal structure of the temporary document objects, based on the information provided by the models. The latter is only applicable for internally structured objects, such as tables. An example of the generated JSON output is shown in Listing 1.", "text-hash": 10508897272021404039, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/76", - "hash": 2142320548375900929, "orig": "4 ARCHITECTURE AND ORCHESTRATION OF CLOUD BASED MICROSERVICES", "prov": [ { @@ -98173,13 +98487,13 @@ } ], "sref": "#/texts/69", + "subj_hash": 2142320548375900929, "text": "4 ARCHITECTURE AND ORCHESTRATION OF CLOUD BASED MICROSERVICES", "text-hash": 950718827856471405, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/77", - "hash": 12747011194397783283, "orig": "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated. Before discussing the technical details, we would like to point out our requirements for the architecture of the platform. These requirements are all related to scaling. Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources. In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation. It is clear that the architecture of such a service is heavily influenced by these requirements.", "prov": [ { @@ -98187,13 +98501,13 @@ } ], "sref": "#/texts/70", + "subj_hash": 12747011194397783283, "text": "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated. Before discussing the technical details, we would like to point out our requirements for the architecture of the platform. These requirements are all related to scaling. Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources. In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation. It is clear that the architecture of such a service is heavily influenced by these requirements.", "text-hash": 13395059553653450335, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/78", - "hash": 174789262945188010, "orig": "4.1 Platform layers", "prov": [ { @@ -98201,13 +98515,13 @@ } ], "sref": "#/texts/71", + "subj_hash": 174789262945188010, "text": "4.1 Platform layers", "text-hash": 3197077882590976520, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/79", - "hash": 7228893318503650455, "orig": "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents. In Figure 6, we show a sketch of its", "prov": [ { @@ -98215,13 +98529,13 @@ } ], "sref": "#/texts/72", + "subj_hash": 7228893318503650455, "text": "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents. In Figure 6, we show a sketch of its", "text-hash": 475277818666452483, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/81", - "hash": 9230667184712205690, "orig": "architecture. As one can observe, we have grouped the service into four layers. These layers are:", "prov": [ { @@ -98229,13 +98543,13 @@ } ], "sref": "#/texts/73", + "subj_hash": 9230667184712205690, "text": "architecture. As one can observe, we have grouped the service into four layers. These layers are:", "text-hash": 12309253064221915096, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/82", - "hash": 17419815751432442882, "orig": "(1) An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering. The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", "prov": [ { @@ -98243,13 +98557,13 @@ } ], "sref": "#/texts/74", + "subj_hash": 17419815751432442882, "text": "(1) An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering. The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", "text-hash": 8731693174932948592, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/83", - "hash": 11194226403360998426, "orig": "(2) An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result. The task scheduling is done with the Message Broker RabbitMQ$^{14}$. The results are stored in the in-memory data store Redis$^{15}$. In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully. This approach allows for a very robust, fault-tolerant service with very little downtime.", "prov": [ { @@ -98257,13 +98571,13 @@ } ], "sref": "#/texts/75", + "subj_hash": 11194226403360998426, "text": "(2) An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result. The task scheduling is done with the Message Broker RabbitMQ$^{14}$. The results are stored in the in-memory data store Redis$^{15}$. In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully. This approach allows for a very robust, fault-tolerant service with very little downtime.", "text-hash": 10633901501381588600, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/84", - "hash": 9005324696118733701, "orig": "(3) A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc). In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$. This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker. The workers are not only consumers of tasks, but may also produce new ones. This is the case for the requests", "prov": [ { @@ -98271,13 +98585,13 @@ } ], "sref": "#/texts/76", + "subj_hash": 9005324696118733701, "text": "(3) A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc). In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$. This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker. The workers are not only consumers of tasks, but may also produce new ones. This is the case for the requests", "text-hash": 17146307233289309425, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/86", - "hash": 8082547756621048511, "orig": "operating on the whole corpus. Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", "prov": [ { @@ -98285,13 +98599,13 @@ } ], "sref": "#/texts/77", + "subj_hash": 8082547756621048511, "text": "operating on the whole corpus. Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", "text-hash": 18059523399368641563, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/87", - "hash": 7791113385466815951, "orig": "(4) A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store. The object-store allows us to easily scale the storage with regard to the number of processed documents. However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", "prov": [ { @@ -98299,13 +98613,13 @@ } ], "sref": "#/texts/78", + "subj_hash": 7791113385466815951, "text": "(4) A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store. The object-store allows us to easily scale the storage with regard to the number of processed documents. However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", "text-hash": 18360382746077681451, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/88", - "hash": 2845012065511066307, "orig": "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it. This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ. Being a cloud-based platform, our solution allows for these software assets to be detached from the main deployment and to be served by specialised vendors services which are certified to the latest industry requirements such as data-at-rest encryption, high availability, etc.", "prov": [ { @@ -98313,13 +98627,13 @@ } ], "sref": "#/texts/79", + "subj_hash": 2845012065511066307, "text": "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it. This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ. Being a cloud-based platform, our solution allows for these software assets to be detached from the main deployment and to be served by specialised vendors services which are certified to the latest industry requirements such as data-at-rest encryption, high availability, etc.", "text-hash": 5147922161190726703, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/89", - "hash": 15072914837937068796, "orig": "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform. From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services. During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks. For example other result-backends didn't offer the auto-cleaning functionality offered by Redis and, before opting for a custom solution mixing MongoDB with an object storage, we evaluated other solutions as", "prov": [ { @@ -98327,13 +98641,13 @@ } ], "sref": "#/texts/80", + "subj_hash": 15072914837937068796, "text": "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform. From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services. During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks. For example other result-backends didn't offer the auto-cleaning functionality offered by Redis and, before opting for a custom solution mixing MongoDB with an object storage, we evaluated other solutions as", "text-hash": 6457975667604208730, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/91", - "hash": 15263283599394646155, "orig": "the GridFS storage, but it didn't fit to the constraints of typical cloud environments.", "prov": [ { @@ -98341,13 +98655,13 @@ } ], "sref": "#/texts/81", + "subj_hash": 15263283599394646155, "text": "the GridFS storage, but it didn't fit to the constraints of typical cloud environments.", "text-hash": 6564180200469858791, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/92", - "hash": 11417717357379295278, "orig": "4.2 Deployment", "prov": [ { @@ -98355,13 +98669,13 @@ } ], "sref": "#/texts/82", + "subj_hash": 11417717357379295278, "text": "4.2 Deployment", "text-hash": 10410411375713696396, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/93", - "hash": 9031137420247852045, "orig": "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution. Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", "prov": [ { @@ -98369,13 +98683,13 @@ } ], "sref": "#/texts/83", + "subj_hash": 9031137420247852045, "text": "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution. Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", "text-hash": 17120327512656828009, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/94", - "hash": 18436578077535696718, "orig": "The common parts of all deployments are the interface and the compute layer. The compute layer is designed for dynamically adapt the number of resources on the current load. For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", "prov": [ { @@ -98383,13 +98697,13 @@ } ], "sref": "#/texts/84", + "subj_hash": 18436578077535696718, "text": "The common parts of all deployments are the interface and the compute layer. The compute layer is designed for dynamically adapt the number of resources on the current load. For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", "text-hash": 8003240278028347820, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/95", - "hash": 11734907767490759865, "orig": "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements. The parse component is indeed more demanding than the simple annotation components.", "prov": [ { @@ -98397,13 +98711,13 @@ } ], "sref": "#/texts/85", + "subj_hash": 11734907767490759865, "text": "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements. The parse component is indeed more demanding than the simple annotation components.", "text-hash": 14704352826439757333, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/96", - "hash": 7845460979782401889, "orig": "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks. Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment. Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", "prov": [ { @@ -98411,13 +98725,13 @@ } ], "sref": "#/texts/86", + "subj_hash": 7845460979782401889, "text": "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks. Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment. Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", "text-hash": 18296438351865061837, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/97", - "hash": 17769988780693768120, "orig": "4.3 Scaling benchmarks", "prov": [ { @@ -98425,13 +98739,13 @@ } ], "sref": "#/texts/87", + "subj_hash": 17769988780693768120, "text": "4.3 Scaling benchmarks", "text-hash": 8669715371308316950, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/98", - "hash": 12387489643011067991, "orig": "Let us now discuss some scaling results on our platform. As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources. In Figure 7, we show the number of users and the number of processed PDF", "prov": [ { @@ -98439,13 +98753,13 @@ } ], "sref": "#/texts/88", + "subj_hash": 12387489643011067991, "text": "Let us now discuss some scaling results on our platform. As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources. In Figure 7, we show the number of users and the number of processed PDF", "text-hash": 14043220598855238339, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/99", - "hash": 10375772475809458895, "orig": "pages 20 as a function of time. As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017. It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time. Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", "prov": [ { @@ -98453,13 +98767,13 @@ } ], "sref": "#/texts/89", + "subj_hash": 10375772475809458895, "text": "pages 20 as a function of time. As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017. It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time. Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", "text-hash": 11451664978555915307, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/100", - "hash": 7054726458191881751, "orig": "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources. We show this scaling by displaying the speedup versus the number of worker nodes available. Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores. As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes. Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker. The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level. The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes. Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", "prov": [ { @@ -98467,13 +98781,13 @@ } ], "sref": "#/texts/90", + "subj_hash": 7054726458191881751, "text": "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources. We show this scaling by displaying the speedup versus the number of worker nodes available. Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores. As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes. Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker. The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level. The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes. Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", "text-hash": 641132783909312643, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/101", - "hash": 7794115281016062068, "orig": "5 CONCLUSION", "prov": [ { @@ -98481,13 +98795,13 @@ } ], "sref": "#/texts/91", + "subj_hash": 7794115281016062068, "text": "5 CONCLUSION", "text-hash": 18347902420476900066, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/102", - "hash": 7038163015905900647, "orig": "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", "prov": [ { @@ -98495,13 +98809,13 @@ } ], "sref": "#/texts/92", + "subj_hash": 7038163015905900647, "text": "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", "text-hash": 657005981473069779, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/103", - "hash": 1508626318915838319, "orig": "The fundamental design choices in our solution have proven to enable scaling in three elementary ways. First, it can service multiple users concurrently. Second, it can ingest, parse and apply machine learned models on many documents at the same time. Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", "prov": [ { @@ -98509,13 +98823,13 @@ } ], "sref": "#/texts/93", + "subj_hash": 1508626318915838319, "text": "The fundamental design choices in our solution have proven to enable scaling in three elementary ways. First, it can service multiple users concurrently. Second, it can ingest, parse and apply machine learned models on many documents at the same time. Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", "text-hash": 1575427749670982603, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/104", - "hash": 17247086344435786796, "orig": "In the future, we plan to extend the platform in two major areas. First, we would like to extend the number of microservices, especially with regard to image understanding. The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc). The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier. Second, we would like to improve the quality and performance of our default models. We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5). To leverage this growing use of deep learning models, we will additionally introduce", "prov": [ { @@ -98523,13 +98837,13 @@ } ], "sref": "#/texts/94", + "subj_hash": 17247086344435786796, "text": "In the future, we plan to extend the platform in two major areas. First, we would like to extend the number of microservices, especially with regard to image understanding. The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc). The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier. Second, we would like to improve the quality and performance of our default models. We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5). To leverage this growing use of deep learning models, we will additionally introduce", "text-hash": 9192771730962863754, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/105", - "hash": 10287541089279789496, "orig": "specialised data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", "prov": [ { @@ -98537,13 +98851,13 @@ } ], "sref": "#/texts/95", + "subj_hash": 10287541089279789496, "text": "specialised data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", "text-hash": 11530911151361059606, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/106", - "hash": 7819882792760965882, "orig": "ACKNOWLEDGMENTS", "prov": [ { @@ -98551,13 +98865,13 @@ } ], "sref": "#/texts/96", + "subj_hash": 7819882792760965882, "text": "ACKNOWLEDGMENTS", "text-hash": 18322720810464861272, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/107", - "hash": 15983582675278266440, "orig": "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", "prov": [ { @@ -98565,13 +98879,13 @@ } ], "sref": "#/texts/97", + "subj_hash": 15983582675278266440, "text": "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", "text-hash": 5556222901900980902, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/108", - "hash": 12711351442546714716, "orig": "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation. MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", "prov": [ { @@ -98579,13 +98893,13 @@ } ], "sref": "#/texts/98", + "subj_hash": 12711351442546714716, "text": "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation. MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", "text-hash": 13431247303555599034, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/109", - "hash": 1225384713519841338, "orig": "REFERENCES", "prov": [ { @@ -98593,13 +98907,13 @@ } ], "sref": "#/texts/99", + "subj_hash": 1225384713519841338, "text": "REFERENCES", "text-hash": 1858797456585454232, "type": "subtitle-level-1" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/110", - "hash": 1712774266196702392, "orig": "[1] A. Antonacopoulos, C. Clausner, C. Papadopoulos, and S. Pletschacher. 2015. ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015. In Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR2015). Nancy, 1151-1155.", "prov": [ { @@ -98607,13 +98921,13 @@ } ], "sref": "#/texts/100", + "subj_hash": 1712774266196702392, "text": "[1] A. Antonacopoulos, C. Clausner, C. Papadopoulos, and S. Pletschacher. 2015. ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015. In Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR2015). Nancy, 1151-1155.", "text-hash": 1659105420801451542, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/111", - "hash": 14718288547983000340, "orig": "[2] Leo Breiman. 2001. Random Forests. Machine Learning 45, 1 (01 Oct 2001), 5-32. https://doi.org/10.1023/A:1010933404324", "prov": [ { @@ -98621,13 +98935,13 @@ } ], "sref": "#/texts/101", + "subj_hash": 14718288547983000340, "text": "[2] Leo Breiman. 2001. Random Forests. Machine Learning 45, 1 (01 Oct 2001), 5-32. https://doi.org/10.1023/A:1010933404324", "text-hash": 6812664208788567426, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/112", - "hash": 16943780574244090186, "orig": "[3] R. Cattoni, T. Coianiz, S. Messelodi, and C. M. Modena. 1998. Geometric layout analysis techniques for document image understanding: a review. Technical Report.", "prov": [ { @@ -98635,13 +98949,13 @@ } ], "sref": "#/texts/102", + "subj_hash": 16943780574244090186, "text": "[3] R. Cattoni, T. Coianiz, S. Messelodi, and C. M. Modena. 1998. Geometric layout analysis techniques for document image understanding: a review. Technical Report.", "text-hash": 9486476535199015848, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/113", - "hash": 8004985786049140169, "orig": "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier. 2005. From Legacy Documents to XML: A Conversion Framework. Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103. https://doi.org/10.1007/11551362_9", "prov": [ { @@ -98649,13 +98963,13 @@ } ], "sref": "#/texts/103", + "subj_hash": 8004985786049140169, "text": "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier. 2005. From Legacy Documents to XML: A Conversion Framework. Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103. https://doi.org/10.1007/11551362_9", "text-hash": 18434854666592634661, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/114", - "hash": 12744546813104546377, "orig": "[5] Ross Girshick. 2015. Fast R-CNN. In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15). IEEE Computer Society, Washington, DC, USA, 1440-1448. https://doi.org/10.1109/ICCV.2015.169", "prov": [ { @@ -98663,13 +98977,13 @@ } ], "sref": "#/texts/104", + "subj_hash": 12744546813104546377, "text": "[5] Ross Girshick. 2015. Fast R-CNN. In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15). IEEE Computer Society, Washington, DC, USA, 1440-1448. https://doi.org/10.1109/ICCV.2015.169", "text-hash": 13406949228208477349, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/115", - "hash": 16061746189176848219, "orig": "[6] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. 2013. Rich feature hierarchies for accurate object detection and semantic segmentation. CoRR abs/1311.2524 (2013). arXiv:1311.2524 http://arxiv.org/abs/1311.2524", "prov": [ { @@ -98677,13 +98991,13 @@ } ], "sref": "#/texts/105", + "subj_hash": 16061746189176848219, "text": "[6] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. 2013. Rich feature hierarchies for accurate object detection and semantic segmentation. CoRR abs/1311.2524 (2013). arXiv:1311.2524 http://arxiv.org/abs/1311.2524", "text-hash": 5756829059313082807, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/116", - "hash": 11872392946390819176, "orig": "[7] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. 2016. SSD: Single Shot MultiBox Detector. Springer International Publishing, Cham, 21-37. https://doi.org/10. 1007/978-3-319-46448-0_2", "prov": [ { @@ -98691,13 +99005,13 @@ } ], "sref": "#/texts/106", + "subj_hash": 11872392946390819176, "text": "[7] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. 2016. SSD: Single Shot MultiBox Detector. Springer International Publishing, Cham, 21-37. https://doi.org/10. 1007/978-3-319-46448-0_2", "text-hash": 14270091870781297606, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/117", - "hash": 2956849475535726296, "orig": "[8] Joseph Redmon, Santosh Kumar Divvala, Ross B. Girshick, and Ali Farhadi. 2016. You Only Look Once: Unified, Real-Time Object Detection. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016), 779-788.", "prov": [ { @@ -98705,13 +99019,13 @@ } ], "sref": "#/texts/107", + "subj_hash": 2956849475535726296, "text": "[8] Joseph Redmon, Santosh Kumar Divvala, Ross B. Girshick, and Ali Farhadi. 2016. You Only Look Once: Unified, Real-Time Object Detection. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016), 779-788.", "text-hash": 4738468948628789302, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/118", - "hash": 6623297047995432604, "orig": "[9] Joseph Redmon and Ali Farhadi. 2016. YOLO9000: Better, Faster, Stronger. arXiv preprint arXiv:1612.08242 (2016).", "prov": [ { @@ -98719,13 +99033,13 @@ } ], "sref": "#/texts/108", + "subj_hash": 6623297047995432604, "text": "[9] Joseph Redmon and Ali Farhadi. 2016. YOLO9000: Better, Faster, Stronger. arXiv preprint arXiv:1612.08242 (2016).", "text-hash": 15195146357792776186, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/119", - "hash": 2507285765516108280, "orig": "[10] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Advances in Neural Information Processing Systems 28, C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett (Eds.). Curran Associates, Inc., 91-99. http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks. pdf", "prov": [ { @@ -98733,13 +99047,13 @@ } ], "sref": "#/texts/109", + "subj_hash": 2507285765516108280, "text": "[10] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Advances in Neural Information Processing Systems 28, C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett (Eds.). Curran Associates, Inc., 91-99. http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks. pdf", "text-hash": 5476658171803931478, "type": "paragraph" }, { "dloc": "9fd2a4018bf2111bf11dd2cad4d2767dff877692694aa68126a00543f810a5be#/texts/120", - "hash": 14905276480471286920, "orig": "[11] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. 2018. Corpus Conversion Service poster at the SysML conference. http://www.sysml.cc/doc/ 76.pdf", "prov": [ { @@ -98747,6 +99061,7 @@ } ], "sref": "#/texts/110", + "subj_hash": 14905276480471286920, "text": "[11] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. 2018. Corpus Conversion Service poster at the SysML conference. http://www.sysml.cc/doc/ 76.pdf", "text-hash": 6922174983558886886, "type": "paragraph" diff --git a/tests/data/docs/doc_01.nlp.json b/tests/data/docs/doc_01.nlp.json index 64f63d81..3756a87a 100644 --- a/tests/data/docs/doc_01.nlp.json +++ b/tests/data/docs/doc_01.nlp.json @@ -1,6 +1,6 @@ { "_s3_data": {}, - "applied-models": [ + "applied_models": [ "cite", "expression", "language", @@ -660,7 +660,6 @@ "captions": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/41", - "hash": 5929648907277899214, "orig": "FIGURE1 Schematic of a data flow for the creation of a Knowledge Graph. The data flow consists of three main task types: extraction of document elements (abstracts, paragraphs, tables, figures, etc.), annotation of these elements to detect entities and their relationships and finally aggregation of these entities and their relationships. For every task, we keep complete provenance, such that we can always trace back to a specific document or element that embeds a certain entity or relationship", "prov": [ { @@ -668,6 +667,7 @@ } ], "sref": "#/figures/0/captions/0", + "subj_hash": 5929648907277899214, "text": "FIGURE1 Schematic of a data flow for the creation of a Knowledge Graph. The data flow consists of three main task types: extraction of document elements (abstracts, paragraphs, tables, figures, etc.), annotation of these elements to detect entities and their relationships and finally aggregation of these entities and their relationships. For every task, we keep complete provenance, such that we can always trace back to a specific document or element that embeds a certain entity or relationship", "text-hash": 12816755167354360565, "type": "caption" @@ -677,7 +677,6 @@ "created_by": "unknown", "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/figures/0", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -685,6 +684,7 @@ } ], "sref": "#/figures/0", + "subj_hash": 18446744073709551615, "type": "figure" }, { @@ -693,7 +693,6 @@ "created_by": "unknown", "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/figures/1", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -701,13 +700,13 @@ } ], "sref": "#/figures/1", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/90", - "hash": 13588295264109661534, "orig": "FIGURE 3 The time-to-solution for k-hop graph traversal for Neo4J and our new graph engine. The results were obtained for the graph500 and twitter benchmark graphs. The 10th and 90th percentiles are represented by the shaded regions; the median is shown by the markers", "prov": [ { @@ -715,6 +714,7 @@ } ], "sref": "#/figures/2/captions/0", + "subj_hash": 13588295264109661534, "text": "FIGURE 3 The time-to-solution for k-hop graph traversal for Neo4J and our new graph engine. The results were obtained for the graph500 and twitter benchmark graphs. The 10th and 90th percentiles are represented by the shaded regions; the median is shown by the markers", "text-hash": 9558113653035301733, "type": "caption" @@ -724,7 +724,6 @@ "created_by": "unknown", "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/figures/2", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -732,13 +731,13 @@ } ], "sref": "#/figures/2", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/99", - "hash": 5867845979623066511, "orig": "FIGURE 4 Visual workflow editor for deep queries in the CPS platform. The interface exhibits a left toolbar to pick specific graph operations, a main drawing area for the workflow DAG and a right panel to inspect and define parameters of each graph operation. Colors indicate different operation types such as input node-retrieval (blue), traversal (red), logical operators (green) and transform functions (yellow). Valid workflows can be executed using the ' play ' button", "prov": [ { @@ -746,6 +745,7 @@ } ], "sref": "#/figures/3/captions/0", + "subj_hash": 5867845979623066511, "text": "FIGURE 4 Visual workflow editor for deep queries in the CPS platform. The interface exhibits a left toolbar to pick specific graph operations, a main drawing area for the workflow DAG and a right panel to inspect and define parameters of each graph operation. Colors indicate different operation types such as input node-retrieval (blue), traversal (red), logical operators (green) and transform functions (yellow). Valid workflows can be executed using the ' play ' button", "text-hash": 12590315652817418422, "type": "caption" @@ -755,7 +755,6 @@ "created_by": "unknown", "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/figures/3", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -763,6 +762,7 @@ } ], "sref": "#/figures/3", + "subj_hash": 18446744073709551615, "type": "figure" }, { @@ -771,7 +771,6 @@ "created_by": "unknown", "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/figures/4", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -779,13 +778,13 @@ } ], "sref": "#/figures/4", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/123", - "hash": 3722064109667835816, "orig": "FIGURE5 The architectural design of the CPS platform. On the left, we show the data flow processing architecture orchestrated through an asynchronous REST API. On the right, we sketch the multitenant KG serving facility which provides a dedicated environment for each project", "prov": [ { @@ -793,6 +792,7 @@ } ], "sref": "#/figures/5/captions/0", + "subj_hash": 3722064109667835816, "text": "FIGURE5 The architectural design of the CPS platform. On the left, we show the data flow processing architecture orchestrated through an asynchronous REST API. On the right, we sketch the multitenant KG serving facility which provides a dedicated environment for each project", "text-hash": 1256907401557265619, "type": "caption" @@ -802,7 +802,6 @@ "created_by": "unknown", "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/figures/5", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -810,13 +809,13 @@ } ], "sref": "#/figures/5", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/131", - "hash": 5492278710328857395, "orig": "FIGURE 6 Sketch of the entire pipeline to perform deep data exploration on large corpora", "prov": [ { @@ -824,6 +823,7 @@ } ], "sref": "#/figures/6/captions/0", + "subj_hash": 5492278710328857395, "text": "FIGURE 6 Sketch of the entire pipeline to perform deep data exploration on large corpora", "text-hash": 10669134213704159562, "type": "caption" @@ -833,7 +833,6 @@ "created_by": "unknown", "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/figures/6", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -841,13 +840,13 @@ } ], "sref": "#/figures/6", + "subj_hash": 18446744073709551615, "type": "figure" }, { "captions": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/145", - "hash": 14119822239274862236, "orig": "FIGURE 7 The evaluation workflow to identify the petroleum system elements (PSE) in an article and infer its properties. It starts by searching for all petroleum system elements of a certain type (eg, source, reservoir or seal) and a particular report (worktasks 1 and 2). By successive graph traversals (worktasks 3-5, 7-9, 11, 12) along specific edges and logical operations (worktasks 6, 10, 13, 14), we are able to obtain a list of candidate formations (worktask 15), ages (worktask 16) and rocks (worktask 17), ranked by their accumulated weight. Execution of this query takes less than 18 ms on average", "prov": [ { @@ -855,6 +854,7 @@ } ], "sref": "#/figures/7/captions/0", + "subj_hash": 14119822239274862236, "text": "FIGURE 7 The evaluation workflow to identify the petroleum system elements (PSE) in an article and infer its properties. It starts by searching for all petroleum system elements of a certain type (eg, source, reservoir or seal) and a particular report (worktasks 1 and 2). By successive graph traversals (worktasks 3-5, 7-9, 11, 12) along specific edges and logical operations (worktasks 6, 10, 13, 14), we are able to obtain a list of candidate formations (worktask 15), ages (worktask 16) and rocks (worktask 17), ranked by their accumulated weight. Execution of this query takes less than 18 ms on average", "text-hash": 2397375916393726887, "type": "paragraph" @@ -864,7 +864,6 @@ "created_by": "unknown", "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/figures/7", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -872,6 +871,7 @@ } ], "sref": "#/figures/7", + "subj_hash": 18446744073709551615, "type": "figure" }, { @@ -880,7 +880,6 @@ "created_by": "unknown", "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/figures/8", "footnotes": [], - "hash": 18446744073709551615, "mentions": [], "prov": [ { @@ -888,6 +887,7 @@ } ], "sref": "#/figures/8", + "subj_hash": 18446744073709551615, "type": "figure" } ], @@ -976,7 +976,6 @@ "footnotes": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/footnotes/0", - "hash": 4934591159529761265, "orig": "This is an open access article under the terms of the Creative Commons Attribution License, which permits use, distribution and reproduction in any medium, provided the original work is properly cited.", "prov": [ { @@ -984,13 +983,13 @@ } ], "sref": "#/footnotes/0", + "subj_hash": 4934591159529761265, "text": "This is an open access article under the terms of the Creative Commons Attribution License, which permits use, distribution and reproduction in any medium, provided the original work is properly cited.", "text-hash": 11226800603937609484, "type": "footnote" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/footnotes/1", - "hash": 16070682594069297502, "orig": "\u00a9 2020 The Authors. Applied AI Letters published by John Wiley & Sons Ltd.", "prov": [ { @@ -998,12 +997,12 @@ } ], "sref": "#/footnotes/1", + "subj_hash": 16070682594069297502, "text": "\u00a9 2020 The Authors. Applied AI Letters published by John Wiley & Sons Ltd.", "text-hash": 2671219352918255461, "type": "footnote" } ], - "hash": 18446744073709551615, "instances": { "data": [ [ @@ -1106,7 +1105,7 @@ 131, 134, 31, - 34, + 32, true, "500", "500" @@ -1126,8 +1125,8 @@ 171, 169, 171, - 40, - 42, + 38, + 39, true, "10", "10" @@ -1147,8 +1146,8 @@ 180, 178, 180, - 44, - 46, + 41, + 42, true, "90", "90" @@ -1442,7 +1441,7 @@ 304, 332, 59, - 75, + 73, true, "(worktasks 3-5, 7-9, 11, 12)", "(worktasks 3-5, 7-9, 11, 12)" @@ -1505,7 +1504,7 @@ 325, 327, 69, - 71, + 70, true, "11", "11" @@ -1525,8 +1524,8 @@ 331, 329, 331, + 71, 72, - 74, true, "12", "12" @@ -1546,8 +1545,8 @@ 402, 377, 402, - 81, - 94, + 79, + 89, true, "(worktasks 6, 10, 13, 14)", "(worktasks 6, 10, 13, 14)" @@ -1567,8 +1566,8 @@ 389, 388, 389, - 83, - 84, + 81, + 82, true, "6", "6" @@ -1588,8 +1587,8 @@ 393, 391, 393, - 85, - 87, + 83, + 84, true, "10", "10" @@ -1609,8 +1608,8 @@ 397, 395, 397, - 88, - 90, + 85, + 86, true, "13", "13" @@ -1630,8 +1629,8 @@ 401, 399, 401, - 91, - 93, + 87, + 88, true, "14", "14" @@ -1651,8 +1650,8 @@ 470, 457, 470, - 105, - 110, + 100, + 104, true, "(worktask 15)", "(worktask 15)" @@ -1672,8 +1671,8 @@ 469, 467, 469, - 107, - 109, + 102, + 103, true, "15", "15" @@ -1693,8 +1692,8 @@ 490, 477, 490, - 112, - 117, + 106, + 110, true, "(worktask 16)", "(worktask 16)" @@ -1714,8 +1713,8 @@ 489, 487, 489, - 114, - 116, + 108, + 109, true, "16", "16" @@ -1735,8 +1734,8 @@ 514, 501, 514, - 119, - 124, + 112, + 116, true, "(worktask 17)", "(worktask 17)" @@ -1756,8 +1755,8 @@ 513, 511, 513, - 121, - 123, + 114, + 115, true, "17", "17" @@ -1777,8 +1776,8 @@ 594, 592, 594, - 138, - 140, + 130, + 131, true, "18", "18" @@ -1841,7 +1840,7 @@ 0, 95, 0, - 22, + 19, true, "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland.", "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland." @@ -1946,7 +1945,7 @@ 65, 69, 14, - 18, + 15, true, "8820", "8820" @@ -1966,8 +1965,8 @@ 81, 70, 81, - 18, - 19, + 15, + 16, true, "Rueschlikon", "Rueschlikon" @@ -1987,8 +1986,8 @@ 94, 83, 94, - 20, - 21, + 17, + 18, true, "Switzerland", "Switzerland" @@ -2008,8 +2007,8 @@ 101, 96, 101, - 22, - 23, + 19, + 20, true, "Email", "Email" @@ -2029,8 +2028,8 @@ 121, 103, 121, - 24, - 29, + 21, + 26, true, "taa@zurich.ibm.com", "taa@zurich.ibm.com" @@ -3668,7 +3667,7 @@ 0, 95, 0, - 22, + 19, true, "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally.", "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally." @@ -3689,7 +3688,7 @@ 6, 10, 2, - 6, + 3, true, "2015", "2015" @@ -3709,8 +3708,8 @@ 17, 12, 17, - 7, - 8, + 4, + 5, true, "Adobe", "Adobe" @@ -3730,8 +3729,8 @@ 47, 44, 47, + 9, 12, - 15, true, "2.7", "2.7" @@ -3751,8 +3750,8 @@ 70, 48, 70, + 12, 15, - 18, true, "trillion PDF documents", "trillion PDF documents" @@ -3772,8 +3771,8 @@ 85, 74, 85, - 19, - 20, + 16, + 17, true, "circulation", "circulation" @@ -3793,8 +3792,8 @@ 157, 96, 157, - 22, - 35, + 19, + 32, true, "It is self-evident that this number has increased ever since.", "It is self-evident that this number has increased ever since." @@ -3814,8 +3813,8 @@ 114, 102, 114, + 21, 24, - 27, true, "self-evident", "self-evident" @@ -3835,8 +3834,8 @@ 131, 125, 131, - 29, - 30, + 26, + 27, true, "number", "number" @@ -3856,8 +3855,8 @@ 322, 158, 322, - 35, - 61, + 32, + 58, true, "The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world.", "The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world." @@ -3877,8 +3876,8 @@ 178, 162, 178, - 36, - 38, + 33, + 35, true, "explosive growth", "explosive growth" @@ -3898,8 +3897,8 @@ 191, 182, 191, - 39, - 40, + 36, + 37, true, "documents", "documents" @@ -3919,8 +3918,8 @@ 232, 214, 232, - 44, - 46, + 41, + 43, true, "digital publishing", "digital publishing" @@ -3940,8 +3939,8 @@ 250, 240, 250, - 47, - 48, + 44, + 45, true, "mainstream", "mainstream" @@ -3961,8 +3960,8 @@ 280, 263, 280, - 51, - 53, + 48, + 50, true, "serious challenge", "serious challenge" @@ -3982,8 +3981,8 @@ 321, 293, 321, - 56, - 60, + 53, + 57, true, "academic and corporate world", "academic and corporate world" @@ -4003,8 +4002,8 @@ 321, 306, 321, - 58, - 60, + 55, + 57, true, "corporate world", "corporate world" @@ -4024,8 +4023,8 @@ 459, 323, 459, - 61, - 84, + 58, + 81, true, "The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings.", "The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings." @@ -4045,8 +4044,8 @@ 353, 337, 353, - 63, - 65, + 60, + 62, true, "publication rate", "publication rate" @@ -4066,8 +4065,8 @@ 376, 357, 376, - 66, - 68, + 63, + 65, true, "scientific articles", "scientific articles" @@ -4087,8 +4086,8 @@ 417, 408, 417, - 74, - 75, + 71, + 72, true, "academics", "academics" @@ -4108,8 +4107,8 @@ 458, 443, 458, - 81, - 83, + 78, + 80, true, "latest findings", "latest findings" @@ -4129,8 +4128,8 @@ 639, 460, 639, - 84, - 117, + 81, + 114, true, "Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable.", "Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable." @@ -4150,8 +4149,8 @@ 487, 475, 487, + 84, 87, - 90, true, "ever-growing", "ever-growing" @@ -4171,8 +4170,8 @@ 494, 488, 494, - 90, - 91, + 87, + 88, true, "number", "number" @@ -4192,8 +4191,8 @@ 514, 498, 514, - 92, - 94, + 89, + 91, true, "internal reports", "internal reports" @@ -4213,8 +4212,8 @@ 529, 516, 529, - 95, - 96, + 92, + 93, true, "documentation", "documentation" @@ -4234,8 +4233,8 @@ 538, 531, 538, - 97, - 98, + 94, + 95, true, "patents", "patents" @@ -4255,8 +4254,8 @@ 549, 540, 549, - 99, - 100, + 96, + 97, true, "contracts", "contracts" @@ -4276,8 +4275,8 @@ 562, 551, 562, - 101, - 102, + 98, + 99, true, "regulations", "regulations" @@ -4297,8 +4296,8 @@ 577, 564, 577, - 103, - 105, + 100, + 102, true, "court filings", "court filings" @@ -4318,8 +4317,8 @@ 583, 579, 583, - 106, - 108, + 103, + 105, true, "etc", "etc." @@ -4339,8 +4338,8 @@ 609, 592, 609, - 111, - 113, + 108, + 110, true, "most corporations", "most corporations" @@ -4382,7 +4381,7 @@ 3, 5, 2, - 4, + 3, true, "15", "15" @@ -6797,7 +6796,7 @@ 3, 45, 2, - 14, + 11, true, "Publications of before year 2010.", "Publications of before year 2010." @@ -6818,7 +6817,7 @@ 3, 44, 2, - 13, + 10, true, "Publications of before year 2010", "Publications of before year 2010" @@ -10073,7 +10072,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -10093,8 +10092,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -10114,8 +10113,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -10135,8 +10134,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -10156,8 +10155,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -10177,8 +10176,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -10198,8 +10197,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -10219,8 +10218,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -10240,8 +10239,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -10261,8 +10260,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -10282,8 +10281,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -10303,8 +10302,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -10324,8 +10323,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -10345,8 +10344,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -10366,8 +10365,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -10387,8 +10386,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -10408,8 +10407,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -10429,8 +10428,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -10450,8 +10449,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -10471,8 +10470,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -10492,8 +10491,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -10513,8 +10512,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -10534,8 +10533,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -10555,8 +10554,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -10598,7 +10597,7 @@ 3, 5, 2, - 4, + 3, true, "15", "15" @@ -14294,7 +14293,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -14314,8 +14313,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -14335,8 +14334,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -14356,8 +14355,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -14377,8 +14376,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -14398,8 +14397,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -14419,8 +14418,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -14440,8 +14439,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -14461,8 +14460,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -14482,8 +14481,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -14503,8 +14502,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -14524,8 +14523,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -14545,8 +14544,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -14566,8 +14565,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -14587,8 +14586,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -14608,8 +14607,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -14629,8 +14628,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -14650,8 +14649,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -14671,8 +14670,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -14692,8 +14691,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -14713,8 +14712,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -14734,8 +14733,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -14755,8 +14754,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -14776,8 +14775,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -14819,7 +14818,7 @@ 3, 5, 2, - 4, + 3, true, "15", "15" @@ -18683,7 +18682,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -18703,8 +18702,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -18724,8 +18723,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -18745,8 +18744,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -18766,8 +18765,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -18787,8 +18786,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -18808,8 +18807,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -18829,8 +18828,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -18850,8 +18849,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -18871,8 +18870,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -18892,8 +18891,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -18913,8 +18912,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -18934,8 +18933,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -18955,8 +18954,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -18976,8 +18975,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -18997,8 +18996,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -19018,8 +19017,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -19039,8 +19038,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -19060,8 +19059,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -19081,8 +19080,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -19102,8 +19101,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -19123,8 +19122,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -19144,8 +19143,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -19165,8 +19164,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -20195,7 +20194,7 @@ 480, 570, 80, - 101, + 97, true, "From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs.", "From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs." @@ -20237,7 +20236,7 @@ 503, 506, 85, - 88, + 86, true, "100", "100" @@ -20257,8 +20256,8 @@ 510, 507, 510, - 88, - 91, + 86, + 87, true, "000", "000" @@ -20278,8 +20277,8 @@ 520, 511, 520, - 91, - 92, + 87, + 88, true, "documents", "documents" @@ -20299,8 +20298,8 @@ 550, 549, 550, - 97, - 98, + 93, + 94, true, "3", "3" @@ -20320,8 +20319,8 @@ 569, 551, 569, - 98, - 100, + 94, + 96, true, "million paragraphs", "million paragraphs" @@ -20341,8 +20340,8 @@ 687, 571, 687, - 101, - 118, + 97, + 114, true, "Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers.", "Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers." @@ -20362,8 +20361,8 @@ 599, 580, 599, - 102, - 104, + 98, + 100, true, "unlimited resources", "unlimited resources" @@ -20383,8 +20382,8 @@ 620, 605, 620, - 106, - 108, + 102, + 104, true, "annotation task", "annotation task" @@ -20404,8 +20403,8 @@ 658, 657, 658, - 113, - 114, + 109, + 110, true, "3", "3" @@ -20425,8 +20424,8 @@ 686, 659, 686, - 114, - 117, + 110, + 113, true, "million independent workers", "million independent workers" @@ -22547,7 +22546,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -22567,8 +22566,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -22588,8 +22587,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -22609,8 +22608,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -22630,8 +22629,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -22651,8 +22650,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -22672,8 +22671,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -22693,8 +22692,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -22714,8 +22713,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -22735,8 +22734,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -22756,8 +22755,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -22777,8 +22776,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -22798,8 +22797,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -22819,8 +22818,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -22840,8 +22839,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -22861,8 +22860,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -22882,8 +22881,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -22903,8 +22902,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -22924,8 +22923,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -22945,8 +22944,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -22966,8 +22965,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -22987,8 +22986,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -23008,8 +23007,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -23029,8 +23028,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -23072,7 +23071,7 @@ 3, 5, 2, - 4, + 3, true, "15", "15" @@ -26495,7 +26494,7 @@ 327, 472, 55, - 81, + 80, true, "Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation.", "Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation." @@ -26600,7 +26599,7 @@ 409, 413, 67, - 71, + 70, true, "9,10", "9,10" @@ -26620,8 +26619,8 @@ 429, 414, 429, - 71, - 73, + 70, + 72, true, "node clustering", "node clustering" @@ -26641,8 +26640,8 @@ 448, 431, 448, - 74, - 76, + 73, + 75, true, "spectral analysis", "spectral analysis" @@ -26662,8 +26661,8 @@ 471, 440, 471, - 75, - 80, + 74, + 79, true, "analysis, and label propagation", "analysis, and label propagation" @@ -26683,8 +26682,8 @@ 471, 454, 471, - 78, - 80, + 77, + 79, true, "label propagation", "label propagation" @@ -27167,7 +27166,7 @@ 463, 468, 84, - 89, + 87, true, "11,12", "11,12" @@ -27187,8 +27186,8 @@ 594, 469, 594, - 89, - 112, + 87, + 110, true, "Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform.", "Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform." @@ -27208,8 +27207,8 @@ 496, 480, 496, + 90, 92, - 94, true, "poor performance", "poor performance" @@ -27229,8 +27228,8 @@ 539, 514, 539, - 97, - 100, + 95, + 98, true, "available graph databases", "available graph databases" @@ -27250,8 +27249,8 @@ 572, 556, 572, - 104, - 107, + 102, + 105, true, "new graph engine", "new graph engine" @@ -27271,8 +27270,8 @@ 593, 581, 593, + 107, 109, - 111, true, "CPS platform", "CPS platform" @@ -27292,8 +27291,8 @@ 761, 595, 761, - 112, - 147, + 110, + 145, true, "This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast.", "This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast." @@ -27313,8 +27312,8 @@ 612, 600, 612, + 111, 113, - 115, true, "graph engine", "graph engine" @@ -27334,8 +27333,8 @@ 646, 632, 646, + 117, 119, - 121, true, "advanced graph", "advanced graph" @@ -27355,8 +27354,8 @@ 656, 641, 656, - 120, - 123, + 118, + 121, true, "graph-analytics", "graph-analytics" @@ -27376,8 +27375,8 @@ 656, 647, 656, - 122, - 123, + 120, + 121, true, "analytics", "analytics" @@ -27397,8 +27396,8 @@ 658, 657, 658, - 123, - 124, + 121, + 122, true, "2", "2" @@ -27418,8 +27417,8 @@ 691, 670, 691, - 127, - 130, + 125, + 128, true, "evaluate deep queries", "evaluate deep queries" @@ -27439,8 +27438,8 @@ 706, 697, 706, - 131, - 134, + 129, + 132, true, "multi-hop", "multi-hop" @@ -27460,8 +27459,8 @@ 702, 697, 702, - 131, - 132, + 129, + 130, true, "multi", "multi" @@ -27481,8 +27480,8 @@ 717, 703, 717, + 131, 133, - 135, true, "hop traversals", "hop traversals" @@ -27502,8 +27501,8 @@ 733, 721, 733, + 134, 136, - 138, true, "large graphs", "large graphs" @@ -27523,8 +27522,8 @@ 745, 734, 745, - 138, - 144, + 136, + 142, true, "(>1B edges)", "(>1B edges)" @@ -27544,8 +27543,8 @@ 737, 736, 737, - 140, - 141, + 138, + 139, true, "1", "1" @@ -27565,8 +27564,8 @@ 744, 737, 744, + 139, 141, - 143, true, "B edges", "B edges" @@ -27587,7 +27586,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -27607,8 +27606,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -27628,8 +27627,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -27649,8 +27648,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -27670,8 +27669,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -27691,8 +27690,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -27712,8 +27711,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -27733,8 +27732,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -27754,8 +27753,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -27775,8 +27774,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -27796,8 +27795,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -27817,8 +27816,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -27838,8 +27837,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -27859,8 +27858,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -27880,8 +27879,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -27901,8 +27900,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -27922,8 +27921,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -27943,8 +27942,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -27964,8 +27963,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -27985,8 +27984,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -28006,8 +28005,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -28027,8 +28026,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -28048,8 +28047,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -28069,8 +28068,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -28112,7 +28111,7 @@ 3, 5, 2, - 4, + 3, true, "15", "15" @@ -28700,7 +28699,7 @@ 148, 153, 24, - 29, + 27, true, "13,14", "13,14" @@ -28720,8 +28719,8 @@ 279, 154, 279, - 29, - 51, + 27, + 49, true, "In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors.", "In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors." @@ -28741,8 +28740,8 @@ 182, 161, 182, - 31, - 34, + 29, + 32, true, "adjacency list format", "adjacency list format" @@ -28762,8 +28761,8 @@ 194, 190, 194, - 36, - 37, + 34, + 35, true, "node", "node" @@ -28783,8 +28782,8 @@ 219, 213, 219, - 40, - 41, + 38, + 39, true, "object", "object" @@ -28804,8 +28803,8 @@ 240, 237, 240, - 44, - 45, + 42, + 43, true, "set", "set" @@ -28825,8 +28824,8 @@ 251, 244, 251, - 46, - 47, + 44, + 45, true, "indices", "indices" @@ -28846,8 +28845,8 @@ 278, 269, 278, - 49, - 50, + 47, + 48, true, "neighbors", "neighbors" @@ -28867,8 +28866,8 @@ 344, 283, 340, - 52, - 64, + 50, + 62, true, "The edges are therefore stored as a property of the node.", "The edges are therefore stored as a property of the node." @@ -28888,8 +28887,8 @@ 296, 287, 292, - 53, - 54, + 51, + 52, true, "edges", "edges" @@ -28909,8 +28908,8 @@ 331, 319, 327, - 59, - 60, + 57, + 58, true, "property", "property" @@ -28930,8 +28929,8 @@ 343, 335, 339, - 62, - 63, + 60, + 61, true, "node", "node" @@ -28951,8 +28950,8 @@ 502, 341, 498, - 64, - 93, + 62, + 91, true, "In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples.", "In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples." @@ -28972,8 +28971,8 @@ 377, 348, 373, - 66, - 69, + 64, + 67, true, "adjacency matrix approach", "adjacency matrix approach" @@ -28993,8 +28992,8 @@ 388, 379, 384, - 71, - 72, + 69, + 70, true, "nodes", "nodes" @@ -29014,8 +29013,8 @@ 409, 395, 405, - 74, - 75, + 72, + 73, true, "identifier", "identifier" @@ -29035,8 +29034,8 @@ 441, 406, 437, - 75, - 81, + 73, + 79, true, "(typically an unsigned integer)", "(typically an unsigned integer)" @@ -29056,8 +29055,8 @@ 440, 420, 436, + 76, 78, - 80, true, "unsigned integer", "unsigned integer" @@ -29077,8 +29076,8 @@ 455, 446, 451, - 83, - 84, + 81, + 82, true, "edges", "edges" @@ -29098,8 +29097,8 @@ 476, 468, 472, - 88, - 89, + 86, + 87, true, "list", "list" @@ -29119,8 +29118,8 @@ 501, 491, 497, - 91, - 92, + 89, + 90, true, "tuples", "tuples" @@ -29267,7 +29266,7 @@ 111, 113, 19, - 21, + 20, true, "13", "13" @@ -29287,8 +29286,8 @@ 159, 114, 159, - 21, - 31, + 20, + 30, true, "For example, consider the graph-traversal V !", "For example, consider the graph-traversal V !" @@ -29308,8 +29307,8 @@ 125, 118, 125, + 21, 22, - 23, true, "example", "example" @@ -29329,8 +29328,8 @@ 155, 140, 155, - 26, - 29, + 25, + 28, true, "graph-traversal", "graph-traversal" @@ -29350,8 +29349,8 @@ 145, 140, 145, + 25, 26, - 27, true, "graph", "graph" @@ -29371,8 +29370,8 @@ 157, 146, 157, - 28, - 30, + 27, + 29, true, "traversal V", "traversal V" @@ -29392,8 +29391,8 @@ 269, 160, 269, - 31, - 60, + 30, + 59, true, "A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W.", "A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W." @@ -29413,8 +29412,8 @@ 193, 190, 193, + 39, 40, - 41, true, "set", "set" @@ -29434,8 +29433,8 @@ 204, 197, 204, - 42, - 44, + 41, + 43, true, "nodes V", "nodes V" @@ -29455,8 +29454,8 @@ 226, 222, 226, + 46, 47, - 48, true, "edge", "edge" @@ -29476,8 +29475,8 @@ 237, 232, 237, + 49, 50, - 51, true, "order", "order" @@ -29497,8 +29496,8 @@ 257, 250, 257, - 54, - 56, + 53, + 55, true, "new set", "new set" @@ -29518,8 +29517,8 @@ 268, 261, 268, - 57, - 59, + 56, + 58, true, "nodes W", "nodes W" @@ -29539,8 +29538,8 @@ 321, 307, 321, - 66, - 68, + 65, + 67, true, "linear algebra", "linear algebra" @@ -29708,7 +29707,7 @@ 73, 83, 39, - 45, + 44, true, "GLYPH", "GLYPH" @@ -29750,7 +29749,7 @@ 80, 82, 42, - 44, + 43, true, "26", "26" @@ -29770,8 +29769,8 @@ 88, 86, 87, + 45, 46, - 47, true, "1", "1" @@ -29791,8 +29790,8 @@ 91, 88, 89, + 46, 47, - 48, true, "\u00de", "\u00de" @@ -30275,7 +30274,7 @@ 37, 57, 27, - 39, + 37, true, "GLYPHGLYPH", "GLYPHGLYPH" @@ -30296,7 +30295,7 @@ 44, 46, 30, - 32, + 31, true, "16", "16" @@ -30316,8 +30315,8 @@ 58, 54, 56, + 35, 36, - 38, true, "17", "17" @@ -30337,8 +30336,8 @@ 70, 58, 68, - 39, - 45, + 37, + 42, true, "GLYPH", "GLYPH" @@ -30358,8 +30357,8 @@ 69, 65, 67, - 42, - 44, + 40, + 41, true, "16", "16" @@ -30379,8 +30378,8 @@ 81, 69, 79, - 45, - 51, + 42, + 47, true, "GLYPH", "GLYPH" @@ -30400,8 +30399,8 @@ 80, 76, 78, - 48, - 50, + 45, + 46, true, "17", "17" @@ -30421,8 +30420,8 @@ 92, 80, 90, - 51, - 57, + 47, + 52, true, "GLYPH", "GLYPH" @@ -30442,8 +30441,8 @@ 91, 87, 89, - 54, - 56, + 50, + 51, true, "16", "16" @@ -30463,8 +30462,8 @@ 103, 91, 101, + 52, 57, - 63, true, "GLYPH", "GLYPH" @@ -30484,8 +30483,8 @@ 102, 98, 100, - 60, - 62, + 55, + 56, true, "17", "17" @@ -30505,8 +30504,8 @@ 110, 106, 107, - 65, - 66, + 59, + 60, true, "2", "2" @@ -31136,7 +31135,7 @@ 503, 508, 93, - 98, + 96, true, "15,16", "15,16" @@ -31156,8 +31155,8 @@ 600, 509, 600, - 98, - 113, + 96, + 111, true, "Notably, most advanced graph-analytical operations can be formulated using SpMV operations.", "Notably, most advanced graph-analytical operations can be formulated using SpMV operations." @@ -31177,8 +31176,8 @@ 537, 523, 537, + 99, 101, - 103, true, "advanced graph", "advanced graph" @@ -31198,8 +31197,8 @@ 548, 532, 548, - 102, - 105, + 100, + 103, true, "graph-analytical", "graph-analytical" @@ -31219,8 +31218,8 @@ 559, 538, 559, + 102, 104, - 106, true, "analytical operations", "analytical operations" @@ -31240,8 +31239,8 @@ 599, 584, 599, + 108, 110, - 112, true, "SpMV operations", "SpMV operations" @@ -31261,8 +31260,8 @@ 731, 601, 731, - 113, - 139, + 111, + 137, true, "The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w !", "The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w !" @@ -31282,8 +31281,8 @@ 622, 610, 622, + 113, 115, - 117, true, "trivial case", "trivial case" @@ -31303,8 +31302,8 @@ 635, 626, 635, - 118, - 121, + 116, + 119, true, "page-rank", "page-rank" @@ -31324,8 +31323,8 @@ 630, 626, 630, - 118, - 119, + 116, + 117, true, "page", "page" @@ -31345,8 +31344,8 @@ 635, 631, 635, - 120, - 121, + 118, + 119, true, "rank", "rank" @@ -31366,8 +31365,8 @@ 679, 671, 679, - 127, - 128, + 125, + 126, true, "Equation", "Equation" @@ -31387,8 +31386,8 @@ 683, 680, 683, - 128, - 131, + 126, + 129, true, "(1)", "(1)" @@ -31408,8 +31407,8 @@ 682, 681, 682, - 129, - 130, + 127, + 128, true, "1", "1" @@ -31429,8 +31428,8 @@ 698, 687, 698, - 132, - 133, + 130, + 131, true, "combination", "combination" @@ -31450,8 +31449,8 @@ 721, 706, 721, - 135, - 136, + 133, + 134, true, "renormalization", "renormalization" @@ -31471,8 +31470,8 @@ 753, 746, 753, - 143, - 150, + 141, + 148, true, "$^{!}$.", "$^{!}$." @@ -31492,8 +31491,8 @@ 752, 746, 752, - 143, - 149, + 141, + 147, true, "^{!}", "$^{!}$" @@ -31513,8 +31512,8 @@ 960, 754, 960, - 150, - 187, + 148, + 185, true, "In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations.", "In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations." @@ -31534,8 +31533,8 @@ 774, 761, 774, + 150, 152, - 154, true, "previous work", "previous work" @@ -31555,8 +31554,8 @@ 777, 776, 777, - 155, - 156, + 153, + 154, true, "2", "2" @@ -31576,8 +31575,8 @@ 806, 800, 806, - 161, - 162, + 159, + 160, true, "detail", "detail" @@ -31597,8 +31596,8 @@ 837, 821, 837, - 164, - 167, + 162, + 165, true, "graph-analytical", "graph-analytical" @@ -31618,8 +31617,8 @@ 826, 821, 826, - 164, - 165, + 162, + 163, true, "graph", "graph" @@ -31639,8 +31638,8 @@ 848, 827, 848, + 164, 166, - 168, true, "analytical operations", "analytical operations" @@ -31660,8 +31659,8 @@ 874, 857, 874, + 168, 170, - 172, true, "node centralities", "node centralities" @@ -31681,8 +31680,8 @@ 896, 879, 896, + 171, 173, - 175, true, "spectral analysis", "spectral analysis" @@ -31702,8 +31701,8 @@ 909, 904, 909, - 177, - 178, + 175, + 176, true, "graph", "graph" @@ -31723,8 +31722,8 @@ 959, 939, 959, - 183, - 186, + 181, + 184, true, "only SpMV operations", "only SpMV operations" @@ -33299,7 +33298,7 @@ 1214, 1216, 212, - 214, + 213, true, "17", "17" @@ -33320,7 +33319,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -33340,8 +33339,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -33361,8 +33360,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -33382,8 +33381,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -33403,8 +33402,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -33424,8 +33423,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -33445,8 +33444,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -33466,8 +33465,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -33487,8 +33486,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -33508,8 +33507,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -33529,8 +33528,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -33550,8 +33549,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -33571,8 +33570,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -33592,8 +33591,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -33613,8 +33612,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -33634,8 +33633,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -33655,8 +33654,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -33676,8 +33675,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -33697,8 +33696,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -33718,8 +33717,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -33739,8 +33738,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -33760,8 +33759,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -33781,8 +33780,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -33802,8 +33801,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -34223,7 +34222,7 @@ 205, 306, 44, - 78, + 75, true, "We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges).", "We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges)." @@ -34286,7 +34285,7 @@ 248, 256, 54, - 58, + 56, true, "graph500", "graph500" @@ -34328,7 +34327,7 @@ 253, 256, 55, - 58, + 56, true, "500", "500" @@ -34348,8 +34347,8 @@ 265, 257, 259, - 58, - 59, + 56, + 57, true, "\u00a7\u00a7", "\u00a7\u00a7" @@ -34369,8 +34368,8 @@ 277, 260, 271, - 59, - 65, + 57, + 62, true, "(64M edges)", "(64M edges)" @@ -34390,8 +34389,8 @@ 269, 261, 263, - 60, - 62, + 58, + 59, true, "64", "64" @@ -34411,8 +34410,8 @@ 276, 263, 270, - 62, - 64, + 59, + 61, true, "M edges", "M edges" @@ -34432,8 +34431,8 @@ 295, 276, 289, + 63, 66, - 69, true, "twitter-graph", "twitter-graph" @@ -34453,8 +34452,8 @@ 300, 284, 292, - 68, - 70, + 65, + 67, true, "graph \u00b6\u00b6", "graph \u00b6\u00b6" @@ -34474,8 +34473,8 @@ 313, 293, 305, - 70, - 77, + 67, + 74, true, "(1.5B edges)", "(1.5B edges)" @@ -34495,8 +34494,8 @@ 305, 294, 297, + 68, 71, - 74, true, "1.5", "1.5" @@ -34516,8 +34515,8 @@ 312, 297, 304, - 74, - 76, + 71, + 73, true, "B edges", "B edges" @@ -34537,8 +34536,8 @@ 354, 307, 346, - 78, - 85, + 75, + 82, true, "Two important observations can be made.", "Two important observations can be made." @@ -34558,8 +34557,8 @@ 341, 307, 333, + 75, 78, - 81, true, "Two important observations", "Two important observations" @@ -34579,8 +34578,8 @@ 457, 347, 449, - 85, - 107, + 82, + 104, true, "Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals.", "Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals." @@ -34600,8 +34599,8 @@ 380, 360, 372, - 88, - 90, + 85, + 87, true, "graph engine", "graph engine" @@ -34621,8 +34620,8 @@ 439, 419, 431, + 98, 101, - 104, true, "higher-order", "higher-order" @@ -34642,8 +34641,8 @@ 456, 426, 448, + 100, 103, - 106, true, "order graph traversals", "order graph traversals" @@ -34663,8 +34662,8 @@ 533, 450, 525, - 107, - 126, + 104, + 123, true, "With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour.", "With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour." @@ -34684,8 +34683,8 @@ 468, 455, 460, + 105, 108, - 111, true, "Neo4J", "Neo4J" @@ -34705,8 +34704,8 @@ 466, 455, 458, - 108, - 109, + 105, + 106, true, "Neo", "Neo" @@ -34726,8 +34725,8 @@ 467, 458, 459, - 109, - 110, + 106, + 107, true, "4", "4" @@ -34747,8 +34746,8 @@ 508, 497, 500, - 119, - 120, + 116, + 117, true, "TTS", "TTS" @@ -34768,8 +34767,8 @@ 522, 507, 514, - 121, - 122, + 118, + 119, true, "upwards", "upwards" @@ -34789,8 +34788,8 @@ 527, 518, 519, - 123, - 124, + 120, + 121, true, "1", "1" @@ -34810,8 +34809,8 @@ 532, 520, 524, - 124, - 125, + 121, + 122, true, "hour", "hour" @@ -34831,8 +34830,8 @@ 644, 526, 636, - 126, - 149, + 123, + 146, true, "Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals.", "Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals." @@ -34852,8 +34851,8 @@ 560, 540, 552, - 129, - 131, + 126, + 128, true, "graph engine", "graph engine" @@ -34873,8 +34872,8 @@ 583, 559, 575, - 132, - 134, + 129, + 131, true, "minimal variance", "minimal variance" @@ -34894,8 +34893,8 @@ 594, 583, 586, - 136, - 137, + 133, + 134, true, "TTS", "TTS" @@ -34915,8 +34914,8 @@ 611, 599, 603, - 139, - 140, + 136, + 137, true, "runs", "runs" @@ -34936,8 +34935,8 @@ 626, 611, 618, + 139, 142, - 145, true, "k-order", "k-order" @@ -34957,8 +34956,8 @@ 632, 613, 624, - 144, - 146, + 141, + 143, true, "order graph", "order graph" @@ -34978,8 +34977,8 @@ 643, 619, 635, + 142, 145, - 148, true, "graph-traversals", "graph-traversals" @@ -34999,8 +34998,8 @@ 643, 625, 635, - 147, - 148, + 144, + 145, true, "traversals", "traversals" @@ -35020,8 +35019,8 @@ 745, 637, 737, - 149, - 174, + 146, + 171, true, "This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from.", "This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from." @@ -35041,8 +35040,8 @@ 670, 648, 662, - 152, - 154, + 149, + 151, true, "stark contrast", "stark contrast" @@ -35062,8 +35061,8 @@ 679, 666, 671, + 152, 155, - 158, true, "Neo4J", "Neo4J" @@ -35083,8 +35082,8 @@ 677, 666, 669, - 155, - 156, + 152, + 153, true, "Neo", "Neo" @@ -35104,8 +35103,8 @@ 678, 669, 670, - 156, - 157, + 153, + 154, true, "4", "4" @@ -35125,8 +35124,8 @@ 694, 683, 686, - 161, - 162, + 158, + 159, true, "TTS", "TTS" @@ -35146,8 +35145,8 @@ 728, 713, 720, - 166, - 170, + 163, + 167, true, "node(s)", "node(s)" @@ -35167,8 +35166,8 @@ 725, 713, 717, - 166, - 167, + 163, + 164, true, "node", "node" @@ -35188,8 +35187,8 @@ 728, 717, 720, + 164, 167, - 170, true, "(s)", "(s)" @@ -35210,7 +35209,7 @@ 0, 141, 0, - 26, + 25, true, "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO.", "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO." @@ -35294,7 +35293,7 @@ 111, 113, 17, - 19, + 18, true, "18", "18" @@ -35314,8 +35313,8 @@ 125, 122, 125, + 20, 21, - 22, true, "CSR", "CSR" @@ -35335,8 +35334,8 @@ 140, 137, 140, + 23, 24, - 25, true, "COO", "COO" @@ -35356,8 +35355,8 @@ 260, 142, 260, - 26, - 47, + 25, + 46, true, "This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory.", "This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory." @@ -35377,8 +35376,8 @@ 189, 173, 189, - 30, - 32, + 29, + 31, true, "memory footprint", "memory footprint" @@ -35398,8 +35397,8 @@ 202, 197, 202, + 33, 34, - 35, true, "graph", "graph" @@ -35419,8 +35418,8 @@ 227, 221, 227, + 37, 38, - 39, true, "graphs", "graphs" @@ -35440,8 +35439,8 @@ 259, 250, 259, - 43, - 46, + 42, + 45, true, "in-memory", "in-memory" @@ -35461,8 +35460,8 @@ 259, 252, 259, - 44, - 46, + 43, + 45, true, "-memory", "-memory" @@ -35482,8 +35481,8 @@ 390, 261, 390, - 47, - 76, + 46, + 75, true, "In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO.", "In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO." @@ -35503,8 +35502,8 @@ 272, 268, 272, + 48, 49, - 50, true, "case", "case" @@ -35524,8 +35523,8 @@ 310, 305, 310, + 56, 57, - 58, true, "edges", "edges" @@ -35545,8 +35544,8 @@ 330, 314, 330, - 59, - 61, + 58, + 60, true, "blocked matrices", "blocked matrices" @@ -35566,8 +35565,8 @@ 346, 336, 346, - 63, - 65, + 62, + 64, true, "fixed size", "fixed size" @@ -35587,8 +35586,8 @@ 374, 362, 374, - 69, - 71, + 68, + 70, true, "block matrix", "block matrix" @@ -35608,8 +35607,8 @@ 389, 381, 389, - 73, - 75, + 72, + 74, true, "type COO", "type COO" @@ -35629,8 +35628,8 @@ 536, 391, 536, - 76, - 112, + 75, + 107, true, "We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers.", "We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers." @@ -35650,8 +35649,8 @@ 408, 404, 408, + 78, 79, - 80, true, "size", "size" @@ -35671,8 +35670,8 @@ 428, 416, 428, - 82, - 85, + 81, + 84, true, "block-matrix", "block-matrix" @@ -35692,8 +35691,8 @@ 421, 416, 421, + 81, 82, - 83, true, "block", "block" @@ -35713,8 +35712,8 @@ 428, 422, 428, + 83, 84, - 85, true, "matrix", "matrix" @@ -35734,8 +35733,8 @@ 436, 435, 436, + 86, 87, - 88, true, "2", "2" @@ -35755,8 +35754,8 @@ 439, 437, 439, + 87, 88, - 90, true, "16", "16" @@ -35776,8 +35775,8 @@ 444, 442, 444, - 91, - 93, + 89, + 90, true, "65", "65" @@ -35797,8 +35796,8 @@ 448, 445, 448, - 93, - 96, + 90, + 91, true, "536", "536" @@ -35818,8 +35817,8 @@ 465, 461, 465, - 99, - 100, + 94, + 95, true, "pair", "pair" @@ -35839,8 +35838,8 @@ 476, 469, 476, - 101, - 102, + 96, + 97, true, "indices", "indices" @@ -35860,8 +35859,8 @@ 535, 512, 535, - 108, - 111, + 103, + 106, true, "unsigned short integers", "unsigned short integers" @@ -35881,8 +35880,8 @@ 684, 537, 684, - 112, - 146, + 107, + 140, true, "Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes.", "Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes." @@ -35902,8 +35901,8 @@ 558, 554, 558, - 115, - 116, + 110, + 111, true, "edge", "edge" @@ -35923,8 +35922,8 @@ 581, 565, 581, - 118, - 120, + 113, + 115, true, "memory footprint", "memory footprint" @@ -35944,8 +35943,8 @@ 591, 590, 591, - 122, - 123, + 117, + 118, true, "4", "4" @@ -35965,8 +35964,8 @@ 597, 592, 597, - 123, - 124, + 118, + 119, true, "bytes", "bytes" @@ -35986,8 +35985,8 @@ 637, 598, 637, - 124, - 135, + 119, + 129, true, "(equivalent to a single 32-bit integer)", "(equivalent to a single 32-bit integer)" @@ -36007,8 +36006,8 @@ 628, 622, 628, - 129, - 133, + 124, + 127, true, "32-bit", "32-bit" @@ -36028,8 +36027,8 @@ 624, 622, 624, - 129, - 131, + 124, + 125, true, "32", "32" @@ -36049,8 +36048,8 @@ 636, 625, 636, - 132, - 134, + 126, + 128, true, "bit integer", "bit integer" @@ -36070,8 +36069,8 @@ 660, 647, 660, - 138, - 140, + 132, + 134, true, "weighted edge", "weighted edge" @@ -36091,8 +36090,8 @@ 672, 663, 672, - 141, - 142, + 135, + 136, true, "footprint", "footprint" @@ -36112,8 +36111,8 @@ 677, 676, 677, - 143, - 144, + 137, + 138, true, "8", "8" @@ -36133,8 +36132,8 @@ 683, 678, 683, - 144, - 145, + 138, + 139, true, "bytes", "bytes" @@ -36154,8 +36153,8 @@ 832, 689, 826, - 147, - 179, + 141, + 172, true, "This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$).", "This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$)." @@ -36175,8 +36174,8 @@ 720, 699, 720, - 150, - 152, + 144, + 146, true, "significant reduction", "significant reduction" @@ -36196,8 +36195,8 @@ 740, 724, 740, - 153, - 155, + 147, + 149, true, "memory footprint", "memory footprint" @@ -36217,8 +36216,8 @@ 758, 753, 758, - 157, - 160, + 151, + 154, true, "Neo4J", "Neo4J" @@ -36238,8 +36237,8 @@ 756, 753, 756, - 157, - 158, + 151, + 152, true, "Neo", "Neo" @@ -36259,8 +36258,8 @@ 757, 756, 757, - 158, - 159, + 152, + 153, true, "4", "4" @@ -36280,8 +36279,8 @@ 774, 757, 774, - 159, - 162, + 153, + 156, true, "J graph databases", "J graph databases" @@ -36301,8 +36300,8 @@ 788, 786, 788, - 165, - 167, + 159, + 160, true, "33", "33" @@ -36322,8 +36321,8 @@ 794, 789, 794, - 167, - 168, + 160, + 161, true, "bytes", "bytes" @@ -36343,8 +36342,8 @@ 815, 799, 815, - 169, - 171, + 162, + 164, true, "unweighted edges", "unweighted edges" @@ -36364,8 +36363,8 @@ 830, 816, 824, - 171, - 177, + 164, + 170, true, "^{\u2020\u2020\u2020}", "$^{\u2020\u2020\u2020}$" @@ -36385,8 +36384,8 @@ 828, 819, 822, - 174, - 175, + 167, + 168, true, "\u2020\u2020\u2020", "\u2020\u2020\u2020" @@ -36406,8 +36405,8 @@ 1027, 827, 1021, - 179, - 224, + 172, + 216, true, "Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory.", "Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory." @@ -36427,8 +36426,8 @@ 865, 853, 859, - 184, - 185, + 177, + 178, true, "graphs", "graphs" @@ -36448,8 +36447,8 @@ 879, 872, 873, - 188, - 189, + 181, + 182, true, "8", "8" @@ -36469,8 +36468,8 @@ 893, 874, 887, - 189, - 191, + 182, + 184, true, "billion edges", "billion edges" @@ -36490,8 +36489,8 @@ 914, 893, 908, - 193, - 195, + 186, + 188, true, "virtual machine", "virtual machine" @@ -36511,8 +36510,8 @@ 922, 914, 916, - 196, - 198, + 189, + 190, true, "32", "32" @@ -36532,8 +36531,8 @@ 925, 917, 919, - 198, - 199, + 190, + 191, true, "GB", "GB" @@ -36553,8 +36552,8 @@ 940, 923, 934, - 200, - 202, + 192, + 194, true, "free memory", "free memory" @@ -36574,8 +36573,8 @@ 978, 958, 972, - 208, - 210, + 200, + 202, true, "trillion edges", "trillion edges" @@ -36595,8 +36594,8 @@ 994, 978, 988, - 212, - 215, + 204, + 207, true, "bare-metal", "bare-metal" @@ -36616,8 +36615,8 @@ 988, 978, 982, - 212, - 213, + 204, + 205, true, "bare", "bare" @@ -36637,8 +36636,8 @@ 1000, 983, 994, - 214, - 216, + 206, + 208, true, "metal POWER", "metal POWER" @@ -36658,8 +36657,8 @@ 1001, 989, 995, - 215, - 217, + 207, + 209, true, "POWER9", "POWER9" @@ -36679,8 +36678,8 @@ 1001, 994, 995, - 216, - 217, + 208, + 209, true, "9", "9" @@ -36700,8 +36699,8 @@ 1006, 996, 1000, - 217, - 218, + 209, + 210, true, "node", "node" @@ -36721,8 +36720,8 @@ 1013, 1006, 1007, - 219, - 220, + 211, + 212, true, "4", "4" @@ -36742,8 +36741,8 @@ 1016, 1008, 1010, - 220, - 221, + 212, + 213, true, "TB", "TB" @@ -36763,8 +36762,8 @@ 1026, 1014, 1020, - 222, - 223, + 214, + 215, true, "memory", "memory" @@ -38087,7 +38086,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -38107,8 +38106,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -38128,8 +38127,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -38149,8 +38148,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -38170,8 +38169,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -38191,8 +38190,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -38212,8 +38211,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -38233,8 +38232,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -38254,8 +38253,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -38275,8 +38274,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -38296,8 +38295,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -38317,8 +38316,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -38338,8 +38337,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -38359,8 +38358,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -38380,8 +38379,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -38401,8 +38400,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -38422,8 +38421,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -38443,8 +38442,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -38464,8 +38463,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -38485,8 +38484,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -38506,8 +38505,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -38527,8 +38526,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -38548,8 +38547,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -38569,8 +38568,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -38612,7 +38611,7 @@ 3, 5, 2, - 4, + 3, true, "15", "15" @@ -39095,7 +39094,7 @@ 53, 63, 28, - 34, + 33, true, "GLYPH", "GLYPH" @@ -39116,7 +39115,7 @@ 60, 62, 31, - 33, + 32, true, "26", "26" @@ -39136,8 +39135,8 @@ 68, 66, 67, + 34, 35, - 36, true, "3", "3" @@ -40376,7 +40375,7 @@ 64, 74, 35, - 41, + 40, true, "GLYPH", "GLYPH" @@ -40397,7 +40396,7 @@ 71, 73, 38, - 40, + 39, true, "18", "18" @@ -40417,8 +40416,8 @@ 85, 75, 85, - 41, - 47, + 40, + 45, true, "GLYPH", "GLYPH" @@ -40438,8 +40437,8 @@ 84, 82, 84, + 43, 44, - 46, true, "19", "19" @@ -40459,8 +40458,8 @@ 95, 88, 95, - 48, - 55, + 46, + 53, true, "^{!}=", "$^{!}$=" @@ -40480,8 +40479,8 @@ 104, 96, 104, - 55, - 63, + 53, + 61, true, "e^{A}-", "e$^{A}$-" @@ -40501,8 +40500,8 @@ 106, 103, 106, + 60, 62, - 64, true, "- 1", "- 1" @@ -40522,8 +40521,8 @@ 116, 107, 116, - 64, - 69, + 62, + 67, true, "GLYPH", "GLYPH" @@ -40543,8 +40542,8 @@ 115, 114, 115, - 67, - 68, + 65, + 66, true, "0", "0" @@ -40564,8 +40563,8 @@ 126, 117, 126, - 69, - 74, + 67, + 72, true, "GLYPH", "GLYPH" @@ -40585,8 +40584,8 @@ 125, 124, 125, - 72, - 73, + 70, + 71, true, "1", "1" @@ -40606,8 +40605,8 @@ 135, 129, 135, - 75, - 81, + 73, + 79, true, "^{!}", "$^{!}$" @@ -40627,8 +40626,8 @@ 141, 139, 140, - 83, - 84, + 81, + 82, true, "4", "4" @@ -41048,7 +41047,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -41068,8 +41067,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -41089,8 +41088,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -41110,8 +41109,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -41131,8 +41130,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -41152,8 +41151,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -41173,8 +41172,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -41194,8 +41193,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -41215,8 +41214,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -41236,8 +41235,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -41257,8 +41256,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -41278,8 +41277,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -41299,8 +41298,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -41320,8 +41319,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -41341,8 +41340,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -41362,8 +41361,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -41383,8 +41382,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -41404,8 +41403,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -41425,8 +41424,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -41446,8 +41445,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -41467,8 +41466,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -41488,8 +41487,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -41509,8 +41508,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -41530,8 +41529,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -43841,7 +43840,7 @@ 189, 334, 32, - 67, + 66, true, "For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes.", "For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes." @@ -43988,7 +43987,7 @@ 308, 315, 57, - 62, + 61, true, "x86-and", "x86-and" @@ -44009,7 +44008,7 @@ 309, 311, 58, - 60, + 59, true, "86", "86" @@ -44029,8 +44028,8 @@ 327, 316, 327, - 62, - 65, + 61, + 64, true, "POWER-based", "POWER-based" @@ -44050,8 +44049,8 @@ 321, 316, 321, + 61, 62, - 63, true, "POWER", "POWER" @@ -44071,8 +44070,8 @@ 333, 328, 333, + 64, 65, - 66, true, "nodes", "nodes" @@ -46697,7 +46696,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -46717,8 +46716,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -46738,8 +46737,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -46759,8 +46758,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -46780,8 +46779,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -46801,8 +46800,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -46822,8 +46821,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -46843,8 +46842,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -46864,8 +46863,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -46885,8 +46884,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -46906,8 +46905,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -46927,8 +46926,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -46948,8 +46947,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -46969,8 +46968,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -46990,8 +46989,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -47011,8 +47010,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -47032,8 +47031,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -47053,8 +47052,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -47074,8 +47073,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -47095,8 +47094,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -47116,8 +47115,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -47137,8 +47136,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -47158,8 +47157,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -47179,8 +47178,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -47201,7 +47200,7 @@ 0, 2, 0, - 2, + 1, true, "11", "11" @@ -47221,8 +47220,8 @@ 6, 4, 6, + 2, 3, - 5, true, "15", "15" @@ -49406,7 +49405,7 @@ 378, 380, 67, - 69, + 68, true, "19", "19" @@ -49426,8 +49425,8 @@ 491, 381, 491, - 69, - 91, + 68, + 90, true, "In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal.", "In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal." @@ -49447,8 +49446,8 @@ 410, 393, 410, - 72, - 74, + 71, + 73, true, "minimalistic form", "minimalistic form" @@ -49468,8 +49467,8 @@ 430, 414, 430, - 76, - 78, + 75, + 77, true, "petroleum system", "petroleum system" @@ -49489,8 +49488,8 @@ 461, 451, 461, + 81, 82, - 83, true, "components", "components" @@ -49510,8 +49509,8 @@ 490, 463, 490, - 84, - 90, + 83, + 89, true, "source, reservoir, and seal", "source, reservoir, and seal" @@ -49531,8 +49530,8 @@ 469, 463, 469, + 83, 84, - 85, true, "source", "source" @@ -49552,8 +49551,8 @@ 480, 471, 480, + 85, 86, - 87, true, "reservoir", "reservoir" @@ -49573,8 +49572,8 @@ 490, 486, 490, + 88, 89, - 90, true, "seal", "seal" @@ -49594,8 +49593,8 @@ 561, 492, 561, - 91, - 106, + 90, + 105, true, "The source is the rock formation in which the oil or gas was created.", "The source is the rock formation in which the oil or gas was created." @@ -49615,8 +49614,8 @@ 502, 496, 502, + 91, 92, - 93, true, "source", "source" @@ -49636,8 +49635,8 @@ 524, 510, 524, - 95, - 97, + 94, + 96, true, "rock formation", "rock formation" @@ -49657,8 +49656,8 @@ 548, 538, 548, - 100, - 103, + 99, + 102, true, "oil or gas", "oil or gas" @@ -49678,8 +49677,8 @@ 541, 538, 541, + 99, 100, - 101, true, "oil", "oil" @@ -49699,8 +49698,8 @@ 548, 545, 548, + 101, 102, - 103, true, "gas", "gas" @@ -49720,8 +49719,8 @@ 666, 562, 666, - 106, - 128, + 105, + 127, true, "Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas.", "Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas." @@ -49741,8 +49740,8 @@ 590, 580, 590, - 110, - 113, + 109, + 112, true, "oil or gas", "oil or gas" @@ -49762,8 +49761,8 @@ 583, 580, 583, + 109, 110, - 111, true, "oil", "oil" @@ -49783,8 +49782,8 @@ 590, 587, 590, + 111, 112, - 113, true, "gas", "gas" @@ -49804,8 +49803,8 @@ 636, 615, 636, - 117, - 120, + 116, + 119, true, "porous reservoir rock", "porous reservoir rock" @@ -49825,8 +49824,8 @@ 665, 654, 665, - 124, - 127, + 123, + 126, true, "oil and gas", "oil and gas" @@ -49846,8 +49845,8 @@ 657, 654, 657, + 123, 124, - 125, true, "oil", "oil" @@ -49867,8 +49866,8 @@ 665, 662, 665, + 125, 126, - 127, true, "gas", "gas" @@ -49888,8 +49887,8 @@ 803, 667, 803, - 128, - 156, + 127, + 155, true, "In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal.", "In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal." @@ -49909,8 +49908,8 @@ 675, 670, 675, + 128, 129, - 130, true, "order", "order" @@ -49930,8 +49929,8 @@ 695, 684, 695, - 132, - 135, + 131, + 134, true, "oil and gas", "oil and gas" @@ -49951,8 +49950,8 @@ 687, 684, 687, + 131, 132, - 133, true, "oil", "oil" @@ -49972,8 +49971,8 @@ 695, 692, 695, + 133, 134, - 135, true, "gas", "gas" @@ -49993,8 +49992,8 @@ 724, 715, 724, + 139, 140, - 141, true, "reservoir", "reservoir" @@ -50014,8 +50013,8 @@ 777, 751, 777, - 147, - 150, + 146, + 149, true, "impermeable rock formation", "impermeable rock formation" @@ -50035,8 +50034,8 @@ 802, 798, 802, + 153, 154, - 155, true, "seal", "seal" @@ -50056,8 +50055,8 @@ 913, 804, 913, - 156, - 177, + 155, + 176, true, "Each one of these components is comprised of one or more formations, with a certain age and rock composition.", "Each one of these components is comprised of one or more formations, with a certain age and rock composition." @@ -50077,8 +50076,8 @@ 832, 822, 832, + 159, 160, - 161, true, "components", "components" @@ -50098,8 +50097,8 @@ 871, 861, 871, + 166, 167, - 168, true, "formations", "formations" @@ -50119,8 +50118,8 @@ 891, 880, 891, - 171, - 173, + 170, + 172, true, "certain age", "certain age" @@ -50140,8 +50139,8 @@ 912, 888, 912, - 172, - 176, + 171, + 175, true, "age and rock composition", "age and rock composition" @@ -50161,8 +50160,8 @@ 912, 896, 912, - 174, - 176, + 173, + 175, true, "rock composition", "rock composition" @@ -50182,8 +50181,8 @@ 1162, 914, 1162, - 177, - 223, + 176, + 222, true, "To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints.", "To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints." @@ -50203,8 +50202,8 @@ 944, 928, 944, - 180, - 182, + 179, + 181, true, "petroleum system", "petroleum system" @@ -50224,8 +50223,8 @@ 975, 950, 975, - 184, - 187, + 183, + 186, true, "certain geographical area", "certain geographical area" @@ -50245,8 +50244,8 @@ 1014, 995, 1014, - 193, - 195, + 192, + 194, true, "candidate formation", "candidate formation" @@ -50266,8 +50265,8 @@ 1033, 1024, 1033, + 196, 197, - 198, true, "component", "component" @@ -50287,8 +50286,8 @@ 1067, 1034, 1067, - 198, - 208, + 197, + 207, true, "(ie, reservoir, seal, and source)", "(ie, reservoir, seal, and source)" @@ -50308,8 +50307,8 @@ 1066, 1035, 1066, - 199, - 207, + 198, + 206, true, "ie, reservoir, seal, and source", "ie, reservoir, seal, and source" @@ -50329,8 +50328,8 @@ 1037, 1035, 1037, + 198, 199, - 200, true, "ie", "ie" @@ -50350,8 +50349,8 @@ 1048, 1039, 1048, + 200, 201, - 202, true, "reservoir", "reservoir" @@ -50371,8 +50370,8 @@ 1054, 1050, 1054, + 202, 203, - 204, true, "seal", "seal" @@ -50392,8 +50391,8 @@ 1066, 1060, 1066, + 205, 206, - 207, true, "source", "source" @@ -50413,8 +50412,8 @@ 1099, 1089, 1099, + 211, 212, - 213, true, "properties", "properties" @@ -50434,8 +50433,8 @@ 1119, 1109, 1119, + 214, 215, - 216, true, "components", "components" @@ -50455,8 +50454,8 @@ 1149, 1133, 1149, - 218, - 221, + 217, + 220, true, "well-established", "well-established" @@ -50476,8 +50475,8 @@ 1161, 1150, 1161, + 220, 221, - 222, true, "constraints", "constraints" @@ -50497,8 +50496,8 @@ 1174, 1167, 1174, + 223, 224, - 225, true, "example", "example" @@ -50518,8 +50517,8 @@ 1189, 1180, 1189, + 226, 227, - 228, true, "reservoir", "reservoir" @@ -50540,7 +50539,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -50560,8 +50559,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -50581,8 +50580,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -50602,8 +50601,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -50623,8 +50622,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -50644,8 +50643,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -50665,8 +50664,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -50686,8 +50685,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -50707,8 +50706,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -50728,8 +50727,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -50749,8 +50748,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -50770,8 +50769,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -50791,8 +50790,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -50812,8 +50811,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -50833,8 +50832,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -50854,8 +50853,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -50875,8 +50874,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -50896,8 +50895,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -50917,8 +50916,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -50938,8 +50937,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -50959,8 +50958,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -50980,8 +50979,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -51001,8 +51000,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -51022,8 +51021,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -51044,7 +51043,7 @@ 0, 2, 0, - 2, + 1, true, "12", "12" @@ -51065,7 +51064,7 @@ 3, 5, 1, - 3, + 2, true, "15", "15" @@ -52745,7 +52744,7 @@ 0, 154, 0, - 32, + 29, true, "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset.", "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset." @@ -52850,7 +52849,7 @@ 88, 92, 18, - 22, + 19, true, "1051", "1051" @@ -52870,8 +52869,8 @@ 117, 93, 117, + 19, 22, - 25, true, "Field Evaluation Reports", "Field Evaluation Reports" @@ -52891,8 +52890,8 @@ 141, 127, 141, - 27, - 29, + 24, + 26, true, "C&C Reservoirs", "C&C Reservoirs" @@ -52912,8 +52911,8 @@ 156, 146, 153, - 30, - 31, + 27, + 28, true, "dataset", "dataset" @@ -52933,8 +52932,8 @@ 259, 155, 256, - 32, - 50, + 29, + 47, true, "The advantage of using this dataset for an accuracy benchmark is that each report includes two parts.", "The advantage of using this dataset for an accuracy benchmark is that each report includes two parts." @@ -52954,8 +52953,8 @@ 171, 159, 168, - 33, - 34, + 30, + 31, true, "advantage", "advantage" @@ -52975,8 +52974,8 @@ 193, 183, 190, - 37, - 38, + 34, + 35, true, "dataset", "dataset" @@ -52996,8 +52995,8 @@ 219, 198, 216, - 40, - 42, + 37, + 39, true, "accuracy benchmark", "accuracy benchmark" @@ -53017,8 +53016,8 @@ 239, 230, 236, - 45, - 46, + 42, + 43, true, "report", "report" @@ -53038,8 +53037,8 @@ 258, 250, 255, - 48, - 49, + 45, + 46, true, "parts", "parts" @@ -53059,8 +53058,8 @@ 350, 257, 347, - 50, - 67, + 47, + 64, true, "One part is verbose text describing the history, evolution, and composition of the fields.", "One part is verbose text describing the history, evolution, and composition of the fields." @@ -53080,8 +53079,8 @@ 268, 257, 265, - 50, - 52, + 47, + 49, true, "One part", "One part" @@ -53101,8 +53100,8 @@ 284, 269, 281, - 53, - 55, + 50, + 52, true, "verbose text", "verbose text" @@ -53122,8 +53121,8 @@ 335, 297, 332, - 57, - 63, + 54, + 60, true, "history, evolution, and composition", "history, evolution, and composition" @@ -53143,8 +53142,8 @@ 307, 297, 304, - 57, - 58, + 54, + 55, true, "history", "history" @@ -53164,8 +53163,8 @@ 318, 306, 315, - 59, - 60, + 56, + 57, true, "evolution", "evolution" @@ -53185,8 +53184,8 @@ 335, 321, 332, - 62, - 63, + 59, + 60, true, "composition", "composition" @@ -53206,8 +53205,8 @@ 349, 340, 346, - 65, - 66, + 62, + 63, true, "fields", "fields" @@ -53227,8 +53226,8 @@ 490, 348, 487, - 67, - 89, + 64, + 86, true, "The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline.", "The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline." @@ -53248,8 +53247,8 @@ 363, 352, 360, - 68, - 69, + 65, + 66, true, "language", "language" @@ -53269,8 +53268,8 @@ 393, 372, 390, - 72, - 74, + 69, + 71, true, "similar complexity", "similar complexity" @@ -53290,8 +53289,8 @@ 429, 394, 426, + 72, 75, - 78, true, "standard geological publications", "standard geological publications" @@ -53311,8 +53310,8 @@ 460, 438, 457, - 81, - 83, + 78, + 80, true, "realistic challenge", "realistic challenge" @@ -53332,8 +53331,8 @@ 489, 466, 486, + 82, 85, - 88, true, "KG creation pipeline", "KG creation pipeline" @@ -53353,8 +53352,8 @@ 656, 488, 653, - 89, - 119, + 86, + 116, true, "The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties.", "The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties." @@ -53374,8 +53373,8 @@ 506, 492, 503, - 90, - 92, + 87, + 89, true, "second part", "second part" @@ -53395,8 +53394,8 @@ 517, 511, 514, - 94, - 95, + 91, + 92, true, "end", "end" @@ -53416,8 +53415,8 @@ 532, 523, 529, - 97, - 98, + 94, + 95, true, "report", "report" @@ -53437,8 +53436,8 @@ 555, 546, 552, - 101, - 102, + 98, + 99, true, "tables", "tables" @@ -53458,8 +53457,8 @@ 580, 573, 577, - 105, - 106, + 102, + 103, true, "text", "text" @@ -53479,8 +53478,8 @@ 608, 597, 605, - 110, - 111, + 107, + 108, true, "elements", "elements" @@ -53500,8 +53499,8 @@ 633, 613, 630, - 113, - 115, + 110, + 112, true, "petroleum systems", "petroleum systems" @@ -53521,8 +53520,8 @@ 655, 642, 652, - 117, - 118, + 114, + 115, true, "properties", "properties" @@ -53542,8 +53541,8 @@ 734, 654, 731, - 119, - 134, + 116, + 131, true, "Therefore, we ingest these reports into CCS and extract both text and tables.", "Therefore, we ingest these reports into CCS and extract both text and tables." @@ -53563,8 +53562,8 @@ 691, 681, 688, - 124, - 125, + 121, + 122, true, "reports", "reports" @@ -53584,8 +53583,8 @@ 700, 694, 697, - 126, - 127, + 123, + 124, true, "CCS", "CCS" @@ -53605,8 +53604,8 @@ 722, 715, 719, - 130, - 131, + 127, + 128, true, "text", "text" @@ -53626,8 +53625,8 @@ 733, 724, 730, - 132, - 133, + 129, + 130, true, "tables", "tables" @@ -53647,8 +53646,8 @@ 923, 732, 920, - 134, - 176, + 131, + 173, true, "Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark.", "Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark." @@ -53668,8 +53667,8 @@ 759, 754, 756, - 139, - 140, + 136, + 137, true, "KG", "KG" @@ -53689,8 +53688,8 @@ 778, 771, 775, - 143, - 144, + 140, + 141, true, "text", "text" @@ -53710,8 +53709,8 @@ 801, 792, 798, - 147, - 148, + 144, + 145, true, "tables", "tables" @@ -53731,8 +53730,8 @@ 817, 802, 814, + 146, 149, - 152, true, "ground-truth", "ground-truth" @@ -53752,8 +53751,8 @@ 811, 802, 808, - 149, - 150, + 146, + 147, true, "ground", "ground" @@ -53773,8 +53772,8 @@ 817, 809, 814, - 151, - 152, + 148, + 149, true, "truth", "truth" @@ -53794,8 +53793,8 @@ 836, 826, 833, - 154, - 155, + 151, + 152, true, "answers", "answers" @@ -53815,8 +53814,8 @@ 854, 841, 851, - 157, - 159, + 154, + 156, true, "KG queries", "KG queries" @@ -53836,8 +53835,8 @@ 891, 873, 888, + 161, 164, - 167, true, "well-controlled", "well-controlled" @@ -53857,8 +53856,8 @@ 903, 890, 900, - 168, - 173, + 165, + 170, true, "end-to-end", "end-to-end" @@ -53878,8 +53877,8 @@ 922, 897, 919, + 169, 172, - 175, true, "end accuracy benchmark", "end accuracy benchmark" @@ -53900,7 +53899,7 @@ 0, 140, 0, - 40, + 32, true, "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages.", "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages." @@ -54005,7 +54004,7 @@ 46, 50, 12, - 16, + 13, true, "1051", "1051" @@ -54025,8 +54024,8 @@ 55, 51, 55, - 16, - 17, + 13, + 14, true, "PDFs", "PDFs" @@ -54046,8 +54045,8 @@ 64, 61, 64, - 18, - 19, + 15, + 16, true, "CCS", "CCS" @@ -54067,8 +54066,8 @@ 110, 92, 110, - 23, - 25, + 20, + 22, true, "document structure", "document structure" @@ -54088,8 +54087,8 @@ 117, 114, 117, - 26, - 29, + 23, + 24, true, "300", "300" @@ -54109,8 +54108,8 @@ 133, 118, 133, - 29, - 38, + 24, + 30, true, "(out of 46 019)", "(out of 46 019)" @@ -54130,8 +54129,8 @@ 128, 126, 128, - 32, - 34, + 27, + 28, true, "46", "46" @@ -54151,8 +54150,8 @@ 132, 129, 132, - 34, - 37, + 28, + 29, true, "019", "019" @@ -54172,8 +54171,8 @@ 139, 134, 139, - 38, - 39, + 30, + 31, true, "pages", "pages" @@ -54193,8 +54192,8 @@ 290, 141, 290, - 40, - 73, + 32, + 63, true, "This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure.", "This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure." @@ -54214,8 +54213,8 @@ 166, 156, 166, - 43, - 45, + 35, + 37, true, "page model", "page model" @@ -54235,8 +54234,8 @@ 207, 198, 207, - 49, - 50, + 41, + 42, true, "documents", "documents" @@ -54256,8 +54255,8 @@ 235, 230, 235, - 55, - 60, + 47, + 51, true, "99.7%", "99.7%" @@ -54277,8 +54276,8 @@ 234, 230, 234, - 55, - 59, + 47, + 50, true, "99.7", "99.7" @@ -54298,8 +54297,8 @@ 242, 234, 242, - 59, - 61, + 50, + 52, true, "% recall", "% recall" @@ -54319,8 +54318,8 @@ 252, 247, 252, - 62, - 67, + 53, + 57, true, "99.3%", "99.3%" @@ -54340,8 +54339,8 @@ 251, 247, 251, - 62, - 66, + 53, + 56, true, "99.3", "99.3" @@ -54361,8 +54360,8 @@ 262, 251, 262, - 66, - 68, + 56, + 58, true, "% precision", "% precision" @@ -54382,8 +54381,8 @@ 289, 270, 289, - 70, - 72, + 60, + 62, true, "converted structure", "converted structure" @@ -54403,8 +54402,8 @@ 359, 291, 359, - 73, - 86, + 63, + 76, true, "These numbers are in line with those reported in our previous works.", "These numbers are in line with those reported in our previous works." @@ -54424,8 +54423,8 @@ 304, 297, 304, - 74, - 75, + 64, + 65, true, "numbers", "numbers" @@ -54445,8 +54444,8 @@ 316, 312, 316, - 77, - 78, + 67, + 68, true, "line", "line" @@ -54466,8 +54465,8 @@ 358, 344, 358, - 83, - 85, + 73, + 75, true, "previous works", "previous works" @@ -54487,8 +54486,8 @@ 361, 360, 361, - 86, - 87, + 76, + 77, true, "1", "1" @@ -54508,8 +54507,8 @@ 569, 362, 569, - 87, - 119, + 77, + 109, true, "Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer.", "Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer." @@ -54529,8 +54528,8 @@ 407, 380, 407, - 90, - 93, + 80, + 83, true, "accurate conversion results", "accurate conversion results" @@ -54550,8 +54549,8 @@ 440, 433, 440, - 98, - 99, + 88, + 89, true, "quality", "quality" @@ -54571,8 +54570,8 @@ 481, 462, 481, - 103, - 105, + 93, + 95, true, "language annotators", "language annotators" @@ -54592,8 +54591,8 @@ 510, 495, 510, - 107, - 109, + 97, + 99, true, "incomplete data", "incomplete data" @@ -54613,8 +54612,8 @@ 539, 530, 539, - 112, - 113, + 102, + 103, true, "relevance", "relevance" @@ -54634,8 +54633,8 @@ 556, 543, 556, - 114, - 116, + 104, + 106, true, "query results", "query results" @@ -55286,7 +55285,7 @@ 363, 486, 74, - 111, + 102, true, "Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs).", "Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs)." @@ -55328,7 +55327,7 @@ 396, 400, 81, - 85, + 82, true, "4597", "4597" @@ -55348,8 +55347,8 @@ 405, 401, 405, - 85, - 86, + 82, + 83, true, "PSEs", "PSEs" @@ -55369,8 +55368,8 @@ 411, 407, 411, - 87, - 91, + 84, + 85, true, "8811", "8811" @@ -55390,8 +55389,8 @@ 422, 412, 422, - 91, - 92, + 85, + 86, true, "formations", "formations" @@ -55411,8 +55410,8 @@ 427, 424, 427, - 93, - 96, + 87, + 88, true, "471", "471" @@ -55432,8 +55431,8 @@ 443, 428, 443, - 96, - 98, + 88, + 90, true, "geological ages", "geological ages" @@ -55453,8 +55452,8 @@ 451, 449, 451, - 100, - 102, + 92, + 93, true, "64", "64" @@ -55474,8 +55473,8 @@ 462, 452, 462, - 102, - 104, + 93, + 95, true, "rock types", "rock types" @@ -55495,8 +55494,8 @@ 485, 463, 485, - 104, - 110, + 95, + 101, true, "(relevant to the PSEs)", "(relevant to the PSEs)" @@ -55516,8 +55515,8 @@ 484, 480, 484, - 108, - 109, + 99, + 100, true, "PSEs", "PSEs" @@ -55537,8 +55536,8 @@ 630, 487, 630, - 111, - 142, + 102, + 131, true, "The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores.", "The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores." @@ -55558,8 +55557,8 @@ 506, 491, 506, - 112, - 114, + 103, + 105, true, "full processing", "full processing" @@ -55579,8 +55578,8 @@ 535, 523, 535, - 117, - 119, + 108, + 110, true, "average rate", "average rate" @@ -55600,8 +55599,8 @@ 542, 539, 542, - 120, - 123, + 111, + 112, true, "130", "130" @@ -55621,8 +55620,8 @@ 545, 543, 545, - 123, - 124, + 112, + 113, true, "ms", "ms" @@ -55642,8 +55641,8 @@ 554, 550, 554, - 125, - 126, + 114, + 115, true, "page", "page" @@ -55663,8 +55662,8 @@ 570, 559, 570, - 127, - 129, + 116, + 118, true, "worker core", "worker core" @@ -55684,8 +55683,8 @@ 583, 577, 583, - 132, - 133, + 121, + 122, true, "system", "system" @@ -55705,8 +55704,8 @@ 607, 595, 607, - 135, - 137, + 124, + 126, true, "worker nodes", "worker nodes" @@ -55726,8 +55725,8 @@ 629, 624, 629, - 140, - 141, + 129, + 130, true, "cores", "cores" @@ -55747,8 +55746,8 @@ 698, 631, 698, - 142, - 163, + 131, + 144, true, "Eventually, the KG included 679 296 edges connecting 116 662 nodes.", "Eventually, the KG included 679 296 edges connecting 116 662 nodes." @@ -55768,8 +55767,8 @@ 649, 647, 649, - 145, - 146, + 134, + 135, true, "KG", "KG" @@ -55789,8 +55788,8 @@ 662, 659, 662, - 147, - 150, + 136, + 137, true, "679", "679" @@ -55810,8 +55809,8 @@ 666, 663, 666, - 150, - 153, + 137, + 138, true, "296", "296" @@ -55831,8 +55830,8 @@ 672, 667, 672, - 153, - 154, + 138, + 139, true, "edges", "edges" @@ -55852,8 +55851,8 @@ 687, 684, 687, - 155, - 158, + 140, + 141, true, "116", "116" @@ -55873,8 +55872,8 @@ 691, 688, 691, - 158, - 161, + 141, + 142, true, "662", "662" @@ -55894,8 +55893,8 @@ 697, 692, 697, - 161, - 162, + 142, + 143, true, "nodes", "nodes" @@ -56231,7 +56230,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -56251,8 +56250,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -56272,8 +56271,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -56293,8 +56292,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -56314,8 +56313,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -56335,8 +56334,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -56356,8 +56355,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -56377,8 +56376,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -56398,8 +56397,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -56419,8 +56418,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -56440,8 +56439,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -56461,8 +56460,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -56482,8 +56481,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -56503,8 +56502,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -56524,8 +56523,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -56545,8 +56544,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -56566,8 +56565,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -56587,8 +56586,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -56608,8 +56607,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -56629,8 +56628,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -56650,8 +56649,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -56671,8 +56670,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -56692,8 +56691,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -56713,8 +56712,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -56735,7 +56734,7 @@ 0, 2, 0, - 2, + 1, true, "13", "13" @@ -56756,7 +56755,7 @@ 3, 5, 1, - 3, + 2, true, "15", "15" @@ -57596,7 +57595,7 @@ 412, 567, 87, - 130, + 129, true, "First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision).", "First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision)." @@ -57701,7 +57700,7 @@ 457, 465, 99, - 107, + 106, true, "0.75-0.9", "0.75-0.9" @@ -57721,8 +57720,8 @@ 485, 484, 485, + 110, 111, - 112, true, "3", "3" @@ -57742,8 +57741,8 @@ 490, 489, 490, + 112, 113, - 114, true, "4", "4" @@ -57763,8 +57762,8 @@ 496, 491, 496, + 113, 114, - 115, true, "cases", "cases" @@ -57784,8 +57783,8 @@ 522, 507, 522, - 118, - 120, + 117, + 119, true, "relevant result", "relevant result" @@ -57805,8 +57804,8 @@ 542, 540, 542, + 122, 123, - 124, true, "KG", "KG" @@ -57826,8 +57825,8 @@ 566, 555, 566, - 126, - 129, + 125, + 128, true, "(precision)", "(precision)" @@ -57847,8 +57846,8 @@ 565, 556, 565, + 126, 127, - 128, true, "precision", "precision" @@ -57868,8 +57867,8 @@ 739, 568, 737, - 130, - 171, + 129, + 169, true, "Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall).", "Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall)." @@ -57889,8 +57888,8 @@ 603, 598, 603, - 136, - 139, + 135, + 138, true, "top-5", "top-5" @@ -57910,8 +57909,8 @@ 603, 601, 603, - 137, - 139, + 136, + 138, true, "-5", "-5" @@ -57931,8 +57930,8 @@ 611, 604, 611, + 138, 139, - 140, true, "numbers", "numbers" @@ -57952,8 +57951,8 @@ 636, 626, 634, - 143, - 150, + 142, + 148, true, "(\u2265 0.97)", "(\u2265 0.97)" @@ -57973,8 +57972,8 @@ 635, 629, 633, - 145, - 149, + 144, + 147, true, "0.97", "0.97" @@ -57994,8 +57993,8 @@ 661, 653, 659, - 154, - 155, + 152, + 153, true, "system", "system" @@ -58015,8 +58014,8 @@ 677, 664, 675, + 154, 156, - 158, true, "able detect", "able detect" @@ -58036,8 +58035,8 @@ 708, 702, 706, - 163, - 164, + 161, + 162, true, "PSEs", "PSEs" @@ -58057,8 +58056,8 @@ 729, 717, 727, - 166, - 167, + 164, + 165, true, "properties", "properties" @@ -58078,8 +58077,8 @@ 738, 728, 736, - 167, - 170, + 165, + 168, true, "(recall)", "(recall)" @@ -58099,8 +58098,8 @@ 737, 729, 735, - 168, - 169, + 166, + 167, true, "recall", "recall" @@ -58120,8 +58119,8 @@ 834, 738, 832, - 171, - 188, + 169, + 186, true, "Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory.", "Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory." @@ -58141,8 +58140,8 @@ 756, 748, 754, - 174, - 175, + 172, + 173, true, "recall", "recall" @@ -58162,8 +58161,8 @@ 783, 762, 781, + 175, 177, - 179, true, "language annotators", "language annotators" @@ -58183,8 +58182,8 @@ 811, 789, 809, - 181, - 184, + 179, + 182, true, "KG creation pipeline", "KG creation pipeline" @@ -58919,7 +58918,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -58939,8 +58938,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -58960,8 +58959,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -58981,8 +58980,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -59002,8 +59001,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -59023,8 +59022,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -59044,8 +59043,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -59065,8 +59064,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -59086,8 +59085,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -59107,8 +59106,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -59128,8 +59127,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -59149,8 +59148,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -59170,8 +59169,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -59191,8 +59190,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -59212,8 +59211,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -59233,8 +59232,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -59254,8 +59253,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -59275,8 +59274,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -59296,8 +59295,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -59317,8 +59316,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -59338,8 +59337,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -59359,8 +59358,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -59380,8 +59379,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -59401,8 +59400,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -59423,7 +59422,7 @@ 0, 6, 0, - 5, + 3, true, "14of15", "14of15" @@ -60305,7 +60304,7 @@ 18, 55, 6, - 33, + 21, true, "https://orcid.org/0000-0002-8088-0823", "https://orcid.org/0000-0002-8088-0823" @@ -60326,7 +60325,7 @@ 18, 55, 6, - 33, + 21, true, "https://orcid.org/0000-0002-8088-0823", "https://orcid.org/0000-0002-8088-0823" @@ -60347,7 +60346,7 @@ 36, 45, 14, - 23, + 17, true, "0000-0002", "0000-0002" @@ -60367,8 +60366,8 @@ 55, 46, 55, - 24, - 33, + 18, + 21, true, "8088-0823", "8088-0823" @@ -60388,8 +60387,8 @@ 107, 70, 107, - 35, - 62, + 23, + 38, true, "https://orcid.org/0000-0001-7216-8505", "https://orcid.org/0000-0001-7216-8505" @@ -60409,8 +60408,8 @@ 107, 70, 107, - 35, - 62, + 23, + 38, true, "https://orcid.org/0000-0001-7216-8505", "https://orcid.org/0000-0001-7216-8505" @@ -60430,8 +60429,8 @@ 97, 88, 97, - 43, - 52, + 31, + 34, true, "0000-0001", "0000-0001" @@ -60451,8 +60450,8 @@ 107, 98, 107, - 53, - 62, + 35, + 38, true, "7216-8505", "7216-8505" @@ -60472,8 +60471,8 @@ 160, 123, 160, - 64, - 91, + 40, + 55, true, "https://orcid.org/0000-0001-5761-0422", "https://orcid.org/0000-0001-5761-0422" @@ -60493,8 +60492,8 @@ 150, 141, 150, - 72, - 81, + 48, + 51, true, "0000-0001", "0000-0001" @@ -60514,8 +60513,8 @@ 160, 151, 160, - 82, - 91, + 52, + 55, true, "5761-0422", "5761-0422" @@ -61544,7 +61543,7 @@ 3, 207, 1, - 69, + 65, true, "This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html).", "This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html)." @@ -61733,7 +61732,7 @@ 135, 206, 37, - 68, + 64, true, "(http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html)", "(http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html)" @@ -61754,7 +61753,7 @@ 136, 205, 38, - 67, + 63, true, "http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html", "http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html" @@ -61775,7 +61774,7 @@ 174, 178, 52, - 56, + 53, true, "2015", "2015" @@ -61795,8 +61794,8 @@ 185, 179, 181, - 57, - 59, + 54, + 55, true, "02", "02" @@ -61816,8 +61815,8 @@ 190, 185, 186, - 61, - 62, + 57, + 58, true, "4", "4" @@ -61922,7 +61921,7 @@ 0, 23, 0, - 12, + 10, true, "\u00a7\u00a7 http://graph500.org/", "\u00a7\u00a7 http://graph500.org/" @@ -62447,31 +62446,31 @@ 138, 146, 29, - 34, + 33, true, "KDD '18.", "KDD '18." ], [ "reference", - "journal", + "container-title", 10480452763767134455, "TEXT", "#/texts/169", 1.0, - 12178341415896253943, - 16661690143811416648, + 8106351470704634736, + 17995829417296331915, null, null, 138, - 141, + 145, 138, - 141, + 145, 29, - 30, + 32, true, - "KDD", - "KDD" + "KDD '18", + "KDD '18" ], [ "term", @@ -62494,27 +62493,6 @@ "KDD", "KDD" ], - [ - "expression", - "wtoken-concatenation", - 10480452763767134455, - "TEXT", - "#/texts/169", - 1.0, - 15441160910541481862, - 8386387545198933993, - null, - null, - 143, - 145, - 143, - 145, - 31, - 33, - true, - "18", - "18" - ], [ "sentence", "", @@ -62530,8 +62508,8 @@ 179, 147, 179, - 34, - 54, + 33, + 46, true, "New York, NY: ACM; 2018:774-782.", "New York, NY: ACM; 2018:774-782." @@ -62551,8 +62529,8 @@ 160, 147, 160, - 34, - 39, + 33, + 38, true, "New York, NY:", "New York, NY:" @@ -62572,8 +62550,8 @@ 155, 147, 155, - 34, - 36, + 33, + 35, true, "New York", "New York" @@ -62593,8 +62571,8 @@ 159, 157, 159, + 36, 37, - 38, true, "NY", "NY" @@ -62614,8 +62592,8 @@ 164, 161, 164, + 38, 39, - 40, true, "ACM", "ACM" @@ -62627,40 +62605,19 @@ "TEXT", "#/texts/169", 1.0, - 8104408419226439021, - 7524634383995046949, + 8751415320993915403, + 4351521141262751348, null, null, 164, - 171, - 164, - 171, - 40, - 46, - true, - "; 2018:", - "; 2018:" - ], - [ - "reference", - "title", - 10480452763767134455, - "TEXT", - "#/texts/169", - 1.0, - 8104408789160133341, - 11698475954970405279, - null, - null, - 171, 178, - 171, + 164, 178, - 46, - 53, + 39, + 45, true, - "774-782", - "774-782" + "; 2018:774-782", + "; 2018:774-782" ], [ "reference", @@ -63035,7 +62992,7 @@ 131, 163, 30, - 49, + 42, true, "Chicago, IL: IEEE; 2016:812-821.", "Chicago, IL: IEEE; 2016:812-821." @@ -63124,6 +63081,27 @@ "IEEE", "IEEE" ], + [ + "reference", + "date", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 389609625548777056, + 17963509656509068572, + null, + null, + 150, + 154, + 150, + 154, + 36, + 37, + true, + "2016", + "2016" + ], [ "reference", "citation-number", @@ -63476,7 +63454,7 @@ 141, 168, 28, - 48, + 38, true, "ArXiv.abs/1907.08400; 2019.", "ArXiv.abs/1907.08400; 2019." @@ -63497,7 +63475,7 @@ 141, 161, 28, - 42, + 35, true, "ArXiv.abs/1907.08400", "ArXiv.abs/1907.08400" @@ -63509,19 +63487,19 @@ "TEXT", "#/texts/171", 1.0, - 329104162009513145, - 17357826688115480551, + 7543597897356589805, + 187532807533800461, null, null, 141, - 146, + 151, 141, - 146, + 151, 28, - 29, + 32, true, - "ArXiv", - "ArXiv" + "ArXiv.abs/", + "ArXiv.abs/" ], [ "term", @@ -63565,6 +63543,27 @@ "abs", "abs" ], + [ + "reference", + "date", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 16381206542172555288, + 10693536807570486686, + null, + null, + 161, + 167, + 161, + 167, + 35, + 37, + true, + "; 2019", + "; 2019" + ], [ "reference", "citation-number", @@ -63602,7 +63601,7 @@ 3, 176, 2, - 41, + 38, true, "Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019).", "Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019)." @@ -63833,7 +63832,7 @@ 52, 174, 17, - 39, + 36, true, "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019", "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019" @@ -64000,8 +63999,8 @@ 256, 177, 256, - 41, - 58, + 38, + 54, true, "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10.", "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10." @@ -64021,8 +64020,8 @@ 245, 177, 245, - 41, - 51, + 38, + 48, true, "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi", "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi" @@ -64042,8 +64041,8 @@ 234, 177, 234, - 41, - 48, + 38, + 45, true, "Abu Dhabi International Petroleum Exhibition & Conference", "Abu Dhabi International Petroleum Exhibition & Conference" @@ -64063,8 +64062,8 @@ 221, 177, 221, - 41, - 46, + 38, + 43, true, "Abu Dhabi International Petroleum Exhibition", "Abu Dhabi International Petroleum Exhibition" @@ -64084,8 +64083,8 @@ 234, 224, 234, - 47, - 48, + 44, + 45, true, "Conference", "Conference" @@ -64105,8 +64104,8 @@ 245, 236, 245, - 49, - 51, + 46, + 48, true, "Abu Dhabi", "Abu Dhabi" @@ -64126,8 +64125,8 @@ 253, 247, 253, + 49, 52, - 55, true, "UAE, :", "UAE, :" @@ -64147,8 +64146,8 @@ 250, 247, 250, - 52, - 53, + 49, + 50, true, "UAE", "UAE" @@ -64168,8 +64167,8 @@ 255, 252, 255, - 54, - 57, + 51, + 53, true, ":10", ":10" @@ -64189,8 +64188,8 @@ 269, 257, 269, - 58, - 64, + 54, + 60, true, "https://doi.", "https://doi." @@ -64210,8 +64209,8 @@ 268, 257, 268, - 58, - 63, + 54, + 59, true, "https://doi", "https://doi" @@ -64231,8 +64230,8 @@ 268, 257, 268, - 58, - 63, + 54, + 59, true, "https://doi", "https://doi" @@ -64252,8 +64251,8 @@ 262, 257, 262, - 58, - 59, + 54, + 55, true, "https", "https" @@ -64273,8 +64272,8 @@ 268, 265, 268, - 62, - 63, + 58, + 59, true, "doi", "doi" @@ -64294,8 +64293,8 @@ 276, 270, 276, - 64, - 68, + 60, + 63, true, "org/10", "org/10" @@ -64315,8 +64314,8 @@ 273, 270, 273, - 64, - 65, + 60, + 61, true, "org", "org" @@ -64336,8 +64335,8 @@ 292, 289, 292, - 81, - 83, + 68, + 70, true, "MS.", "MS." @@ -64357,8 +64356,8 @@ 291, 289, 291, - 81, - 82, + 68, + 69, true, "MS", "MS" @@ -64400,7 +64399,7 @@ 3, 171, 2, - 37, + 34, true, "Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016.", "Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016." @@ -65014,6 +65013,48 @@ "TACL", "TACL" ], + [ + "reference", + "date", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 389609625548777056, + 1668465275038003542, + null, + null, + 87, + 91, + 87, + 91, + 20, + 21, + true, + "2016", + "2016" + ], + [ + "reference", + "title", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 9584872678510603869, + 10893893406063870923, + null, + null, + 91, + 101, + 91, + 101, + 21, + 27, + true, + ";4:357-370", + ";4:357-370" + ], [ "reference", "citation-number", @@ -65308,6 +65349,27 @@ "To appear.", "To appear." ], + [ + "reference", + "date", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 389609625548777057, + 14192492111179186414, + null, + null, + 151, + 155, + 151, + 155, + 28, + 29, + true, + "2017", + "2017" + ], [ "reference", "citation-number", @@ -65728,6 +65790,27 @@ "Am Assoc Pet Geol Bull", "Am Assoc Pet Geol Bull" ], + [ + "reference", + "date", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 329104147695665975, + 7749771140976442, + null, + null, + 163, + 168, + 163, + 168, + 38, + 40, + true, + "2005;", + "2005;" + ], [ "parenthesis", "reference", @@ -65743,8 +65826,8 @@ 173, 170, 173, - 45, - 48, + 41, + 44, true, "(9)", "(9)" @@ -65765,7 +65848,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -65785,8 +65868,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -65806,8 +65889,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -65827,8 +65910,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -65848,8 +65931,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -65869,8 +65952,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -65890,8 +65973,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -65911,8 +65994,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -65932,8 +66015,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -65953,8 +66036,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -65974,8 +66057,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -65995,8 +66078,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -66016,8 +66099,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -66037,8 +66120,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -66058,8 +66141,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -66079,8 +66162,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -66100,8 +66183,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -66121,8 +66204,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -66142,8 +66225,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -66163,8 +66246,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -66184,8 +66267,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -66205,8 +66288,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -66226,8 +66309,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -66247,8 +66330,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -66464,67 +66547,46 @@ "Phys Rev E" ], [ - "parenthesis", "reference", + "date", 11430385775112165283, "TEXT", "#/texts/178", 1.0, - 12178341415896395383, - 3095186558758793614, + 8104407400303630267, + 3516783299715161152, null, null, + 67, 74, - 77, + 67, 74, - 77, - 22, - 25, - true, - "(5)", - "(5)" - ], - [ - "sentence", - "", - 5825495964576843004, - "TEXT", - "#/texts/179", - 1.0, - 12178341415896426716, - 2496381961233018859, - null, - null, - 0, - 3, - 0, - 3, - 0, - 3, + 15, + 18, true, - "10.", - "10." + "2005;71", + "2005;71" ], [ - "expression", - "wtoken-concatenation", - 5825495964576843004, + "parenthesis", + "reference", + 11430385775112165283, "TEXT", - "#/texts/179", + "#/texts/178", 1.0, - 15441160910541481982, - 2952327273286615865, + 12178341415896395383, + 3095186558758793614, null, null, - 0, - 2, - 0, - 2, - 0, - 2, + 74, + 77, + 74, + 77, + 18, + 21, true, - "10", - "10" + "(5)", + "(5)" ], [ "reference", @@ -66542,7 +66604,7 @@ 0, 2, 0, - 2, + 1, true, "10", "10" @@ -66562,8 +66624,8 @@ 38, 4, 38, - 3, - 10, + 2, + 9, true, "Estrada Ernesto, Higham Desmond J.", "Estrada Ernesto, Higham Desmond J." @@ -66583,8 +66645,8 @@ 19, 4, 19, - 3, - 5, + 2, + 4, true, "Estrada Ernesto", "Estrada Ernesto" @@ -66604,8 +66666,8 @@ 19, 4, 19, - 3, - 5, + 2, + 4, true, "Estrada Ernesto", "Estrada Ernesto" @@ -66625,8 +66687,8 @@ 38, 21, 38, - 6, - 10, + 5, + 9, true, "Higham Desmond J.", "Higham Desmond J." @@ -66646,8 +66708,8 @@ 37, 21, 37, - 6, - 9, + 5, + 8, true, "Higham Desmond J", "Higham Desmond J" @@ -66667,12 +66729,33 @@ 45, 39, 45, - 10, - 16, + 9, + 12, true, "(2010)", "(2010)" ], + [ + "reference", + "date", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 389609625548777062, + 8937154938925173833, + null, + null, + 40, + 44, + 40, + 44, + 10, + 11, + true, + "2010", + "2010" + ], [ "reference", "journal", @@ -66688,8 +66771,8 @@ 112, 47, 112, - 17, - 26, + 13, + 22, true, "Network Properties Revealed through Matrix Functions. SIAM Review", "Network Properties Revealed through Matrix Functions. SIAM Review" @@ -66709,8 +66792,8 @@ 100, 47, 100, - 17, - 24, + 13, + 20, true, "Network Properties Revealed through Matrix Functions.", "Network Properties Revealed through Matrix Functions." @@ -66730,8 +66813,8 @@ 74, 47, 74, - 17, - 20, + 13, + 16, true, "Network Properties Revealed", "Network Properties Revealed" @@ -66751,8 +66834,8 @@ 99, 83, 99, - 21, - 23, + 17, + 19, true, "Matrix Functions", "Matrix Functions" @@ -66772,8 +66855,8 @@ 131, 101, 131, - 24, - 42, + 20, + 33, true, "SIAM Review, 52, (4), 696-714.", "SIAM Review, 52, (4), 696-714." @@ -66793,33 +66876,12 @@ 112, 101, 112, - 24, - 26, + 20, + 22, true, "SIAM Review", "SIAM Review" ], - [ - "expression", - "wtoken-concatenation", - 5825495964576843004, - "TEXT", - "#/texts/179", - 1.0, - 15441160910541486331, - 2952320863259255438, - null, - null, - 114, - 116, - 114, - 116, - 27, - 29, - true, - "52", - "52" - ], [ "parenthesis", "reference", @@ -66835,8 +66897,8 @@ 121, 118, 121, - 30, - 33, + 25, + 28, true, "(4)", "(4)" @@ -66856,8 +66918,8 @@ 130, 123, 130, - 34, - 41, + 29, + 32, true, "696-714", "696-714" @@ -66869,19 +66931,19 @@ "TEXT", "#/texts/179", 1.0, - 5265581660556298059, - 103427027730748467, + 16159594323378820687, + 15692242274322104012, null, null, 132, - 152, + 167, 132, - 152, - 42, - 54, + 167, + 33, + 48, true, - "http://dx.doi.org/10", - "http://dx.doi.org/10" + "http://dx.doi.org/10.1137/090761070", + "http://dx.doi.org/10.1137/090761070" ], [ "term", @@ -66898,8 +66960,8 @@ 136, 132, 136, - 42, - 43, + 33, + 34, true, "http", "http" @@ -66919,8 +66981,8 @@ 141, 139, 141, - 46, - 47, + 37, + 38, true, "dx", "dx" @@ -66940,53 +67002,53 @@ 149, 146, 149, - 50, - 51, + 41, + 42, true, "org", "org" ], [ - "sentence", - "", + "reference", + "citation-number", 5698421097735371040, "TEXT", "#/texts/180", 1.0, - 12178341415896426655, - 7596226664406524957, + 15441160910541481983, + 11293846485728944316, null, null, 0, - 3, + 2, 0, - 3, + 2, 0, - 3, + 1, true, - "11.", - "11." + "11", + "11" ], [ - "expression", - "wtoken-concatenation", + "reference", + "author", 5698421097735371040, "TEXT", "#/texts/180", 1.0, - 15441160910541481983, - 11293846485728944316, + 12825927039497398082, + 7276111248299729235, null, null, - 0, - 2, - 0, - 2, - 0, + 4, + 39, + 4, + 39, 2, + 7, true, - "11", - "11" + "Labs Redis. Benchmarking RedisGraph", + "Labs Redis. Benchmarking RedisGraph" ], [ "sentence", @@ -67003,8 +67065,8 @@ 15, 4, 15, - 3, - 6, + 2, + 5, true, "Labs Redis.", "Labs Redis." @@ -67024,8 +67086,8 @@ 14, 4, 14, - 3, - 5, + 2, + 4, true, "Labs Redis", "Labs Redis" @@ -67045,8 +67107,8 @@ 44, 16, 44, - 6, - 12, + 5, + 11, true, "Benchmarking RedisGraph 1.0.", "Benchmarking RedisGraph 1.0." @@ -67066,8 +67128,8 @@ 39, 16, 39, - 6, - 8, + 5, + 7, true, "Benchmarking RedisGraph", "Benchmarking RedisGraph" @@ -67087,15 +67149,36 @@ 43, 40, 43, - 8, - 11, + 7, + 10, true, "1.0", "1.0" ], [ "reference", - "citation-number", + "date", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 17767354399704235161, + 12147516458969154680, + null, + null, + 40, + 41, + 40, + 41, + 7, + 8, + true, + "1", + "1" + ], + [ + "reference", + "title", 5698421097735371040, "TEXT", "#/texts/180", @@ -67108,36 +67191,36 @@ 43, 42, 43, + 9, 10, - 11, true, "0", "0" ], [ - "sentence", - "", - 5870535063942256428, + "reference", + "date", + 5698421097735371040, "TEXT", - "#/texts/181", + "#/texts/180", 1.0, - 12178341415896426590, - 4180477249261114913, + 389609625548777055, + 1517668227262464254, null, null, - 0, - 3, - 0, - 3, - 0, - 3, + 45, + 49, + 45, + 49, + 11, + 12, true, - "12.", - "12." + "2019", + "2019" ], [ - "expression", - "wtoken-concatenation", + "reference", + "citation-number", 5870535063942256428, "TEXT", "#/texts/181", @@ -67151,7 +67234,7 @@ 0, 2, 0, - 2, + 1, true, "12", "12" @@ -67171,8 +67254,8 @@ 15, 4, 15, - 3, - 5, + 2, + 4, true, "TigerGraph.", "TigerGraph." @@ -67192,8 +67275,8 @@ 15, 4, 15, - 3, - 5, + 2, + 4, true, "TigerGraph.", "TigerGraph." @@ -67213,8 +67296,8 @@ 14, 4, 14, + 2, 3, - 4, true, "TigerGraph", "TigerGraph" @@ -67234,8 +67317,8 @@ 46, 16, 46, - 5, - 12, + 4, + 11, true, "Real-Time Deep Link Analytics.", "Real-Time Deep Link Analytics." @@ -67255,8 +67338,8 @@ 45, 16, 45, - 5, - 11, + 4, + 10, true, "Real-Time Deep Link Analytics", "Real-Time Deep Link Analytics" @@ -67276,8 +67359,8 @@ 25, 16, 25, - 5, - 8, + 4, + 7, true, "Real-Time", "Real-Time" @@ -67297,53 +67380,32 @@ 45, 21, 45, - 7, - 11, + 6, + 10, true, "Time Deep Link Analytics", "Time Deep Link Analytics" ], [ - "sentence", - "", - 18196767266655606709, - "TEXT", - "#/texts/182", - 1.0, - 12178341415896424072, - 14083466083102208723, - null, - null, - 0, - 3, - 0, - 3, - 0, - 3, - true, - "13.", - "13." - ], - [ - "expression", - "wtoken-concatenation", - 18196767266655606709, + "reference", + "date", + 5870535063942256428, "TEXT", - "#/texts/182", + "#/texts/181", 1.0, - 15441160910541481977, - 12462842527617278799, + 389609625548777054, + 3194806985827377522, null, null, - 0, - 2, - 0, - 2, - 0, - 2, + 47, + 51, + 47, + 51, + 11, + 12, true, - "13", - "13" + "2018", + "2018" ], [ "reference", @@ -67361,7 +67423,7 @@ 0, 2, 0, - 2, + 1, true, "13", "13" @@ -67381,8 +67443,8 @@ 73, 4, 73, - 3, - 18, + 2, + 17, true, "Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra.", "Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra." @@ -67402,8 +67464,8 @@ 12, 4, 12, - 3, - 5, + 2, + 4, true, "Jeremy K", "Jeremy K" @@ -67423,8 +67485,8 @@ 12, 4, 12, - 3, - 5, + 2, + 4, true, "Jeremy K", "Jeremy K" @@ -67444,8 +67506,8 @@ 27, 14, 27, - 6, - 10, + 5, + 9, true, "John G Graph", "John G. Graph" @@ -67465,8 +67527,8 @@ 21, 14, 21, - 6, - 9, + 5, + 8, true, "John G.", "John G." @@ -67486,8 +67548,8 @@ 72, 22, 72, - 9, - 17, + 8, + 16, true, "Graph Algorithms in the Language of Linear Algebra", "Graph Algorithms in the Language of Linear Algebra" @@ -67507,8 +67569,8 @@ 38, 22, 38, - 9, - 11, + 8, + 10, true, "Graph Algorithms", "Graph Algorithms" @@ -67528,8 +67590,8 @@ 54, 46, 54, + 12, 13, - 14, true, "Language", "Language" @@ -67549,8 +67611,8 @@ 72, 58, 72, - 15, - 17, + 14, + 16, true, "Linear Algebra", "Linear Algebra" @@ -67570,8 +67632,8 @@ 145, 74, 145, - 18, - 34, + 17, + 30, true, "Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011.", "Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011." @@ -67591,8 +67653,8 @@ 86, 74, 86, + 17, 18, - 19, true, "Philadelphia", "Philadelphia" @@ -67612,8 +67674,8 @@ 86, 74, 86, + 17, 18, - 19, true, "Philadelphia", "Philadelphia" @@ -67633,8 +67695,8 @@ 90, 88, 90, + 19, 20, - 21, true, "PA", "PA" @@ -67654,8 +67716,8 @@ 99, 92, 99, + 21, 22, - 23, true, "Society", "Society" @@ -67675,8 +67737,8 @@ 138, 104, 138, - 24, - 28, + 23, + 27, true, "Industrial and Applied Mathematics", "Industrial and Applied Mathematics" @@ -67696,8 +67758,8 @@ 114, 104, 114, + 23, 24, - 25, true, "Industrial", "Industrial" @@ -67717,8 +67779,8 @@ 138, 119, 138, - 26, - 28, + 25, + 27, true, "Applied Mathematics", "Applied Mathematics" @@ -67738,54 +67800,12 @@ 144, 138, 144, - 28, - 33, + 27, + 29, true, "; 2011", "; 2011" ], - [ - "sentence", - "", - 3623403683642367845, - "TEXT", - "#/texts/183", - 1.0, - 12178341415896424137, - 2021336641528383539, - null, - null, - 0, - 3, - 0, - 3, - 0, - 3, - true, - "14.", - "14." - ], - [ - "expression", - "wtoken-concatenation", - 3623403683642367845, - "TEXT", - "#/texts/183", - 1.0, - 15441160910541481978, - 9067685736347109846, - null, - null, - 0, - 2, - 0, - 2, - 0, - 2, - true, - "14", - "14" - ], [ "reference", "citation-number", @@ -67802,7 +67822,7 @@ 0, 2, 0, - 2, + 1, true, "14", "14" @@ -67822,8 +67842,8 @@ 106, 4, 104, - 3, - 29, + 2, + 25, true, "Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015).", "Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015)." @@ -67843,8 +67863,8 @@ 17, 4, 17, - 3, - 5, + 2, + 4, true, "Kepner Jeremy", "Kepner Jeremy" @@ -67864,8 +67884,8 @@ 17, 4, 17, - 3, - 5, + 2, + 4, true, "Kepner Jeremy", "Kepner Jeremy" @@ -67885,8 +67905,8 @@ 30, 19, 30, - 6, - 8, + 5, + 7, true, "Bader David", "Bader David" @@ -67906,8 +67926,8 @@ 30, 19, 30, - 6, - 8, + 5, + 7, true, "Bader David", "Bader David" @@ -67927,8 +67947,8 @@ 47, 32, 45, - 9, - 13, + 8, + 12, true, "Bulu\u00e7 Ayd \u0131 n", "Bulu\u00e7 Ayd \u0131 n" @@ -67948,8 +67968,8 @@ 42, 32, 41, - 9, - 11, + 8, + 10, true, "Bulu\u00e7 Ayd", "Bulu\u00e7 Ayd" @@ -67969,8 +67989,8 @@ 61, 47, 59, - 14, - 16, + 13, + 15, true, "Gilbert John", "Gilbert John" @@ -67990,8 +68010,8 @@ 61, 47, 59, - 14, - 16, + 13, + 15, true, "Gilbert John", "Gilbert John" @@ -68011,8 +68031,8 @@ 78, 61, 76, - 17, - 19, + 16, + 18, true, "Mattson Timothy", "Mattson Timothy" @@ -68032,8 +68052,8 @@ 78, 61, 76, - 17, - 19, + 16, + 18, true, "Mattson Timothy", "Mattson Timothy" @@ -68053,8 +68073,8 @@ 98, 78, 96, - 20, - 22, + 19, + 21, true, "Meyerhenke Henning", "Meyerhenke Henning" @@ -68074,8 +68094,8 @@ 98, 78, 96, - 20, - 22, + 19, + 21, true, "Meyerhenke Henning", "Meyerhenke Henning" @@ -68095,30 +68115,51 @@ 105, 97, 103, - 22, - 28, + 21, + 24, true, "(2015)", "(2015)" ], [ - "sentence", - "", + "reference", + "date", 3623403683642367845, "TEXT", "#/texts/183", 1.0, - 17293964586930460261, - 12804061004186124881, + 389609625548777059, + 3330964369910711146, null, null, - 107, - 163, - 105, - 161, - 29, - 41, - true, + 100, + 104, + 98, + 102, + 22, + 23, + true, + "2015", + "2015" + ], + [ + "sentence", + "", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 17293964586930460261, + 12804061004186124881, + null, + null, + 107, + 163, + 105, + 161, + 25, + 37, + true, "Graphs, Matrices, and the GraphBLAS: Seven Good Reasons.", "Graphs, Matrices, and the GraphBLAS: Seven Good Reasons." ], @@ -68137,8 +68178,8 @@ 143, 105, 141, - 29, - 37, + 25, + 33, true, "Graphs, Matrices, and the GraphBLAS:", "Graphs, Matrices, and the GraphBLAS:" @@ -68158,8 +68199,8 @@ 113, 105, 111, - 29, - 30, + 25, + 26, true, "Graphs", "Graphs" @@ -68179,8 +68220,8 @@ 123, 113, 121, - 31, - 32, + 27, + 28, true, "Matrices", "Matrices" @@ -68200,8 +68241,8 @@ 142, 131, 140, - 35, - 36, + 31, + 32, true, "GraphBLAS", "GraphBLAS" @@ -68221,8 +68262,8 @@ 162, 142, 160, - 37, - 40, + 33, + 36, true, "Seven Good Reasons", "Seven Good Reasons" @@ -68242,8 +68283,8 @@ 205, 162, 203, - 41, - 58, + 37, + 47, true, "Procedia Computer Science, 51, 2453-2462.", "Procedia Computer Science, 51, 2453-2462." @@ -68263,33 +68304,12 @@ 189, 162, 187, - 41, - 44, + 37, + 40, true, "Procedia Computer Science", "Procedia Computer Science" ], - [ - "expression", - "wtoken-concatenation", - 3623403683642367845, - "TEXT", - "#/texts/183", - 1.0, - 15441160910541486330, - 9067694506000682765, - null, - null, - 191, - 193, - 189, - 191, - 45, - 47, - true, - "51", - "51" - ], [ "expression", "wtoken-concatenation", @@ -68305,8 +68325,8 @@ 204, 193, 202, - 48, - 57, + 43, + 46, true, "2453-2462", "2453-2462" @@ -68318,19 +68338,19 @@ "TEXT", "#/texts/183", 1.0, - 5265581660556298059, - 13062621179079245410, + 16959048237954323084, + 10596594611762835857, null, null, 206, - 226, + 239, 204, - 224, - 58, - 70, + 237, + 47, + 64, true, - "http://dx.doi.org/10", - "http://dx.doi.org/10" + "http://dx.doi.org/10.1016/j.procs", + "http://dx.doi.org/10.1016/j.procs" ], [ "term", @@ -68347,8 +68367,8 @@ 210, 204, 208, - 58, - 59, + 47, + 48, true, "http", "http" @@ -68368,8 +68388,8 @@ 215, 211, 213, - 62, - 63, + 51, + 52, true, "dx", "dx" @@ -68389,8 +68409,8 @@ 223, 218, 221, - 66, - 67, + 55, + 56, true, "org", "org" @@ -68410,8 +68430,8 @@ 233, 229, 231, - 75, - 77, + 60, + 62, true, "/j", "/j" @@ -68431,53 +68451,32 @@ 239, 232, 237, - 78, - 79, + 63, + 64, true, "procs", "procs" ], [ - "sentence", - "", - 13936866850854297069, - "TEXT", - "#/texts/184", - 1.0, - 12178341415896420618, - 3824456860028023899, - null, - null, - 0, - 3, - 0, - 3, - 0, - 3, - true, - "15.", - "15." - ], - [ - "expression", - "wtoken-concatenation", - 13936866850854297069, + "reference", + "date", + 3623403683642367845, "TEXT", - "#/texts/184", + "#/texts/183", 1.0, - 15441160910541481979, - 10213682970367471311, + 389609625548777059, + 3330964369910703397, null, null, - 0, - 2, - 0, - 2, - 0, - 2, + 240, + 244, + 238, + 242, + 65, + 66, true, - "15", - "15" + "2015", + "2015" ], [ "reference", @@ -68495,7 +68494,7 @@ 0, 2, 0, - 2, + 1, true, "15", "15" @@ -68515,8 +68514,8 @@ 94, 4, 94, - 3, - 21, + 2, + 20, true, "Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications.", "Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications." @@ -68536,8 +68535,8 @@ 11, 4, 11, - 3, - 5, + 2, + 4, true, "Aydin B", "Aydin B" @@ -68557,8 +68556,8 @@ 11, 4, 11, - 3, - 5, + 2, + 4, true, "Aydin B", "Aydin B" @@ -68578,8 +68577,8 @@ 32, 13, 32, - 6, - 11, + 5, + 10, true, "Gilbert John R The", "Gilbert John R. The" @@ -68599,8 +68598,8 @@ 28, 13, 28, - 6, - 10, + 5, + 9, true, "Gilbert John R.", "Gilbert John R." @@ -68620,8 +68619,8 @@ 93, 29, 93, - 10, - 20, + 9, + 19, true, "The combinatorial BLAS: design, implementation, and applications", "The combinatorial BLAS: design, implementation, and applications" @@ -68641,8 +68640,8 @@ 51, 33, 51, - 11, - 13, + 10, + 12, true, "combinatorial BLAS", "combinatorial BLAS" @@ -68662,8 +68661,8 @@ 59, 53, 59, + 13, 14, - 15, true, "design", "design" @@ -68683,8 +68682,8 @@ 75, 61, 75, + 15, 16, - 17, true, "implementation", "implementation" @@ -68704,8 +68703,8 @@ 93, 81, 93, + 18, 19, - 20, true, "applications", "applications" @@ -68725,8 +68724,8 @@ 126, 95, 126, - 21, - 28, + 20, + 27, true, "Int J High Perform Comput Appl.", "Int J High Perform Comput Appl." @@ -68746,8 +68745,8 @@ 125, 95, 125, - 21, - 27, + 20, + 26, true, "Int J High Perform Comput Appl", "Int J High Perform Comput Appl" @@ -68767,8 +68766,8 @@ 125, 95, 125, - 21, - 27, + 20, + 26, true, "Int J High Perform Comput Appl", "Int J High Perform Comput Appl" @@ -68788,8 +68787,8 @@ 147, 127, 147, - 28, - 47, + 27, + 38, true, "2011;25 (4):496-509.", "2011;25 (4):496-509." @@ -68809,12 +68808,33 @@ 134, 127, 134, - 28, - 35, + 27, + 30, true, "2011;25", "2011;25" ], + [ + "reference", + "date", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 329104147695662665, + 13454856964816440075, + null, + null, + 127, + 132, + 127, + 132, + 27, + 29, + true, + "2011;", + "2011;" + ], [ "parenthesis", "reference", @@ -68830,36 +68850,15 @@ 138, 135, 138, - 35, - 38, + 30, + 33, true, "(4)", "(4)" ], [ - "sentence", - "", - 8497015665124263236, - "TEXT", - "#/texts/185", - 1.0, - 12178341415896420683, - 15900700274059095170, - null, - null, - 0, - 3, - 0, - 3, - 0, - 3, - true, - "16.", - "16." - ], - [ - "expression", - "wtoken-concatenation", + "reference", + "citation-number", 8497015665124263236, "TEXT", "#/texts/185", @@ -68873,7 +68872,7 @@ 0, 2, 0, - 2, + 1, true, "16", "16" @@ -68893,8 +68892,8 @@ 87, 4, 87, - 3, - 22, + 2, + 21, true, "Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS.", "Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS." @@ -68914,8 +68913,8 @@ 12, 4, 12, - 3, - 5, + 2, + 4, true, "Jeremy K", "Jeremy K" @@ -68935,8 +68934,8 @@ 12, 4, 12, - 3, - 5, + 2, + 4, true, "Jeremy K", "Jeremy K" @@ -68956,8 +68955,8 @@ 21, 14, 21, - 6, - 8, + 5, + 7, true, "Peter A", "Peter A" @@ -68977,8 +68976,8 @@ 21, 14, 21, - 6, - 8, + 5, + 7, true, "Peter A", "Peter A" @@ -68998,8 +68997,8 @@ 36, 23, 36, - 9, - 12, + 8, + 11, true, "Bader David A", "Bader David A" @@ -69019,8 +69018,8 @@ 36, 23, 36, - 9, - 12, + 8, + 11, true, "Bader David A", "Bader David A" @@ -69040,8 +69039,8 @@ 44, 38, 44, - 13, - 16, + 12, + 15, true, "et al", "et al." @@ -69061,8 +69060,8 @@ 86, 45, 86, - 16, - 21, + 15, + 20, true, "Mathematical foundations of the GraphBLAS", "Mathematical foundations of the GraphBLAS" @@ -69082,8 +69081,8 @@ 69, 45, 69, - 16, - 18, + 15, + 17, true, "Mathematical foundations", "Mathematical foundations" @@ -69103,53 +69102,53 @@ 86, 77, 86, + 19, 20, - 21, true, "GraphBLAS", "GraphBLAS" ], [ - "sentence", - "", + "reference", + "container-title", 8497015665124263236, "TEXT", "#/texts/185", 1.0, - 16880245774154997143, - 4827142095236375911, + 10709633855219206820, + 961925091352749103, null, null, 88, - 103, + 102, 88, - 103, - 22, - 29, + 102, + 21, + 24, true, - "2016 IEEE HPEC.", - "2016 IEEE HPEC." + "2016 IEEE HPEC", + "2016 IEEE HPEC" ], [ - "expression", - "wtoken-concatenation", + "sentence", + "", 8497015665124263236, "TEXT", "#/texts/185", 1.0, - 389609625548777056, - 8567475520614412130, + 515474695412696961, + 6296343322569991622, null, null, - 88, - 92, - 88, - 92, + 93, + 103, + 93, + 103, 22, - 26, + 25, true, - "2016", - "2016" + "IEEE HPEC.", + "IEEE HPEC." ], [ "term", @@ -69166,95 +69165,32 @@ 102, 93, 102, - 26, - 28, + 22, + 24, true, "IEEE HPEC", "IEEE HPEC" ], [ - "sentence", - "", - 8497015665124263236, - "TEXT", - "#/texts/185", - 1.0, - 12668400427997832797, - 10477465110317917500, - null, - null, - 104, - 114, - 104, - 114, - 29, - 38, - true, - "2016; 1-9.", - "2016; 1-9." - ], - [ - "expression", - "wtoken-concatenation", + "reference", + "date", 8497015665124263236, "TEXT", "#/texts/185", 1.0, - 389609625548777056, - 8567475520614425446, + 6573474049096193902, + 2260581871937703980, null, null, 104, - 108, + 113, 104, - 108, - 29, - 33, - true, - "2016", - "2016" - ], - [ - "sentence", - "", - 15947529491299956047, - "TEXT", - "#/texts/186", - 1.0, - 12178341415896424331, - 1785950286755592566, - null, - null, - 0, - 3, - 0, - 3, - 0, - 3, - true, - "17.", - "17." - ], - [ - "expression", - "wtoken-concatenation", - 15947529491299956047, - "TEXT", - "#/texts/186", - 1.0, - 15441160910541481861, - 5749903657566610070, - null, - null, - 0, - 2, - 0, - 2, - 0, - 2, + 113, + 25, + 30, true, - "17", - "17" + "2016; 1-9", + "2016; 1-9" ], [ "reference", @@ -69272,7 +69208,7 @@ 0, 2, 0, - 2, + 1, true, "17", "17" @@ -69292,8 +69228,8 @@ 46, 4, 46, - 3, - 16, + 2, + 15, true, "Ariful A, Mathias J, Aydin B, Ng Esmond G.", "Ariful A, Mathias J, Aydin B, Ng Esmond G." @@ -69313,8 +69249,8 @@ 12, 4, 12, - 3, - 5, + 2, + 4, true, "Ariful A", "Ariful A" @@ -69334,8 +69270,8 @@ 12, 4, 12, - 3, - 5, + 2, + 4, true, "Ariful A", "Ariful A" @@ -69355,8 +69291,8 @@ 23, 14, 23, - 6, - 8, + 5, + 7, true, "Mathias J", "Mathias J" @@ -69376,8 +69312,8 @@ 23, 14, 23, - 6, - 8, + 5, + 7, true, "Mathias J", "Mathias J" @@ -69397,8 +69333,8 @@ 32, 25, 32, - 9, - 11, + 8, + 10, true, "Aydin B", "Aydin B" @@ -69418,8 +69354,8 @@ 32, 25, 32, - 9, - 11, + 8, + 10, true, "Aydin B", "Aydin B" @@ -69439,8 +69375,8 @@ 46, 34, 46, - 12, - 16, + 11, + 15, true, "Ng Esmond G.", "Ng Esmond G." @@ -69460,8 +69396,8 @@ 45, 34, 45, - 12, - 15, + 11, + 14, true, "Ng Esmond G", "Ng Esmond G" @@ -69481,8 +69417,8 @@ 105, 47, 105, - 16, - 27, + 15, + 26, true, "The reverse Cuthill-McKee algorithm in distributed-memory.", "The reverse Cuthill-McKee algorithm in distributed-memory." @@ -69502,8 +69438,8 @@ 104, 47, 104, - 16, - 26, + 15, + 25, true, "The reverse Cuthill-McKee algorithm in distributed-memory", "The reverse Cuthill-McKee algorithm in distributed-memory" @@ -69523,8 +69459,8 @@ 66, 51, 66, - 17, - 19, + 16, + 18, true, "reverse Cuthill", "reverse Cuthill" @@ -69544,8 +69480,8 @@ 72, 59, 72, - 18, - 21, + 17, + 20, true, "Cuthill-McKee", "Cuthill-McKee" @@ -69565,8 +69501,8 @@ 69, 59, 69, - 18, - 20, + 17, + 19, true, "Cuthill-Mc", "Cuthill-Mc" @@ -69586,8 +69522,8 @@ 82, 67, 82, - 20, - 22, + 19, + 21, true, "McKee algorithm", "McKee algorithm" @@ -69607,8 +69543,8 @@ 104, 86, 104, - 23, - 26, + 22, + 25, true, "distributed-memory", "distributed-memory" @@ -69628,53 +69564,53 @@ 104, 98, 104, + 24, 25, - 26, true, "memory", "memory" ], [ - "sentence", - "", + "reference", + "container-title", 15947529491299956047, "TEXT", "#/texts/186", 1.0, - 3082191344763127479, - 11864574566029857426, + 10701056912570859123, + 6872071652706022831, null, null, 106, - 184, + 175, 106, - 184, - 27, - 42, + 175, + 26, + 34, true, - "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS).", - "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS)." + "2017 IEEE International Parallel and Distributed Processing Symposium", + "2017 IEEE International Parallel and Distributed Processing Symposium" ], [ - "expression", - "wtoken-concatenation", + "sentence", + "", 15947529491299956047, "TEXT", "#/texts/186", 1.0, - 389609625548777057, - 8314107736373646335, + 10555308991053583656, + 8625840606506711403, null, null, - 106, - 110, - 106, - 110, + 111, + 184, + 111, + 184, 27, - 31, + 38, true, - "2017", - "2017" + "IEEE International Parallel and Distributed Processing Symposium (IPDPS).", + "IEEE International Parallel and Distributed Processing Symposium (IPDPS)." ], [ "term", @@ -69691,8 +69627,8 @@ 175, 111, 175, - 31, - 38, + 27, + 34, true, "IEEE International Parallel and Distributed Processing Symposium", "IEEE International Parallel and Distributed Processing Symposium" @@ -69712,8 +69648,8 @@ 138, 111, 138, - 31, - 34, + 27, + 30, true, "IEEE International Parallel", "IEEE International Parallel" @@ -69733,8 +69669,8 @@ 175, 143, 175, - 35, - 38, + 31, + 34, true, "Distributed Processing Symposium", "Distributed Processing Symposium" @@ -69754,8 +69690,8 @@ 183, 176, 183, - 38, - 41, + 34, + 37, true, "(IPDPS)", "(IPDPS)" @@ -69775,116 +69711,53 @@ 182, 177, 182, - 39, - 40, - true, - "IPDPS", - "IPDPS" - ], - [ - "term", - "single-term", - 15947529491299956047, - "TEXT", - "#/texts/186", - 1.0, - 329104161866629985, - 4498077561104002021, - null, - null, - 177, - 182, - 177, - 182, - 39, - 40, + 35, + 36, true, "IPDPS", "IPDPS" ], - [ - "sentence", - "", - 15947529491299956047, - "TEXT", - "#/texts/186", - 1.0, - 15668671505312224859, - 7267236904131898531, - null, - null, - 185, - 197, - 185, - 197, - 42, - 53, - true, - "2017: 22-31.", - "2017: 22-31." - ], - [ - "expression", - "wtoken-concatenation", - 15947529491299956047, - "TEXT", - "#/texts/186", - 1.0, - 389609625548777057, - 8314107736373645299, - null, - null, - 185, - 189, - 185, - 189, - 42, - 46, - true, - "2017", - "2017" - ], - [ - "sentence", - "", - 14843401725435831033, + [ + "term", + "single-term", + 15947529491299956047, "TEXT", - "#/texts/187", + "#/texts/186", 1.0, - 12178341415896424394, - 9464187724344101613, + 329104161866629985, + 4498077561104002021, null, null, - 0, - 3, - 0, - 3, - 0, - 3, + 177, + 182, + 177, + 182, + 35, + 36, true, - "18.", - "18." + "IPDPS", + "IPDPS" ], [ - "expression", - "wtoken-concatenation", - 14843401725435831033, + "reference", + "title", + 15947529491299956047, "TEXT", - "#/texts/187", + "#/texts/186", 1.0, - 15441160910541481862, - 17618650105274567067, + 7366731910384143591, + 4074534479596534226, null, null, - 0, - 2, - 0, - 2, - 0, - 2, + 185, + 196, + 185, + 196, + 38, + 43, true, - "18", - "18" + "2017: 22-31", + "2017: 22-31" ], [ "reference", @@ -69902,7 +69775,7 @@ 0, 2, 0, - 2, + 1, true, "18", "18" @@ -69922,8 +69795,8 @@ 87, 4, 87, - 3, - 20, + 2, + 19, true, "Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices.", "Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices." @@ -69943,8 +69816,8 @@ 14, 4, 14, - 3, - 5, + 2, + 4, true, "Rukhsana S", "Rukhsana S" @@ -69964,8 +69837,8 @@ 14, 4, 14, - 3, - 5, + 2, + 4, true, "Rukhsana S", "Rukhsana S" @@ -69985,8 +69858,8 @@ 23, 16, 23, - 6, - 8, + 5, + 7, true, "Anila U", "Anila U" @@ -70006,8 +69879,8 @@ 23, 16, 23, - 6, - 8, + 5, + 7, true, "Anila U", "Anila U" @@ -70027,8 +69900,8 @@ 37, 25, 37, - 9, - 12, + 8, + 11, true, "Chughtai IR.", "Chughtai IR." @@ -70048,8 +69921,8 @@ 36, 25, 36, - 9, - 11, + 8, + 10, true, "Chughtai IR", "Chughtai IR" @@ -70069,8 +69942,8 @@ 44, 35, 44, - 10, - 13, + 9, + 12, true, "R Review", "R. Review" @@ -70090,8 +69963,8 @@ 86, 38, 86, - 12, - 19, + 11, + 18, true, "Review of storage techniques for sparse matrices", "Review of storage techniques for sparse matrices" @@ -70111,8 +69984,8 @@ 66, 48, 66, - 14, - 16, + 13, + 15, true, "storage techniques", "storage techniques" @@ -70132,57 +70005,57 @@ 86, 71, 86, - 17, - 19, + 16, + 18, true, "sparse matrices", "sparse matrices" ], [ - "sentence", - "", + "reference", + "date", 14843401725435831033, "TEXT", "#/texts/187", 1.0, - 4338648312784447485, - 11592735192995388998, + 389609625548757410, + 18165604049296771030, null, null, 88, - 132, + 92, 88, - 132, + 92, + 19, 20, - 29, true, - "2005 Pakistan Section Multitopic Conference.", - "2005 Pakistan Section Multitopic Conference." + "2005", + "2005" ], [ - "expression", - "wtoken-concatenation", + "sentence", + "", 14843401725435831033, "TEXT", "#/texts/187", 1.0, - 389609625548757410, - 18165604049296771030, + 14938776978172003836, + 10713320247466750625, null, null, - 88, - 92, - 88, - 92, + 93, + 132, + 93, + 132, 20, - 24, + 25, true, - "2005", - "2005" + "Pakistan Section Multitopic Conference.", + "Pakistan Section Multitopic Conference." ], [ "reference", - "container-title", + "title", 14843401725435831033, "TEXT", "#/texts/187", @@ -70195,8 +70068,8 @@ 131, 93, 131, + 20, 24, - 28, true, "Pakistan Section Multitopic Conference", "Pakistan Section Multitopic Conference" @@ -70216,95 +70089,32 @@ 131, 93, 131, + 20, 24, - 28, true, "Pakistan Section Multitopic Conference", "Pakistan Section Multitopic Conference" ], [ - "sentence", - "", - 14843401725435831033, - "TEXT", - "#/texts/187", - 1.0, - 6573469177968412116, - 6998677959073478193, - null, - null, - 133, - 142, - 133, - 142, - 29, - 37, - true, - "2005 1-7.", - "2005 1-7." - ], - [ - "expression", - "wtoken-concatenation", + "reference", + "date", 14843401725435831033, "TEXT", "#/texts/187", 1.0, - 389609625548757410, - 18165604049296772353, + 14654380575675005536, + 9801102795206480618, null, null, 133, - 137, + 141, 133, - 137, + 141, + 25, 29, - 33, - true, - "2005", - "2005" - ], - [ - "sentence", - "", - 16676439669743530711, - "TEXT", - "#/texts/188", - 1.0, - 12178341415896423945, - 1346293265340748508, - null, - null, - 0, - 3, - 0, - 3, - 0, - 3, - true, - "19.", - "19." - ], - [ - "expression", - "wtoken-concatenation", - 16676439669743530711, - "TEXT", - "#/texts/188", - 1.0, - 15441160910541481863, - 8099163979199984832, - null, - null, - 0, - 2, - 0, - 2, - 0, - 2, true, - "19", - "19" + "2005 1-7", + "2005 1-7" ], [ "reference", @@ -70322,7 +70132,7 @@ 0, 2, 0, - 2, + 1, true, "19", "19" @@ -70342,8 +70152,8 @@ 176, 4, 176, - 3, - 40, + 2, + 36, true, "Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997.", "Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997." @@ -70363,8 +70173,8 @@ 12, 4, 12, - 3, - 5, + 2, + 4, true, "Welte DH", "Welte DH" @@ -70384,8 +70194,8 @@ 12, 4, 12, - 3, - 5, + 2, + 4, true, "Welte DH", "Welte DH" @@ -70405,8 +70215,8 @@ 25, 14, 25, - 6, - 8, + 5, + 7, true, "Horsfield B", "Horsfield B" @@ -70426,8 +70236,8 @@ 25, 14, 25, - 6, - 8, + 5, + 7, true, "Horsfield B", "Horsfield B" @@ -70447,8 +70257,8 @@ 46, 27, 46, - 9, - 13, + 8, + 12, true, "Baker DR. Petroleum", "Baker DR. Petroleum" @@ -70468,8 +70278,8 @@ 35, 27, 35, - 9, - 11, + 8, + 10, true, "Baker DR", "Baker DR" @@ -70489,8 +70299,8 @@ 46, 34, 46, - 10, - 13, + 9, + 12, true, "R Petroleum", "R. Petroleum" @@ -70510,8 +70320,8 @@ 66, 37, 66, - 12, - 16, + 11, + 15, true, "Petroleum and Basin Evolution", "Petroleum and Basin Evolution" @@ -70531,8 +70341,8 @@ 67, 51, 67, - 14, - 17, + 13, + 16, true, "Basin Evolution:", "Basin Evolution:" @@ -70552,8 +70362,8 @@ 66, 51, 66, - 14, - 16, + 13, + 15, true, "Basin Evolution", "Basin Evolution" @@ -70573,8 +70383,8 @@ 104, 68, 104, - 17, - 21, + 16, + 20, true, "Insights from Petroleum Geochemistry", "Insights from Petroleum Geochemistry" @@ -70594,8 +70404,8 @@ 76, 68, 76, + 16, 17, - 18, true, "Insights", "Insights" @@ -70615,8 +70425,8 @@ 133, 82, 133, - 19, - 27, + 18, + 26, true, "Petroleum Geochemistry, Geology, and Basin Modeling", "Petroleum Geochemistry, Geology, and Basin Modeling" @@ -70636,8 +70446,8 @@ 104, 82, 104, - 19, - 21, + 18, + 20, true, "Petroleum Geochemistry", "Petroleum Geochemistry" @@ -70657,8 +70467,8 @@ 133, 106, 133, - 22, - 27, + 21, + 26, true, "Geology, and Basin Modeling", "Geology, and Basin Modeling" @@ -70678,8 +70488,8 @@ 113, 106, 113, + 21, 22, - 23, true, "Geology", "Geology" @@ -70699,8 +70509,8 @@ 133, 119, 133, - 25, - 27, + 24, + 26, true, "Basin Modeling", "Basin Modeling" @@ -70720,8 +70530,8 @@ 153, 135, 153, - 28, - 31, + 27, + 30, true, "Berlin Heidelberg:", "Berlin Heidelberg:" @@ -70741,8 +70551,8 @@ 152, 135, 152, - 28, - 30, + 27, + 29, true, "Berlin Heidelberg", "Berlin Heidelberg" @@ -70762,12 +70572,33 @@ 169, 154, 169, - 31, - 34, + 30, + 33, true, "Springer-Verlag", "Springer-Verlag" ], + [ + "reference", + "date", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 16381206542172924133, + 9981189962990674937, + null, + null, + 169, + 175, + 169, + 175, + 33, + 35, + true, + "; 1997", + "; 1997" + ], [ "sentence", "", @@ -71120,7 +70951,7 @@ 173, 184, 35, - 46, + 42, true, "2020;1:e20.", "2020;1:e20." @@ -71141,7 +70972,28 @@ 173, 183, 35, - 45, + 41, + true, + "2020;1:e20", + "2020;1:e20" + ], + [ + "reference", + "date", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 12668563530344603848, + 14820206483220239473, + null, + null, + 173, + 183, + 173, + 183, + 35, + 41, true, "2020;1:e20", "2020;1:e20" @@ -71161,8 +71013,8 @@ 203, 185, 203, - 46, - 56, + 42, + 51, true, "https://doi.org/10", "https://doi.org/10" @@ -71182,8 +71034,8 @@ 190, 185, 190, - 46, - 47, + 42, + 43, true, "https", "https" @@ -71203,8 +71055,8 @@ 196, 193, 196, - 50, - 51, + 46, + 47, true, "doi", "doi" @@ -71224,8 +71076,8 @@ 200, 197, 200, - 52, - 53, + 48, + 49, true, "org", "org" @@ -71237,19 +71089,19 @@ "TEXT", "#/texts/189", 1.0, - 12178341415895571884, - 2509344748616837201, + 389609625537446556, + 7737228572826305234, null, null, - 209, + 208, 212, - 209, + 208, 212, - 62, - 63, + 53, + 55, true, - "ail", - "ail" + "/ail", + "/ail" ], [ "numval", @@ -71267,7 +71119,7 @@ 0, 8, 0, - 8, + 1, true, "26895595", "26895595" @@ -71287,8 +71139,8 @@ 14, 10, 14, - 9, - 13, + 2, + 3, true, "2020", "2020" @@ -71308,8 +71160,8 @@ 17, 16, 17, - 14, - 15, + 4, + 5, true, "2", "2" @@ -71329,8 +71181,8 @@ 125, 19, 125, - 16, - 61, + 6, + 41, true, "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." @@ -71350,8 +71202,8 @@ 87, 35, 87, - 18, - 44, + 8, + 29, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," @@ -71371,8 +71223,8 @@ 86, 35, 86, - 18, - 43, + 8, + 28, true, "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" @@ -71392,8 +71244,8 @@ 87, 67, 87, - 28, - 44, + 18, + 29, true, "doi/10.1002/ail2.20,", "doi/10.1002/ail2.20," @@ -71413,8 +71265,8 @@ 78, 71, 78, - 30, - 37, + 20, + 23, true, "10.1002", "10.1002" @@ -71434,8 +71286,8 @@ 86, 82, 86, - 39, - 43, + 25, + 28, true, "2.20", "2.20" @@ -71455,8 +71307,8 @@ 108, 88, 108, - 44, - 47, + 29, + 32, true, "Wiley Online Library", "Wiley Online Library" @@ -71476,8 +71328,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -71497,8 +71349,8 @@ 124, 112, 124, - 48, - 60, + 33, + 40, true, "[23/08/2023]", "[23/08/2023]" @@ -71518,8 +71370,8 @@ 115, 113, 115, - 49, - 51, + 34, + 35, true, "23", "23" @@ -71539,8 +71391,8 @@ 118, 116, 118, - 52, - 54, + 36, + 37, true, "08", "08" @@ -71560,8 +71412,8 @@ 123, 119, 123, - 55, - 59, + 38, + 39, true, "2023", "2023" @@ -71581,8 +71433,8 @@ 139, 134, 139, - 63, - 64, + 43, + 44, true, "Terms", "Terms" @@ -71602,8 +71454,8 @@ 154, 144, 154, - 65, - 66, + 45, + 46, true, "Conditions", "Conditions" @@ -71623,8 +71475,8 @@ 209, 155, 209, - 66, - 83, + 46, + 63, true, "(https://onlinelibrary.wiley.com/terms-and-conditions)", "(https://onlinelibrary.wiley.com/terms-and-conditions)" @@ -71644,8 +71496,8 @@ 208, 156, 208, - 67, - 82, + 47, + 62, true, "https://onlinelibrary.wiley.com/terms-and-conditions", "https://onlinelibrary.wiley.com/terms-and-conditions" @@ -71665,8 +71517,8 @@ 233, 213, 233, - 84, - 87, + 64, + 67, true, "Wiley Online Library", "Wiley Online Library" @@ -71686,8 +71538,8 @@ 243, 238, 243, - 88, - 89, + 68, + 69, true, "rules", "rules" @@ -71707,8 +71559,8 @@ 250, 247, 250, - 90, - 91, + 70, + 71, true, "use", "use" @@ -71728,8 +71580,8 @@ 263, 252, 263, - 92, - 94, + 72, + 74, true, "OA articles", "OA articles" @@ -71749,8 +71601,8 @@ 319, 284, 319, - 98, - 102, + 78, + 82, true, "applicable Creative Commons License", "applicable Creative Commons License" @@ -71855,7 +71707,7 @@ 0, 4, 0, - 4, + 3, true, "0.82", "0.82" @@ -71876,7 +71728,7 @@ 0, 4, 0, - 4, + 3, true, "0.96", "0.96" @@ -71897,7 +71749,7 @@ 0, 4, 0, - 4, + 3, true, "0.98", "0.98" @@ -71918,7 +71770,7 @@ 0, 4, 0, - 4, + 3, true, "1.00", "1.00" @@ -71939,7 +71791,7 @@ 0, 4, 0, - 4, + 3, true, "0.93", "0.93" @@ -71960,7 +71812,7 @@ 0, 4, 0, - 4, + 3, true, "0.98", "0.98" @@ -71981,7 +71833,7 @@ 0, 4, 0, - 4, + 3, true, "1.00", "1.00" @@ -72002,7 +71854,7 @@ 0, 4, 0, - 4, + 3, true, "1.00", "1.00" @@ -72023,7 +71875,7 @@ 0, 4, 0, - 4, + 3, true, "0.62", "0.62" @@ -72044,7 +71896,7 @@ 0, 4, 0, - 4, + 3, true, "0.80", "0.80" @@ -72065,7 +71917,7 @@ 0, 4, 0, - 4, + 3, true, "0.87", "0.87" @@ -72086,7 +71938,7 @@ 0, 4, 0, - 4, + 3, true, "0.94", "0.94" @@ -72107,7 +71959,7 @@ 0, 4, 0, - 4, + 3, true, "0.73", "0.73" @@ -72128,7 +71980,7 @@ 0, 4, 0, - 4, + 3, true, "0.91", "0.91" @@ -72149,7 +72001,7 @@ 0, 4, 0, - 4, + 3, true, "0.94", "0.94" @@ -72170,7 +72022,7 @@ 0, 4, 0, - 4, + 3, true, "0.97", "0.97" @@ -72191,7 +72043,7 @@ 0, 4, 0, - 4, + 3, true, "0.82", "0.82" @@ -72212,7 +72064,7 @@ 0, 4, 0, - 4, + 3, true, "0.94", "0.94" @@ -72233,7 +72085,7 @@ 0, 4, 0, - 4, + 3, true, "0.97", "0.97" @@ -72254,7 +72106,7 @@ 0, 4, 0, - 4, + 3, true, "0.98", "0.98" @@ -72275,7 +72127,7 @@ 0, 4, 0, - 4, + 3, true, "0.82", "0.82" @@ -72296,7 +72148,7 @@ 0, 4, 0, - 4, + 3, true, "0.92", "0.92" @@ -72317,7 +72169,7 @@ 0, 4, 0, - 4, + 3, true, "0.95", "0.95" @@ -72338,7 +72190,7 @@ 0, 4, 0, - 4, + 3, true, "0.97", "0.97" @@ -72359,7 +72211,7 @@ 0, 4, 0, - 4, + 3, true, "0.75", "0.75" @@ -72380,7 +72232,7 @@ 0, 4, 0, - 4, + 3, true, "0.92", "0.92" @@ -72401,7 +72253,7 @@ 0, 4, 0, - 4, + 3, true, "0.96", "0.96" @@ -72422,7 +72274,7 @@ 0, 4, 0, - 4, + 3, true, "0.97", "0.97" @@ -72443,7 +72295,7 @@ 0, 4, 0, - 4, + 3, true, "0.89", "0.89" @@ -72464,7 +72316,7 @@ 0, 4, 0, - 4, + 3, true, "0.96", "0.96" @@ -72485,7 +72337,7 @@ 0, 4, 0, - 4, + 3, true, "0.97", "0.97" @@ -72506,7 +72358,7 @@ 0, 4, 0, - 4, + 3, true, "0.98", "0.98" @@ -72527,7 +72379,7 @@ 0, 4, 0, - 4, + 3, true, "0.83", "0.83" @@ -72548,7 +72400,7 @@ 0, 4, 0, - 4, + 3, true, "0.92", "0.92" @@ -72569,7 +72421,7 @@ 0, 4, 0, - 4, + 3, true, "0.95", "0.95" @@ -72590,7 +72442,7 @@ 0, 4, 0, - 4, + 3, true, "0.96", "0.96" @@ -77183,7 +77035,6 @@ "page-footers": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-footers/0", - "hash": 12400883656433726216, "orig": "Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", "prov": [ { @@ -77191,13 +77042,13 @@ } ], "sref": "#/page-footers/0", + "subj_hash": 12400883656433726216, "text": "Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", "text-hash": 8372141692634509619, "type": "page-footer" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-footers/1", - "hash": 10244115652970867690, "orig": "wileyonlinelibrary.com/journal/ail2 1of15", "prov": [ { @@ -77205,6 +77056,7 @@ } ], "sref": "#/page-footers/1", + "subj_hash": 10244115652970867690, "text": "wileyonlinelibrary.com/journal/ail2 1of15", "text-hash": 6196517219334265105, "type": "page-footer" @@ -77213,7 +77065,6 @@ "page-headers": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/0", - "hash": 1841431076736563689, "orig": "Received: 15 September 2020", "prov": [ { @@ -77221,13 +77072,13 @@ } ], "sref": "#/page-headers/0", + "subj_hash": 1841431076736563689, "text": "Received: 15 September 2020", "text-hash": 16688788223092401940, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/1", - "hash": 3915126318503464014, "orig": "Revised: 23 November 2020", "prov": [ { @@ -77235,13 +77086,13 @@ } ], "sref": "#/page-headers/1", + "subj_hash": 3915126318503464014, "text": "Revised: 23 November 2020", "text-hash": 1000711515083668085, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/2", - "hash": 1727876228376027809, "orig": "Accepted: 25 November 2020", "prov": [ { @@ -77249,13 +77100,13 @@ } ], "sref": "#/page-headers/2", + "subj_hash": 1727876228376027809, "text": "Accepted: 25 November 2020", "text-hash": 17099649843681009628, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/3", - "hash": 4558221577189246496, "orig": "DOI: 10.1002/ail2.20", "prov": [ { @@ -77263,13 +77114,13 @@ } ], "sref": "#/page-headers/3", + "subj_hash": 4558221577189246496, "text": "DOI: 10.1002/ail2.20", "text-hash": 348625343742526555, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/4", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77277,13 +77128,13 @@ } ], "sref": "#/page-headers/4", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/5", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77291,13 +77142,13 @@ } ], "sref": "#/page-headers/5", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/6", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77305,13 +77156,13 @@ } ], "sref": "#/page-headers/6", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/7", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77319,13 +77170,13 @@ } ], "sref": "#/page-headers/7", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/8", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77333,13 +77184,13 @@ } ], "sref": "#/page-headers/8", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/9", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77347,13 +77198,13 @@ } ], "sref": "#/page-headers/9", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/10", - "hash": 4361549266732238272, "orig": "8of15", "prov": [ { @@ -77361,13 +77212,13 @@ } ], "sref": "#/page-headers/10", + "subj_hash": 4361549266732238272, "text": "8of15", "text-hash": 329104147727696635, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/11", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77375,13 +77226,13 @@ } ], "sref": "#/page-headers/11", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/12", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77389,13 +77240,13 @@ } ], "sref": "#/page-headers/12", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/13", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77403,13 +77254,13 @@ } ], "sref": "#/page-headers/13", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/14", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77417,13 +77268,13 @@ } ], "sref": "#/page-headers/14", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/15", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77431,13 +77282,13 @@ } ], "sref": "#/page-headers/15", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/16", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77445,13 +77296,13 @@ } ], "sref": "#/page-headers/16", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/17", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77459,13 +77310,13 @@ } ], "sref": "#/page-headers/17", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/18", - "hash": 8492015887072434396, "orig": "STAAR ET AL.", "prov": [ { @@ -77473,6 +77324,7 @@ } ], "sref": "#/page-headers/18", + "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", "text-hash": 14658966106383255015, "type": "page-header" @@ -80555,6 +80407,7 @@ ] }, "sref": "#", + "subj_hash": 18446744073709551615, "tables": [ { "#-cols": 6, @@ -80562,7 +80415,6 @@ "captions": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/146", - "hash": 8669048055071941045, "orig": "TABLE 1 Top-k accuracies validation of KG query results. Numbers represent the fraction in which any of the k highest ranked answers matches the expected answer", "prov": [ { @@ -80570,6 +80422,7 @@ } ], "sref": "#/tables/0/captions/0", + "subj_hash": 8669048055071941045, "text": "TABLE 1 Top-k accuracies validation of KG query results. Numbers represent the fraction in which any of the k highest ranked answers matches the expected answer", "text-hash": 14400864471075544784, "type": "caption" @@ -82251,7 +82104,6 @@ ], "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/tables/0", "footnotes": [], - "hash": 12469893451248582632, "mentions": [], "prov": [ { @@ -82259,13 +82111,13 @@ } ], "sref": "#/tables/0", + "subj_hash": 12469893451248582632, "type": "table" } ], "texts": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/0", - "hash": 2144509362215609527, "orig": "LETTER", "prov": [ { @@ -82273,13 +82125,13 @@ } ], "sref": "#/texts/0", + "subj_hash": 2144509362215609527, "text": "LETTER", "text-hash": 16381206540184854990, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/1", - "hash": 16672720454366774824, "orig": "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", "prov": [ { @@ -82287,13 +82139,13 @@ } ], "sref": "#/texts/1", + "subj_hash": 16672720454366774824, "text": "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", "text-hash": 4375081646508065875, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/2", - "hash": 16781763356419781679, "orig": "Peter W. J. Staar", "prov": [ { @@ -82301,13 +82153,13 @@ } ], "sref": "#/texts/2", + "subj_hash": 16781763356419781679, "text": "Peter W. J. Staar", "text-hash": 4049808513512976982, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/3", - "hash": 3352447812305581329, "orig": "|", "prov": [ { @@ -82315,13 +82167,13 @@ } ], "sref": "#/texts/3", + "subj_hash": 3352447812305581329, "text": "|", "text-hash": 17767354399704232748, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/4", - "hash": 14877831450145300436, "orig": "Michele Dolfi", "prov": [ { @@ -82329,13 +82181,13 @@ } ], "sref": "#/texts/4", + "subj_hash": 14877831450145300436, "text": "Michele Dolfi", "text-hash": 1571808557594152175, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/5", - "hash": 3352447812305581329, "orig": "|", "prov": [ { @@ -82343,13 +82195,13 @@ } ], "sref": "#/texts/5", + "subj_hash": 3352447812305581329, "text": "|", "text-hash": 17767354399704232748, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/6", - "hash": 13336841394978214677, "orig": "Christoph Auer", "prov": [ { @@ -82357,13 +82209,13 @@ } ], "sref": "#/texts/6", + "subj_hash": 13336841394978214677, "text": "Christoph Auer", "text-hash": 9737597816447750448, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/7", - "hash": 15325526562897377208, "orig": "IBM Research, Rueschlikon, Switzerland", "prov": [ { @@ -82371,13 +82223,13 @@ } ], "sref": "#/texts/7", + "subj_hash": 15325526562897377208, "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/8", - "hash": 4017434568255781081, "orig": "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland. Email: taa@zurich.ibm.com", "prov": [ { @@ -82385,13 +82237,13 @@ } ], "sref": "#/texts/8", + "subj_hash": 4017434568255781081, "text": "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland. Email: taa@zurich.ibm.com", "text-hash": 961470147553945060, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/9", - "hash": 8487024695951375934, "orig": "Abstract", "prov": [ { @@ -82399,13 +82251,13 @@ } ], "sref": "#/texts/9", + "subj_hash": 8487024695951375934, "text": "Abstract", "text-hash": 14650447666970618949, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/10", - "hash": 11695737263227886476, "orig": "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data. Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world. Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries. In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS). Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried. To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform. This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities. Both components are tightly integrated and can be easily consumed through REST APIs. Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach. The CPS platform is designed as a modular microservice system operating on Kubernetes clusters. Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry.", "prov": [ { @@ -82413,13 +82265,13 @@ } ], "sref": "#/texts/10", + "subj_hash": 11695737263227886476, "text": "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data. Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world. Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries. In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS). Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried. To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform. This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities. Both components are tightly integrated and can be easily consumed through REST APIs. Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach. The CPS platform is designed as a modular microservice system operating on Kubernetes clusters. Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry.", "text-hash": 9356514212507371703, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/11", - "hash": 8500733160758672230, "orig": "KEYWORDS", "prov": [ { @@ -82427,13 +82279,13 @@ } ], "sref": "#/texts/11", + "subj_hash": 8500733160758672230, "text": "KEYWORDS", "text-hash": 14650267244735310237, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/12", - "hash": 4452030907228745864, "orig": "document processing, knowledge graph, semantic search", "prov": [ { @@ -82441,13 +82293,13 @@ } ], "sref": "#/texts/12", + "subj_hash": 4452030907228745864, "text": "document processing, knowledge graph, semantic search", "text-hash": 243147861724212659, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/13", - "hash": 11913688961435238004, "orig": "1 | INTRODUCTION", "prov": [ { @@ -82455,13 +82307,13 @@ } ], "sref": "#/texts/13", + "subj_hash": 11913688961435238004, "text": "1 | INTRODUCTION", "text-hash": 8854903187485535375, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/14", - "hash": 9977041563469582014, "orig": "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally. It is self-evident that this number has increased ever since. The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world. The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings. Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable.", "prov": [ { @@ -82469,13 +82321,13 @@ } ], "sref": "#/texts/14", + "subj_hash": 9977041563469582014, "text": "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally. It is self-evident that this number has increased ever since. The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world. The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings. Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable.", "text-hash": 6468010182398147525, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/15", - "hash": 4361549266817300114, "orig": "2of15", "prov": [ { @@ -82483,13 +82335,13 @@ } ], "sref": "#/texts/15", + "subj_hash": 4361549266817300114, "text": "2of15", "text-hash": 329104147827159977, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/16", - "hash": 8425126282903547933, "orig": "In a previous publication, we presented the corpus conversion service (CCS). 1 The CCS is a scalable cloud service, which leverages state-of-the-art machine learning to convert complex formats (eg, PDF, Word, and Bitmap) into a richly structured JSON representation of their content. As such, the CCS solves the first problem when confronted with a large corpus of documents, that is, make the content of the documents programmatically accessible. Examples of the latter would be ' List all images with their caption from the corpus or list all titles with their publication date. ' The second problem is to obviously search or explore the content of the documents in a large corpus. For this problem, we have developed the corpus processing service (CPS), which we present in this paper. The CPS is intended to create knowledge bases (KBs) from the converted JSON corpus and serve these KBs through in-memory knowledge graph stores. As such, the CPS is the natural extension of the CCS and has as an express purpose to make corpora of documents available for deep data exploration.", "prov": [ { @@ -82497,13 +82349,13 @@ } ], "sref": "#/texts/16", + "subj_hash": 8425126282903547933, "text": "In a previous publication, we presented the corpus conversion service (CCS). 1 The CCS is a scalable cloud service, which leverages state-of-the-art machine learning to convert complex formats (eg, PDF, Word, and Bitmap) into a richly structured JSON representation of their content. As such, the CCS solves the first problem when confronted with a large corpus of documents, that is, make the content of the documents programmatically accessible. Examples of the latter would be ' List all images with their caption from the corpus or list all titles with their publication date. ' The second problem is to obviously search or explore the content of the documents in a large corpus. For this problem, we have developed the corpus processing service (CPS), which we present in this paper. The CPS is intended to create knowledge bases (KBs) from the converted JSON corpus and serve these KBs through in-memory knowledge graph stores. As such, the CPS is the natural extension of the CCS and has as an express purpose to make corpora of documents available for deep data exploration.", "text-hash": 14716796829201051176, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/17", - "hash": 16507313240019459642, "orig": "The purpose of CPS is to enable deep data exploration directly on large corpora. Here, we define deep data exploration as the capability to ingest large corpora of documents into a scalable service and detect, extract and combine facts contained in these corpora in order to make new discoveries or support critical decision making. It is key to understand that our goal of creating and querying Knowledge Graphs to enable deep data exploration goes beyond search in the spirit of rank and retrieve. Although search is by no means trivial, many state-of-the art solutions exist for this purpose. * We argue, however, that one needs query capabilities which allow for a combination of extracted facts and a fast, onthe-fly creation of new datasets to enable actual deep data exploration. Those datasets can then be used for further anal-", "prov": [ { @@ -82511,13 +82363,13 @@ } ], "sref": "#/texts/17", + "subj_hash": 16507313240019459642, "text": "The purpose of CPS is to enable deep data exploration directly on large corpora. Here, we define deep data exploration as the capability to ingest large corpora of documents into a scalable service and detect, extract and combine facts contained in these corpora in order to make new discoveries or support critical decision making. It is key to understand that our goal of creating and querying Knowledge Graphs to enable deep data exploration goes beyond search in the spirit of rank and retrieve. Although search is by no means trivial, many state-of-the art solutions exist for this purpose. * We argue, however, that one needs query capabilities which allow for a combination of extracted facts and a fast, onthe-fly creation of new datasets to enable actual deep data exploration. Those datasets can then be used for further anal-", "text-hash": 4261190952114998337, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/18", - "hash": 7900229969942228522, "orig": "ysis, which might lead to new discoveries or support decision making.", "prov": [ { @@ -82525,13 +82377,13 @@ } ], "sref": "#/texts/18", + "subj_hash": 7900229969942228522, "text": "ysis, which might lead to new discoveries or support decision making.", "text-hash": 12931323242585971793, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/19", - "hash": 10081303962589804251, "orig": "To better distinguish this approach from conventional search, let us consider some example questions:", "prov": [ { @@ -82539,13 +82391,13 @@ } ], "sref": "#/texts/19", + "subj_hash": 10081303962589804251, "text": "To better distinguish this approach from conventional search, let us consider some example questions:", "text-hash": 6426882630003520482, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/20", - "hash": 12186698460099365002, "orig": "a. Definition of high temperature superconductor.", "prov": [ { @@ -82553,13 +82405,13 @@ } ], "sref": "#/texts/20", + "subj_hash": 12186698460099365002, "text": "a. Definition of high temperature superconductor.", "text-hash": 8586326920090596785, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/21", - "hash": 14190244699299580163, "orig": "b. Publications of before year 2010.", "prov": [ { @@ -82567,13 +82419,13 @@ } ], "sref": "#/texts/21", + "subj_hash": 14190244699299580163, "text": "b. Publications of before year 2010.", "text-hash": 2034196463390881594, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/22", - "hash": 1376279050886549305, "orig": "c. Maps of the Permian basin.", "prov": [ { @@ -82581,13 +82433,13 @@ } ], "sref": "#/texts/22", + "subj_hash": 1376279050886549305, "text": "c. Maps of the Permian basin.", "text-hash": 17379120122282474820, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/23", - "hash": 10155628801693924200, "orig": "d. Geological formations from the Miocene age with their depth, thickness, geographic location, and composition.", "prov": [ { @@ -82595,13 +82447,13 @@ } ], "sref": "#/texts/23", + "subj_hash": 10155628801693924200, "text": "d. Geological formations from the Miocene age with their depth, thickness, geographic location, and composition.", "text-hash": 6073268612165724563, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/24", - "hash": 9107499507097280105, "orig": "e. List all high-Tc superconductors with their known crystallographic and material properties?", "prov": [ { @@ -82609,13 +82461,13 @@ } ], "sref": "#/texts/24", + "subj_hash": 9107499507097280105, "text": "e. List all high-Tc superconductors with their known crystallographic and material properties?", "text-hash": 14246074989165808788, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/25", - "hash": 7248467870339433322, "orig": "Question (a) undoubtedly fits the classic search paradigm, since here one can expect a search engine to find a number sources with exact answers (ie, definitions). Likewise, question (b) can be easily answered through metadata based filter rules on a literature database. Question (c) already requires some extent of domain knowledge to be encoded in a model to accurately classify the relevance of all known maps to the query, at least assuming no manual curation effort has been done. Questions (d) and (e) ultimately impose query capabilities which are clearly infeasible to support through manual curation, and are very unlikely to be answered in any single data source. These questions require the system to return a more complex data structure (eg, a table in which the rows list the formations or materials while the columns contain their respective properties).", "prov": [ { @@ -82623,13 +82475,13 @@ } ], "sref": "#/texts/25", + "subj_hash": 7248467870339433322, "text": "Question (a) undoubtedly fits the classic search paradigm, since here one can expect a search engine to find a number sources with exact answers (ie, definitions). Likewise, question (b) can be easily answered through metadata based filter rules on a literature database. Question (c) already requires some extent of domain knowledge to be encoded in a model to accurately classify the relevance of all known maps to the query, at least assuming no manual curation effort has been done. Questions (d) and (e) ultimately impose query capabilities which are clearly infeasible to support through manual curation, and are very unlikely to be answered in any single data source. These questions require the system to return a more complex data structure (eg, a table in which the rows list the formations or materials while the columns contain their respective properties).", "text-hash": 13592184899010298257, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/26", - "hash": 13346892078888080449, "orig": "Concluding from the above examples, we define the following qualifying criteria for a system that supports deep data exploration on corpora:", "prov": [ { @@ -82637,13 +82489,13 @@ } ], "sref": "#/texts/26", + "subj_hash": 13346892078888080449, "text": "Concluding from the above examples, we define the following qualifying criteria for a system that supports deep data exploration on corpora:", "text-hash": 9732050976592056956, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/27", - "hash": 1118972765223422660, "orig": "1. It can answer queries by combining different data elements from different sources into a new data structure.", "prov": [ { @@ -82651,13 +82503,13 @@ } ], "sref": "#/texts/27", + "subj_hash": 1118972765223422660, "text": "1. It can answer queries by combining different data elements from different sources into a new data structure.", "text-hash": 15389200666968750079, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/28", - "hash": 324023167304456371, "orig": "2. It supports (1) by creating a knowledge model from a controlled, unstructured corpus in a mostly unsupervised way. It may profit from, but not require any manually curated data.", "prov": [ { @@ -82665,13 +82517,13 @@ } ], "sref": "#/texts/28", + "subj_hash": 324023167304456371, "text": "2. It supports (1) by creating a knowledge model from a controlled, unstructured corpus in a mostly unsupervised way. It may profit from, but not require any manually curated data.", "text-hash": 15837385157674255818, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/29", - "hash": 4651508276868765576, "orig": "3. It may restrict supported queries to a specific domain (eg, a technical field).", "prov": [ { @@ -82679,13 +82531,13 @@ } ], "sref": "#/texts/29", + "subj_hash": 4651508276868765576, "text": "3. It may restrict supported queries to a specific domain (eg, a technical field).", "text-hash": 11572955042484278451, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/30", - "hash": 3052020526349962744, "orig": "To meet the objectives defined earlier, CPS implements and tightly integrates two essential components. The first component is a scalable Knowledge Graph creation pipeline, which is used to automatically process text, tables and images through state-of-the-art segmentation and natural language understanding (NLU) models and extract entities and relationships from them. The second component serves the created KG, enabling users to perform deep queries and advanced graph analytics in real time. 2 This is supported through an underlying, highly optimized graph engine we developed to specifically address requirements for deep data exploration.", "prov": [ { @@ -82693,13 +82545,13 @@ } ], "sref": "#/texts/30", + "subj_hash": 3052020526349962744, "text": "To meet the objectives defined earlier, CPS implements and tightly integrates two essential components. The first component is a scalable Knowledge Graph creation pipeline, which is used to automatically process text, tables and images through state-of-the-art segmentation and natural language understanding (NLU) models and extract entities and relationships from them. The second component serves the created KG, enabling users to perform deep queries and advanced graph analytics in real time. 2 This is supported through an underlying, highly optimized graph engine we developed to specifically address requirements for deep data exploration.", "text-hash": 18009286910191614723, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/31", - "hash": 6725501529910185390, "orig": "It is worth noting that the CPS platform is a fully functioning cloud application that has been successfully deployed in multiple real-world scenarios in material science 3 and oil and gas industries. 4", "prov": [ { @@ -82707,13 +82559,13 @@ } ], "sref": "#/texts/31", + "subj_hash": 6725501529910185390, "text": "It is worth noting that the CPS platform is a fully functioning cloud application that has been successfully deployed in multiple real-world scenarios in material science 3 and oil and gas industries. 4", "text-hash": 11737175762912836309, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/32", - "hash": 14814111183601762276, "orig": "In the remainder of this paper, we discuss in detail the technical aspects and implementation details of the two main components of the CPS. In section 2, we present in depth how the platform extracts facts from corpora at a massive scale. In section 3, we go into detail of designing deep queries and show how we compute them in a very efficient", "prov": [ { @@ -82721,13 +82573,13 @@ } ], "sref": "#/texts/32", + "subj_hash": 14814111183601762276, "text": "In the remainder of this paper, we discuss in detail the technical aspects and implementation details of the two main components of the CPS. In section 2, we present in depth how the platform extracts facts from corpora at a massive scale. In section 3, we go into detail of designing deep queries and show how we compute them in a very efficient", "text-hash": 1414786465877142815, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/33", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -82735,13 +82587,13 @@ } ], "sref": "#/texts/33", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/34", - "hash": 4361549266681704196, "orig": "3of15", "prov": [ { @@ -82749,13 +82601,13 @@ } ], "sref": "#/texts/34", + "subj_hash": 4361549266681704196, "text": "3of15", "text-hash": 329104147711745343, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/35", - "hash": 8043608144162608258, "orig": "way with our high-performance graph engine. Later, in section 4, we will discuss in detail how both components are deployed and interacting on the cloud. Finally, in section 5, we present the complete system in a real world case study and benchmark its accuracy.", "prov": [ { @@ -82763,13 +82615,13 @@ } ], "sref": "#/texts/35", + "subj_hash": 8043608144162608258, "text": "way with our high-performance graph engine. Later, in section 4, we will discuss in detail how both components are deployed and interacting on the cloud. Finally, in section 5, we present the complete system in a real world case study and benchmark its accuracy.", "text-hash": 13076251584287625657, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/36", - "hash": 7159467829896778939, "orig": "2 | SCALABLE KNOWLEDGE GRAPH CREATION", "prov": [ { @@ -82777,13 +82629,13 @@ } ], "sref": "#/texts/36", + "subj_hash": 7159467829896778939, "text": "2 | SCALABLE KNOWLEDGE GRAPH CREATION", "text-hash": 13901790948575121858, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/37", - "hash": 5617240156952377, "orig": "In CPS, a Knowledge Graph is defined as a collection of entities and their relationships forming the graphs nodes and edges. Entities can have a wide variety of types. A basic scenario includes types such as documents, document components, keywords, and authors. In addition, there can be more specific types tied to domain verticals, such as materials and properties in material science, or geological ages, formations, rocks, minerals, structures, etc., for oil and gas exploration. Relationships in the KG are strictly defined between the entities. Similar to the entities, the relationships are typed (' has-material-property ' or ' has-geological-age '). Also, relationships in the KG can be weighted, for example, to represent the trustworthiness of a fact that the relationship represents.", "prov": [ { @@ -82791,13 +82643,13 @@ } ], "sref": "#/texts/37", + "subj_hash": 5617240156952377, "text": "In CPS, a Knowledge Graph is defined as a collection of entities and their relationships forming the graphs nodes and edges. Entities can have a wide variety of types. A basic scenario includes types such as documents, document components, keywords, and authors. In addition, there can be more specific types tied to domain verticals, such as materials and properties in material science, or geological ages, formations, rocks, minerals, structures, etc., for oil and gas exploration. Relationships in the KG are strictly defined between the entities. Similar to the entities, the relationships are typed (' has-material-property ' or ' has-geological-age '). Also, relationships in the KG can be weighted, for example, to represent the trustworthiness of a fact that the relationship represents.", "text-hash": 16151270992855323972, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/38", - "hash": 3276490574487379366, "orig": "In typical cases, we start from a collection of documents in different formats. Sometimes, documents are available in semistructured, machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority of cases this does not apply, especially for proprietary documents of companies and organizations. The latter are very often scanned or programmatic PDF documents. Using the CCS, 1 these types of documents are converted into structured JSON files. Those provide easy access to the meta-data (eg, title, abstract, references, authors) and the document body. The latter is structured by subtitles (of various levels), paragraphs, lists, tables (with internal row and column structures), figures, and linked captions. O n c et h ec o r p u si sp r e s n ti nas t r u c t u r e d,m a c h i n e processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely extraction, annotation,and aggregation. The inherent dependencies between these three tasks are defined through a directed acyclic graph (DAG). We willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss the details for each DF task.", "prov": [ { @@ -82805,13 +82657,13 @@ } ], "sref": "#/texts/38", + "subj_hash": 3276490574487379366, "text": "In typical cases, we start from a collection of documents in different formats. Sometimes, documents are available in semistructured, machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority of cases this does not apply, especially for proprietary documents of companies and organizations. The latter are very often scanned or programmatic PDF documents. Using the CCS, 1 these types of documents are converted into structured JSON files. Those provide easy access to the meta-data (eg, title, abstract, references, authors) and the document body. The latter is structured by subtitles (of various levels), paragraphs, lists, tables (with internal row and column structures), figures, and linked captions. O n c et h ec o r p u si sp r e s n ti nas t r u c t u r e d,m a c h i n e processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely extraction, annotation,and aggregation. The inherent dependencies between these three tasks are defined through a directed acyclic graph (DAG). We willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss the details for each DF task.", "text-hash": 17496609193730656989, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/39", - "hash": 3367451956962330174, "orig": "2.1 | DF tasks", "prov": [ { @@ -82819,13 +82671,13 @@ } ], "sref": "#/texts/39", + "subj_hash": 3367451956962330174, "text": "2.1 | DF tasks", "text-hash": 17765848133863277637, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/40", - "hash": 5509744459704235873, "orig": "In Figure 1, we sketch a minimal DF, in which each of the three tasks is used consecutively in order to generate entities and relationships for a generic KG. We will use Figure1toillustratethepurposeandimplementationof each DF task.", "prov": [ { @@ -82833,13 +82685,13 @@ } ], "sref": "#/texts/40", + "subj_hash": 5509744459704235873, "text": "In Figure 1, we sketch a minimal DF, in which each of the three tasks is used consecutively in order to generate entities and relationships for a generic KG. We will use Figure1toillustratethepurposeandimplementationof each DF task.", "text-hash": 10647094536020604316, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/42", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -82847,13 +82699,13 @@ } ], "sref": "#/texts/41", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/43", - "hash": 4361549176688508574, "orig": "4of15", "prov": [ { @@ -82861,13 +82713,13 @@ } ], "sref": "#/texts/42", + "subj_hash": 4361549176688508574, "text": "4of15", "text-hash": 329104066308221861, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/44", - "hash": 12374482891052873875, "orig": "2.1.1 | Extraction", "prov": [ { @@ -82875,13 +82727,13 @@ } ], "sref": "#/texts/43", + "subj_hash": 12374482891052873875, "text": "2.1.1 | Extraction", "text-hash": 8758905122433574314, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/45", - "hash": 2755397864153233778, "orig": "In an extraction task, we generate new data entities (eg, document components) from an original set of source entities (eg, documents). During this process, new links are created which connect these newly generated data entities to their original source entity. Typical examples of such extraction tasks are the extraction of abstracts, paragraphs, tables, or figures from the structured document files.", "prov": [ { @@ -82889,13 +82741,13 @@ } ], "sref": "#/texts/44", + "subj_hash": 2755397864153233778, "text": "In an extraction task, we generate new data entities (eg, document components) from an original set of source entities (eg, documents). During this process, new links are created which connect these newly generated data entities to their original source entity. Typical examples of such extraction tasks are the extraction of abstracts, paragraphs, tables, or figures from the structured document files.", "text-hash": 18305914688852125577, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/46", - "hash": 4698316471746130896, "orig": "From a scalability point of view, this task is embarrassingly parallel, which makes it extremely easy to implement on loosely interconnected environments such as a cloud. We simply iterate in parallel over all source entities in the backend database, extract the desired components and then insert those components as new data entities back into the database. Extraction tasks have no internal synchronization points.", "prov": [ { @@ -82903,13 +82755,13 @@ } ], "sref": "#/texts/45", + "subj_hash": 4698316471746130896, "text": "From a scalability point of view, this task is embarrassingly parallel, which makes it extremely easy to implement on loosely interconnected environments such as a cloud. We simply iterate in parallel over all source entities in the backend database, extract the desired components and then insert those components as new data entities back into the database. Extraction tasks have no internal synchronization points.", "text-hash": 11458501594938683627, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/47", - "hash": 11827267218358801841, "orig": "One particular benefit of this task is to make the query capability on the Knowledge Graph more fine grained by being able to provide provenance information on the result. For example, this would let the user explore all the paragraphs, tables, or figures that embed a certain fact.", "prov": [ { @@ -82917,13 +82769,13 @@ } ], "sref": "#/texts/46", + "subj_hash": 11827267218358801841, "text": "One particular benefit of this task is to make the query capability on the Knowledge Graph more fine grained by being able to provide provenance information on the result. For example, this would let the user explore all the paragraphs, tables, or figures that embed a certain fact.", "text-hash": 8932299863639200460, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/48", - "hash": 6297710299044869343, "orig": "2.1.2 | Annotation", "prov": [ { @@ -82931,13 +82783,13 @@ } ], "sref": "#/texts/47", + "subj_hash": 6297710299044869343, "text": "2.1.2 | Annotation", "text-hash": 12444247655523627494, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/49", - "hash": 7158837349769150986, "orig": "In the annotation task, we apply NLU methods to detect language entities and their relationships within a single data entity. Here, data entities can be as simple as a snippet of text (eg, a paragraph) or more complex structures such as tables or figures. The main goal of the annotation task is to obtain all relevant information from the data entity with regard to the domain of the corpus. Since different technical fields require different annotations, our annotation task is modular, allowing language entities to be annotated for material science, oil and gas, or more basic entities (eg, noun phrases, abbreviations, unit and values, etc.).", "prov": [ { @@ -82945,13 +82797,13 @@ } ], "sref": "#/texts/48", + "subj_hash": 7158837349769150986, "text": "In the annotation task, we apply NLU methods to detect language entities and their relationships within a single data entity. Here, data entities can be as simple as a snippet of text (eg, a paragraph) or more complex structures such as tables or figures. The main goal of the annotation task is to obtain all relevant information from the data entity with regard to the domain of the corpus. Since different technical fields require different annotations, our annotation task is modular, allowing language entities to be annotated for material science, oil and gas, or more basic entities (eg, noun phrases, abbreviations, unit and values, etc.).", "text-hash": 13902418307602972721, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/50", - "hash": 1150871476689677866, "orig": "From a technical perspective, the language entities are detected and annotated using multiple NLU methods, ranging from complex regular expressions \u2020 to LSTM networks. 5,6 We employ state-of-the-art NLU toolkits such as Spacy 7 or NLTK \u2021 to train and apply custom named entity recognition models. A detailed investigation of these NLU annotators unfortunately goes beyond of the scope of this paper. However, in Figure 2, we show the different types of named (geological) entities found in a paragraph by our oil and gas annotation model.", "prov": [ { @@ -82959,13 +82811,13 @@ } ], "sref": "#/texts/49", + "subj_hash": 1150871476689677866, "text": "From a technical perspective, the language entities are detected and annotated using multiple NLU methods, ranging from complex regular expressions \u2020 to LSTM networks. 5,6 We employ state-of-the-art NLU toolkits such as Spacy 7 or NLTK \u2021 to train and apply custom named entity recognition models. A detailed investigation of these NLU annotators unfortunately goes beyond of the scope of this paper. However, in Figure 2, we show the different types of named (geological) entities found in a paragraph by our oil and gas annotation model.", "text-hash": 15370812655802342481, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/51", - "hash": 5163702913945903725, "orig": "In Listing 1, we also show an excerpt of how the annotations (both language entities and relationships) are stored in the backend. It is noteworthy here that relationships are stored as (weighted) links between two entity references. \u00a7 The usage of references reduces data duplication and more importantly ensures that the relationships are always defined between two known entities in the KG. The latter simplifies the aggregation of the relationships significantly, since no new entities need to be created in the KG in order to aggregate the relationships (see section 2.1.4).", "prov": [ { @@ -82973,13 +82825,13 @@ } ], "sref": "#/texts/50", + "subj_hash": 5163702913945903725, "text": "In Listing 1, we also show an excerpt of how the annotations (both language entities and relationships) are stored in the backend. It is noteworthy here that relationships are stored as (weighted) links between two entity references. \u00a7 The usage of references reduces data duplication and more importantly ensures that the relationships are always defined between two known entities in the KG. The latter simplifies the aggregation of the relationships significantly, since no new entities need to be created in the KG in order to aggregate the relationships (see section 2.1.4).", "text-hash": 11348986383696847000, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/52", - "hash": 5462319091745771382, "orig": "FIGURE 2 Illustration of various detected language entities in a particularly rich snippet of an AAPG abstract. 8 The language entities here are all related to geological concepts in the domain of oil and gas exploration", "prov": [ { @@ -82987,13 +82839,13 @@ } ], "sref": "#/texts/51", + "subj_hash": 5462319091745771382, "text": "FIGURE 2 Illustration of various detected language entities in a particularly rich snippet of an AAPG abstract. 8 The language entities here are all related to geological concepts in the domain of oil and gas exploration", "text-hash": 11050304000116997517, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/53", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -83001,13 +82853,13 @@ } ], "sref": "#/texts/52", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/54", - "hash": 958124839653591304, "orig": "LISTING 1 Excerpt of the annotated abstract from an AAPG paper 8 with its original text and the detected entities and relationships. Note that relationships are typed (encoded in the field name) and weighted. The weight reflects the confidence of the language annotation model during extraction. Relationships are always defined on detected entities, and will therefore use references defining a link between two entities", "prov": [ { @@ -83015,13 +82867,13 @@ } ], "sref": "#/texts/53", + "subj_hash": 958124839653591304, "text": "LISTING 1 Excerpt of the annotated abstract from an AAPG paper 8 with its original text and the detected entities and relationships. Note that relationships are typed (encoded in the field name) and weighted. The weight reflects the confidence of the language annotation model during extraction. Relationships are always defined on detected entities, and will therefore use references defining a link between two entities", "text-hash": 15194258930241746739, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/55", - "hash": 1448405324616602032, "orig": "From a scaling perspective, this task is again embarrassingly parallel. Unlike the extraction task, the annotation task is not creating new data entities, but rather appending new data associated with an existing data entity. We simply apply the desired entity and relationship annotators on all document components (paragraphs, tables, etc.) in parallel by distributing the operations on all available compute resources. Annotation tasks have no internal synchronization points. From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs. Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers.", "prov": [ { @@ -83029,13 +82881,13 @@ } ], "sref": "#/texts/54", + "subj_hash": 1448405324616602032, "text": "From a scaling perspective, this task is again embarrassingly parallel. Unlike the extraction task, the annotation task is not creating new data entities, but rather appending new data associated with an existing data entity. We simply apply the desired entity and relationship annotators on all document components (paragraphs, tables, etc.) in parallel by distributing the operations on all available compute resources. Annotation tasks have no internal synchronization points. From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs. Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers.", "text-hash": 17018759417884348107, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/56", - "hash": 2617775076168299948, "orig": "2.1.3 | Aggregation of entities", "prov": [ { @@ -83043,13 +82895,13 @@ } ], "sref": "#/texts/55", + "subj_hash": 2617775076168299948, "text": "2.1.3 | Aggregation of entities", "text-hash": 18150799209915986647, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/57", - "hash": 13974986056043304735, "orig": "The aggregation task for entities is similar to an extraction task, in the sense that we create new entities and link them each to the source they were mentioned in. In addition to extraction, the entity aggregation task also applies a similarity metric \u00b6 between the entities during extraction. This similarity metric will define if two entities refer to the same language concept and thus need to be represented by a single entity in the KG, rather than remaining separated. In Figure 1, we have illustrated the aggregation task for two types of entities across many different document components. These entity types could be for example materials and properties or geological formations and geological ages. The links connecting the new entities to their source entity are weighted according to the frequency of the match, that is, we set a higher weight if the language entity has been found multiple times. From an implementation point of view, the aggregation task for entities is nontrivial. In distributed computing, it corresponds to a reduction operation. Our implementation distributes the iteration of the source elements among all available computational resources. The aggregation is first performed in a local buffer, which is then synchronized with the backend database only when it reaches a maximum size. The synchronization step is a simple atomic update into an existing (or a newly created) database object. The synchronization for updates from each worker task does not collide with the others.", "prov": [ { @@ -83057,13 +82909,13 @@ } ], "sref": "#/texts/56", + "subj_hash": 13974986056043304735, "text": "The aggregation task for entities is similar to an extraction task, in the sense that we create new entities and link them each to the source they were mentioned in. In addition to extraction, the entity aggregation task also applies a similarity metric \u00b6 between the entities during extraction. This similarity metric will define if two entities refer to the same language concept and thus need to be represented by a single entity in the KG, rather than remaining separated. In Figure 1, we have illustrated the aggregation task for two types of entities across many different document components. These entity types could be for example materials and properties or geological formations and geological ages. The links connecting the new entities to their source entity are weighted according to the frequency of the match, that is, we set a higher weight if the language entity has been found multiple times. From an implementation point of view, the aggregation task for entities is nontrivial. In distributed computing, it corresponds to a reduction operation. Our implementation distributes the iteration of the source elements among all available computational resources. The aggregation is first performed in a local buffer, which is then synchronized with the backend database only when it reaches a maximum size. The synchronization step is a simple atomic update into an existing (or a newly created) database object. The synchronization for updates from each worker task does not collide with the others.", "text-hash": 2253911354578933030, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/58", - "hash": 5985285694705576020, "orig": "2.1.4 | Aggregation of relationships", "prov": [ { @@ -83071,13 +82923,13 @@ } ], "sref": "#/texts/57", + "subj_hash": 5985285694705576020, "text": "2.1.4 | Aggregation of relationships", "text-hash": 12765605759878485615, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/59", - "hash": 11235296141350659290, "orig": "The aggregation of relationships introduces new links between the entities that were aggregated in the previous aggregation operation. In Figure 1, this task is depicted as the last operation, where entities with an annotated relationship are explicitly linked together. For example, we create an edge between the Egret-Hibernia Petroleum System and Jeanne D'Arc Basin from Listing 1.", "prov": [ { @@ -83085,13 +82937,13 @@ } ], "sref": "#/texts/58", + "subj_hash": 11235296141350659290, "text": "The aggregation of relationships introduces new links between the entities that were aggregated in the previous aggregation operation. In Figure 1, this task is depicted as the last operation, where entities with an annotated relationship are explicitly linked together. For example, we create an edge between the Egret-Hibernia Petroleum System and Jeanne D'Arc Basin from Listing 1.", "text-hash": 7583169921155047905, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/60", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -83099,13 +82951,13 @@ } ], "sref": "#/texts/59", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/61", - "hash": 4361549266576336732, "orig": "6of15", "prov": [ { @@ -83113,13 +82965,13 @@ } ], "sref": "#/texts/60", + "subj_hash": 4361549266576336732, "text": "6of15", "text-hash": 329104147615819111, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/62", - "hash": 5771309285006424458, "orig": "Similar to the aggregation of entities, the aggregation task for relationships is a reduction operation. Two independent document components could describe the same relationship between two entities. To minimize the synchronization lookup operation with the backend database, this task also utilizes a local buffer which accumulates the changes to be committed to the KG until the maximum size is reached. This approach allows to distribute the computation among all the source document components and performs very few blocking operations in the backend database.", "prov": [ { @@ -83127,13 +82979,13 @@ } ], "sref": "#/texts/61", + "subj_hash": 5771309285006424458, "text": "Similar to the aggregation of entities, the aggregation task for relationships is a reduction operation. Two independent document components could describe the same relationship between two entities. To minimize the synchronization lookup operation with the backend database, this task also utilizes a local buffer which accumulates the changes to be committed to the KG until the maximum size is reached. This approach allows to distribute the computation among all the source document components and performs very few blocking operations in the backend database.", "text-hash": 12691372718925440689, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/63", - "hash": 5371685212527510397, "orig": "2.2 | Data flows", "prov": [ { @@ -83141,13 +82993,13 @@ } ], "sref": "#/texts/62", + "subj_hash": 5371685212527510397, "text": "2.2 | Data flows", "text-hash": 11140938221338345864, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/64", - "hash": 7817257645383866853, "orig": "The purpose of a DF is to provide an execution plan for the task types detailed above in a meaningful order to generate or update a specific KG. When instantiating a DF, one has the possibility to define in a declarative way:", "prov": [ { @@ -83155,13 +83007,13 @@ } ], "sref": "#/texts/63", + "subj_hash": 7817257645383866853, "text": "The purpose of a DF is to provide an execution plan for the task types detailed above in a meaningful order to generate or update a specific KG. When instantiating a DF, one has the possibility to define in a declarative way:", "text-hash": 12955841367339550496, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/65", - "hash": 2929626768872004841, "orig": "1. Which document components should be extracted from a converted corpus to form source entities (eg, extract all paragraphs, tables, figures and captions from the AAPG articles)?", "prov": [ { @@ -83169,13 +83021,13 @@ } ], "sref": "#/texts/64", + "subj_hash": 2929626768872004841, "text": "1. Which document components should be extracted from a converted corpus to form source entities (eg, extract all paragraphs, tables, figures and captions from the AAPG articles)?", "text-hash": 17906500337671162388, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/66", - "hash": 15879756297712818143, "orig": "2. Which annotator model(s) to use on which type of source entity (eg, run the geology or material science annotators on paragraphs)?", "prov": [ { @@ -83183,13 +83035,13 @@ } ], "sref": "#/texts/65", + "subj_hash": 15879756297712818143, "text": "2. Which annotator model(s) to use on which type of source entity (eg, run the geology or material science annotators on paragraphs)?", "text-hash": 2573988876245521638, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/67", - "hash": 16116531546352845311, "orig": "3. Which entity and relationship aggregations to perform on which set of annotated language entities?", "prov": [ { @@ -83197,13 +83049,13 @@ } ], "sref": "#/texts/66", + "subj_hash": 16116531546352845311, "text": "3. Which entity and relationship aggregations to perform on which set of annotated language entities?", "text-hash": 2702000589258555142, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/68", - "hash": 9541434157786316356, "orig": "The DFs can thus be seen as blueprints for processing the corpus into a defined graph topology. Notably, our implementation of DFs and their tasks retains the flexibility of processing not only source documents of a well-known data schema such as from CCS, but virtually any structure that can be transformed to a JSON representation, including data entities from precurated databases. We designed the CPS platform to support export and import of DFs on entirely new datasets without the burden of recreating it from scratch.", "prov": [ { @@ -83211,13 +83063,13 @@ } ], "sref": "#/texts/67", + "subj_hash": 9541434157786316356, "text": "The DFs can thus be seen as blueprints for processing the corpus into a defined graph topology. Notably, our implementation of DFs and their tasks retains the flexibility of processing not only source documents of a well-known data schema such as from CCS, but virtually any structure that can be transformed to a JSON representation, including data entities from precurated databases. We designed the CPS platform to support export and import of DFs on entirely new datasets without the burden of recreating it from scratch.", "text-hash": 6610972392363355263, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/69", - "hash": 997682002692959482, "orig": "Our backend engine can exploit the DAG defined through the DF to massively distribute the individual tasks on all compute resources, because independent branches of the DAG each containing a chain of tasks can execute in parallel. The achievable level of parallelism changes throughout the execution. A practical example is a DF which extracts paragraphs and abstracts from all documents in the corpus, then annotates them and finally aggregates all entities. Here, the extraction tasks are distributed only over all documents; then, in the annotation tasks, we increase the parallelism to all document components. Any synchronization points thus can be pushed back into the aggregation tasks.", "prov": [ { @@ -83225,13 +83077,13 @@ } ], "sref": "#/texts/68", + "subj_hash": 997682002692959482, "text": "Our backend engine can exploit the DAG defined through the DF to massively distribute the individual tasks on all compute resources, because independent branches of the DAG each containing a chain of tasks can execute in parallel. The achievable level of parallelism changes throughout the execution. A practical example is a DF which extracts paragraphs and abstracts from all documents in the corpus, then annotates them and finally aggregates all entities. Here, the extraction tasks are distributed only over all documents; then, in the annotation tasks, we increase the parallelism to all document components. Any synchronization points thus can be pushed back into the aggregation tasks.", "text-hash": 15235788623540001281, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/70", - "hash": 11590138063543342276, "orig": "3 | DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS", "prov": [ { @@ -83239,13 +83091,13 @@ } ], "sref": "#/texts/69", + "subj_hash": 11590138063543342276, "text": "3 | DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS", "text-hash": 9254996552431571455, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/71", - "hash": 16380310806374538602, "orig": "We will now look into the requirements to perform deep data exploration on a populated Knowledge Graph. A deep data exploration requires two fundamental capabilities:", "prov": [ { @@ -83253,13 +83105,13 @@ } ], "sref": "#/texts/70", + "subj_hash": 16380310806374538602, "text": "We will now look into the requirements to perform deep data exploration on a populated Knowledge Graph. A deep data exploration requires two fundamental capabilities:", "text-hash": 4676441280076073873, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/72", - "hash": 5393976293631695754, "orig": "1. perform deep queries on the graph, that is, queries that require multi-hop traversals and", "prov": [ { @@ -83267,13 +83119,13 @@ } ], "sref": "#/texts/71", + "subj_hash": 5393976293631695754, "text": "1. perform deep queries on the graph, that is, queries that require multi-hop traversals and", "text-hash": 11127633169729292465, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/73", - "hash": 1988335831916069382, "orig": "2. perform graph analytics on the full graph or subsets of it on-the-fly.", "prov": [ { @@ -83281,13 +83133,13 @@ } ], "sref": "#/texts/72", + "subj_hash": 1988335831916069382, "text": "2. perform graph analytics on the full graph or subsets of it on-the-fly.", "text-hash": 16834701212347777085, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/74", - "hash": 5147764798816678886, "orig": "Deep queries are essential to dynamically combine independent facts together in the given query context. This would apply for example to explorational queries aimed to characterize petroleum system elements, as detailed in our case study (see section 5). Graph analytics can further reveal hidden structure in the KG topology. Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation.", "prov": [ { @@ -83295,13 +83147,13 @@ } ], "sref": "#/texts/73", + "subj_hash": 5147764798816678886, "text": "Deep queries are essential to dynamically combine independent facts together in the given query context. This would apply for example to explorational queries aimed to characterize petroleum system elements, as detailed in our case study (see section 5). Graph analytics can further reveal hidden structure in the KG topology. Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation.", "text-hash": 11297301064675504413, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/75", - "hash": 285583876932865368, "orig": "Both deep queries and graph analytics have in common that they are inherently expensive to compute on conventional graph databases, due to a rapid expansion of the number of visited nodes as a function of the graph-traversal depth. This is a major obstacle in providing reasonable time-to-solution in the aforementioned cases. Virtually all established graph database products on the market today ** fall victim to this, as was also reported in multiple sources. 11,12 Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform. This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast.", "prov": [ { @@ -83309,13 +83161,13 @@ } ], "sref": "#/texts/74", + "subj_hash": 285583876932865368, "text": "Both deep queries and graph analytics have in common that they are inherently expensive to compute on conventional graph databases, due to a rapid expansion of the number of visited nodes as a function of the graph-traversal depth. This is a major obstacle in providing reasonable time-to-solution in the aforementioned cases. Virtually all established graph database products on the market today ** fall victim to this, as was also reported in multiple sources. 11,12 Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform. This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast.", "text-hash": 16231538415772072803, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/76", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -83323,13 +83175,13 @@ } ], "sref": "#/texts/75", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/77", - "hash": 4361549257370278754, "orig": "7of15", "prov": [ { @@ -83337,13 +83189,13 @@ } ], "sref": "#/texts/76", + "subj_hash": 4361549257370278754, "text": "7of15", "text-hash": 329104161989101977, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/78", - "hash": 13183039880198077038, "orig": "In the remaining part of this section, we elaborate on our newly developed graph engine. In section 3.1, we discuss the implementation design. In section 3.2, we discuss performance results and compare it to Neo4J. Later, in section 3.3, we will explain how the deep queries are formulated and evaluated in the graph engine.", "prov": [ { @@ -83351,13 +83203,13 @@ } ], "sref": "#/texts/77", + "subj_hash": 13183039880198077038, "text": "In the remaining part of this section, we elaborate on our newly developed graph engine. In section 3.1, we discuss the implementation design. In section 3.2, we discuss performance results and compare it to Neo4J. Later, in section 3.3, we will explain how the deep queries are formulated and evaluated in the graph engine.", "text-hash": 10251595290936699029, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/79", - "hash": 13428900458866068249, "orig": "3.1 | Design of the graph engine", "prov": [ { @@ -83365,13 +83217,13 @@ } ], "sref": "#/texts/78", + "subj_hash": 13428900458866068249, "text": "3.1 | Design of the graph engine", "text-hash": 9938197928077211940, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/80", - "hash": 1430911655724119030, "orig": "In computer science, two prevalent implementation schemes for graphs have emerged, one using adjacency lists and one relying on adjacency matrices. 13,14 In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors. \u2020\u2020 The edges are therefore stored as a property of the node. In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples.", "prov": [ { @@ -83379,13 +83231,13 @@ } ], "sref": "#/texts/79", + "subj_hash": 1430911655724119030, "text": "In computer science, two prevalent implementation schemes for graphs have emerged, one using adjacency lists and one relying on adjacency matrices. 13,14 In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors. \u2020\u2020 The edges are therefore stored as a property of the node. In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples.", "text-hash": 17396562708416737549, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/81", - "hash": 13770706479324480755, "orig": "It is commonly known that most graph operations can be translated into matrix-operations using linear algebra. 13 For example, consider the graph-traversal V ! A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W. This can be directly translated into linear algebra as", "prov": [ { @@ -83393,13 +83245,13 @@ } ], "sref": "#/texts/80", + "subj_hash": 13770706479324480755, "text": "It is commonly known that most graph operations can be translated into matrix-operations using linear algebra. 13 For example, consider the graph-traversal V ! A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W. This can be directly translated into linear algebra as", "text-hash": 9596444718520353290, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/82", - "hash": 11165481757050847950, "orig": "w $^{!}$= Av ! with v $^{!}$$_{i}$= 1 if node i \\b V 0 if node i = 2 V , GLYPH \u00f0 1 \u00de", "prov": [ { @@ -83407,13 +83259,13 @@ } ], "sref": "#/texts/81", + "subj_hash": 11165481757050847950, "text": "w $^{!}$= Av ! with v $^{!}$$_{i}$= 1 if node i \\b V 0 if node i = 2 V , GLYPH \u00f0 1 \u00de", "text-hash": 7657471412122468341, "type": "equation" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/83", - "hash": 9572077971492738329, "orig": "and with A being the adjacency matrix representation of the edge A. Translating single graph-traversals into linear algebra operations significantly simplifies the job of deeper graph traversals. For example, to obtain the k-order neighborhood of node set V, one simply needs to evaluate Equation (1) k times recursively, as in", "prov": [ { @@ -83421,13 +83273,13 @@ } ], "sref": "#/texts/82", + "subj_hash": 9572077971492738329, "text": "and with A being the adjacency matrix representation of the edge A. Translating single graph-traversals into linear algebra operations significantly simplifies the job of deeper graph traversals. For example, to obtain the k-order neighborhood of node set V, one simply needs to evaluate Equation (1) k times recursively, as in", "text-hash": 6656818579934057252, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/84", - "hash": 14951391138799557075, "orig": "w $^{!}$= A$^{k}$v $^{!}$= AA \u2026 Av ! GLYPHGLYPH GLYPH GLYPH GLYPH GLYPH : \u00f0 2 \u00de", "prov": [ { @@ -83435,13 +83287,13 @@ } ], "sref": "#/texts/83", + "subj_hash": 14951391138799557075, "text": "w $^{!}$= A$^{k}$v $^{!}$= AA \u2026 Av ! GLYPHGLYPH GLYPH GLYPH GLYPH GLYPH : \u00f0 2 \u00de", "text-hash": 1498163960925914858, "type": "equation" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/85", - "hash": 16602156009514813718, "orig": "Therefore, deep queries can be implemented efficiently as long as Equation (1) can be evaluated efficiently. Over the past decades, lots of research has been conducted in the High Performance Computing community on the acceleration and parallelization of Equation (1) in the context of graphs. In this context, the matrix A is sparse and the linear operation of Equation (1) is referred to as a sparse matrix vector multiplication (SpMV), for which highly optimized implementations have been developed. 15,16 Notably, most advanced graph-analytical operations can be formulated using SpMV operations. The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w ! is equal to v $^{!}$. In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations.", "prov": [ { @@ -83449,13 +83301,13 @@ } ], "sref": "#/texts/84", + "subj_hash": 16602156009514813718, "text": "Therefore, deep queries can be implemented efficiently as long as Equation (1) can be evaluated efficiently. Over the past decades, lots of research has been conducted in the High Performance Computing community on the acceleration and parallelization of Equation (1) in the context of graphs. In this context, the matrix A is sparse and the linear operation of Equation (1) is referred to as a sparse matrix vector multiplication (SpMV), for which highly optimized implementations have been developed. 15,16 Notably, most advanced graph-analytical operations can be formulated using SpMV operations. The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w ! is equal to v $^{!}$. In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations.", "text-hash": 4445641728881669933, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/86", - "hash": 7162849562576593449, "orig": "Since both deep queries and advanced graph analytics hugely benefit from a fast SpMV kernel, we have opted to design the graph engine in the CPS platform to work entirely with the adjacency matrix format.", "prov": [ { @@ -83463,13 +83315,13 @@ } ], "sref": "#/texts/85", + "subj_hash": 7162849562576593449, "text": "Since both deep queries and advanced graph analytics hugely benefit from a fast SpMV kernel, we have opted to design the graph engine in the CPS platform to work entirely with the adjacency matrix format.", "text-hash": 13884895358995816532, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/87", - "hash": 15385417954505503552, "orig": "3.2 | Memory architecture and performance optimization", "prov": [ { @@ -83477,13 +83329,13 @@ } ], "sref": "#/texts/86", + "subj_hash": 15385417954505503552, "text": "3.2 | Memory architecture and performance optimization", "text-hash": 3140380205981200763, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/88", - "hash": 10815650641518265876, "orig": "Both adjacency lists and adjacency matrices-based graph implementations have specific advantages and disadvantages. The adjacency list format is very well suited for node-centric operations since it exploits data-locality for local graph operations, such as first order traversals. However, it proves suboptimal for global scale graph operations, which are required for deep queries and the advanced graph analytics. Here, one typically has to perform graph-traversals starting from many (or even all) nodes and accumulating the weight in the resulting nodes. In an adjacency list format, this often leads to many cache misses during execution, resulting in low performance. Furthermore, parallelizing global graph-traversals in the adjacency list format suffers significantly from concurrent write conflicts between threads during execution. In the adjacency matrix format, these problems are not encountered. The graph-traversals can be directly translated into a SpMV or even a sparse-matrix sparse-vector multiplication (SpMSpV). It has also been well established how to execute the SpMV effectively in a multithreaded fashion, and how to minimize cache-misses by applying a clever sorting of the tuples list. 17", "prov": [ { @@ -83491,13 +83343,13 @@ } ], "sref": "#/texts/87", + "subj_hash": 10815650641518265876, "text": "Both adjacency lists and adjacency matrices-based graph implementations have specific advantages and disadvantages. The adjacency list format is very well suited for node-centric operations since it exploits data-locality for local graph operations, such as first order traversals. However, it proves suboptimal for global scale graph operations, which are required for deep queries and the advanced graph analytics. Here, one typically has to perform graph-traversals starting from many (or even all) nodes and accumulating the weight in the resulting nodes. In an adjacency list format, this often leads to many cache misses during execution, resulting in low performance. Furthermore, parallelizing global graph-traversals in the adjacency list format suffers significantly from concurrent write conflicts between threads during execution. In the adjacency matrix format, these problems are not encountered. The graph-traversals can be directly translated into a SpMV or even a sparse-matrix sparse-vector multiplication (SpMSpV). It has also been well established how to execute the SpMV effectively in a multithreaded fashion, and how to minimize cache-misses by applying a clever sorting of the tuples list. 17", "text-hash": 7939832404963099695, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/89", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -83505,13 +83357,13 @@ } ], "sref": "#/texts/88", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/91", - "hash": 12004249365408683930, "orig": "To illustrate the advantages of the adjacency matrix format for our needs, we show the time-to-solution (TTS) for queries with increasing order of traversals for Neo4J \u2021\u2021 and our graph engine in Figure 3. We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges). Two important observations can be made. Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals. With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour. Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals. This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from.", "prov": [ { @@ -83519,13 +83371,13 @@ } ], "sref": "#/texts/89", + "subj_hash": 12004249365408683930, "text": "To illustrate the advantages of the adjacency matrix format for our needs, we show the time-to-solution (TTS) for queries with increasing order of traversals for Neo4J \u2021\u2021 and our graph engine in Figure 3. We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges). Two important observations can be made. Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals. With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour. Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals. This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from.", "text-hash": 9124629550221661345, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/92", - "hash": 7223381657047466215, "orig": "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO. This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory. In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO. We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers. Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes. *** This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$). Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory.", "prov": [ { @@ -83533,13 +83385,13 @@ } ], "sref": "#/texts/90", + "subj_hash": 7223381657047466215, "text": "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO. This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory. In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO. We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers. Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes. *** This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$). Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory.", "text-hash": 13549646715324792350, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/93", - "hash": 15132906055887224772, "orig": "3.3 | Formulation and evaluation of deep queries", "prov": [ { @@ -83547,13 +83399,13 @@ } ], "sref": "#/texts/91", + "subj_hash": 15132906055887224772, "text": "3.3 | Formulation and evaluation of deep queries", "text-hash": 3609048564712975615, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/94", - "hash": 17129434987283608290, "orig": "The goal of querying a KG is to answer complex questions. As such, users need to be provided with a functionality to formulate complex queries on the KG and quickly evaluate them.", "prov": [ { @@ -83561,13 +83413,13 @@ } ], "sref": "#/texts/92", + "subj_hash": 17129434987283608290, "text": "The goal of querying a KG is to answer complex questions. As such, users need to be provided with a functionality to formulate complex queries on the KG and quickly evaluate them.", "text-hash": 3711217782201102361, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/95", - "hash": 10350406469077463155, "orig": "In order to avoid imposing a complex query language onto users, we have devised a way to define complex graph queries in a declarative format, which we call a workflow. Workflows are represented as a DAG of operations and are conceptually related to DFs. Unlike the former, the nodes of workflow DAGs do not represent data-transformation tasks, but specific graph operations which mutate an input (or intermediate) set of nodes into another set. We call these operations worktasks. For further convenience, we have developed a graphical user interface (UI) which allows to define such workflows in a visual programming approach (see Figure 4).", "prov": [ { @@ -83575,13 +83427,13 @@ } ], "sref": "#/texts/93", + "subj_hash": 10350406469077463155, "text": "In order to avoid imposing a complex query language onto users, we have devised a way to define complex graph queries in a declarative format, which we call a workflow. Workflows are represented as a DAG of operations and are conceptually related to DFs. Unlike the former, the nodes of workflow DAGs do not represent data-transformation tasks, but specific graph operations which mutate an input (or intermediate) set of nodes into another set. We call these operations worktasks. For further convenience, we have developed a graphical user interface (UI) which allows to define such workflows in a visual programming approach (see Figure 4).", "text-hash": 6157696558870441610, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/96", - "hash": 16949854269270315165, "orig": "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions. In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design.", "prov": [ { @@ -83589,13 +83441,13 @@ } ], "sref": "#/texts/94", + "subj_hash": 16949854269270315165, "text": "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions. In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design.", "text-hash": 4111476184068705704, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/97", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -83603,13 +83455,13 @@ } ], "sref": "#/texts/95", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/98", - "hash": 4361549266593946746, "orig": "9of15", "prov": [ { @@ -83617,13 +83469,13 @@ } ], "sref": "#/texts/96", + "subj_hash": 4361549266593946746, "text": "9of15", "text-hash": 329104147597527681, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/100", - "hash": 9802652237802670052, "orig": "3.3.1 | Node retrieval", "prov": [ { @@ -83631,13 +83483,13 @@ } ], "sref": "#/texts/97", + "subj_hash": 9802652237802670052, "text": "3.3.1 | Node retrieval", "text-hash": 6349660887815587103, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/101", - "hash": 5524728206729419689, "orig": "This task finds a set of nodes which satisfy certain search criteria. This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property. The task constructs a node vector v $^{!}$, such that", "prov": [ { @@ -83645,13 +83497,13 @@ } ], "sref": "#/texts/98", + "subj_hash": 5524728206729419689, "text": "This task finds a set of nodes which satisfy certain search criteria. This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property. The task constructs a node vector v $^{!}$, such that", "text-hash": 10699646946138261716, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/102", - "hash": 4043385013945968936, "orig": "v $^{!}$$_{i}$= 1 if node i \\b S 0 if node i = 2 S , GLYPH \u00f0 3 \u00de", "prov": [ { @@ -83659,13 +83511,13 @@ } ], "sref": "#/texts/99", + "subj_hash": 4043385013945968936, "text": "v $^{!}$$_{i}$= 1 if node i \\b S 0 if node i = 2 S , GLYPH \u00f0 3 \u00de", "text-hash": 588808569772103507, "type": "equation" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/103", - "hash": 11778884428660217326, "orig": "where S represents the set of nodes that satisfy the search criteria.", "prov": [ { @@ -83673,13 +83525,13 @@ } ], "sref": "#/texts/100", + "subj_hash": 11778884428660217326, "text": "where S represents the set of nodes that satisfy the search criteria.", "text-hash": 9277850099981357845, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/104", - "hash": 12875050310340408203, "orig": "3.3.2 | Graph traversal", "prov": [ { @@ -83687,13 +83539,13 @@ } ], "sref": "#/texts/101", + "subj_hash": 12875050310340408203, "text": "3.3.2 | Graph traversal", "text-hash": 10555101842315227314, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/105", - "hash": 3785875504044487339, "orig": "The simplest type of graph-traversal is the direct graph-traversal. As explained in detail in section 3.1, these can be implemented as a straightforward SpMV operation w $^{!}$= Av $^{!}$. In more advanced types of graph-traversals, we evaluate all paths of different depth. Since the number of paths connecting two nodes might increase exponentially with the pathlength, one typically reduces the contribution of each path by weighting it with the inverse factorial of the path-length. For example, consider the case in which we want to explore deeper, indirect paths as follows,", "prov": [ { @@ -83701,13 +83553,13 @@ } ], "sref": "#/texts/102", + "subj_hash": 3785875504044487339, "text": "The simplest type of graph-traversal is the direct graph-traversal. As explained in detail in section 3.1, these can be implemented as a straightforward SpMV operation w $^{!}$= Av $^{!}$. In more advanced types of graph-traversals, we evaluate all paths of different depth. Since the number of paths connecting two nodes might increase exponentially with the pathlength, one typically reduces the contribution of each path by weighting it with the inverse factorial of the path-length. For example, consider the case in which we want to explore deeper, indirect paths as follows,", "text-hash": 909351913600217042, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/106", - "hash": 12105626155924658285, "orig": "w $^{!}$= A + A 2 2 ! + A 3 3 ! + GLYPH GLYPH GLYPH GLYPH GLYPH v $^{!}$= e$^{A}$\u2212 1 GLYPH GLYPH v $^{!}$: \u00f0 4 \u00de", "prov": [ { @@ -83715,13 +83567,13 @@ } ], "sref": "#/texts/103", + "subj_hash": 12105626155924658285, "text": "w $^{!}$= A + A 2 2 ! + A 3 3 ! + GLYPH GLYPH GLYPH GLYPH GLYPH v $^{!}$= e$^{A}$- 1 GLYPH GLYPH v $^{!}$: \u00f0 4 \u00de", "text-hash": 9027673695254677144, "type": "equation" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/107", - "hash": 16265612055607243129, "orig": "In its most generic case, a graph-traversal can therefore be written down as a matrix-function applied on an edge, that is, w $^{!}$= fA \u00f0 \u00de v $^{!}$. As discussed in detail in previous work, 2 this type of operation can be evaluated extremely efficiently using a recursive Chebyshev polynomial expansion.", "prov": [ { @@ -83729,13 +83581,13 @@ } ], "sref": "#/texts/104", + "subj_hash": 16265612055607243129, "text": "In its most generic case, a graph-traversal can therefore be written down as a matrix-function applied on an edge, that is, w $^{!}$= fA \u00f0 \u00de v $^{!}$. As discussed in detail in previous work, 2 this type of operation can be evaluated extremely efficiently using a recursive Chebyshev polynomial expansion.", "text-hash": 4579475315408875396, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/108", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -83743,13 +83595,13 @@ } ], "sref": "#/texts/105", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/109", - "hash": 10252446451495472512, "orig": "3.3.3 | Logical operations", "prov": [ { @@ -83757,13 +83609,13 @@ } ], "sref": "#/texts/106", + "subj_hash": 10252446451495472512, "text": "3.3.3 | Logical operations", "text-hash": 6188098459342469819, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/110", - "hash": 17011944206067158637, "orig": "In logical operations, two sets of nodes are merged into one resulting set, each represented through a node vector. There are three common logical operations, AND, OR, and NOT. In the AND and OR operations, we compute the geometric or the arithmetic mean respectively for each pairwise elements in the vectors. In the NOT operation, we inverse the sign for each element of the input vector.", "prov": [ { @@ -83771,13 +83623,13 @@ } ], "sref": "#/texts/107", + "subj_hash": 17011944206067158637, "text": "In logical operations, two sets of nodes are merged into one resulting set, each represented through a node vector. There are three common logical operations, AND, OR, and NOT. In the AND and OR operations, we compute the geometric or the arithmetic mean respectively for each pairwise elements in the vectors. In the NOT operation, we inverse the sign for each element of the input vector.", "text-hash": 3756558606376352920, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/111", - "hash": 16289627123982758705, "orig": "3.3.4 | Transform functions", "prov": [ { @@ -83785,13 +83637,13 @@ } ], "sref": "#/texts/108", + "subj_hash": 16289627123982758705, "text": "3.3.4 | Transform functions", "text-hash": 4767177430745297228, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/112", - "hash": 13969801897340997317, "orig": "Lastly, we implement operations which transform the weights associated with nodes. One such operation renormalizes and ultimately ranks the nodes according to their weight.", "prov": [ { @@ -83799,13 +83651,13 @@ } ], "sref": "#/texts/109", + "subj_hash": 13969801897340997317, "text": "Lastly, we implement operations which transform the weights associated with nodes. One such operation renormalizes and ultimately ranks the nodes according to their weight.", "text-hash": 2263647560089238528, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/113", - "hash": 105697770555684555, "orig": "With these four types of operations, we can express rich queries to answer complex questions, which can have multiple inputs and outputs. Let us now discuss how a workflow is evaluated within the graph engine. Once a workflow has been submitted, each worktask is initially assigned a vector. These vectors are all initialized to zero (v $^{!}$$_{i}$= 0). Next, the graph will analyze the DAG of worktasks and identify which tasks can be run in parallel. This is achieved by performing a topological sort using depth-first traversal, which yields a list in which each item is a set of tasks that can be executed in parallel. The graph engine then proceeds with the parallel task computations.", "prov": [ { @@ -83813,13 +83665,13 @@ } ], "sref": "#/texts/110", + "subj_hash": 105697770555684555, "text": "With these four types of operations, we can express rich queries to answer complex questions, which can have multiple inputs and outputs. Let us now discuss how a workflow is evaluated within the graph engine. Once a workflow has been submitted, each worktask is initially assigned a vector. These vectors are all initialized to zero (v $^{!}$$_{i}$= 0). Next, the graph will analyze the DAG of worktasks and identify which tasks can be run in parallel. This is achieved by performing a topological sort using depth-first traversal, which yields a list in which each item is a set of tasks that can be executed in parallel. The graph engine then proceeds with the parallel task computations.", "text-hash": 16051124526605366258, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/114", - "hash": 15938840672015995359, "orig": "For each task, we obtain a set of nodes with corresponding weights by identifying the nonzero elements in the associated node vector. After executing the full workflow, we therefore obtain for each task a list of nodes which can be sorted according to their weights. The higher the weight of the node, the more relevant this node is. As such, we can also retrace which nodes were important in each stage of the workflow.", "prov": [ { @@ -83827,13 +83679,13 @@ } ], "sref": "#/texts/111", + "subj_hash": 15938840672015995359, "text": "For each task, we obtain a set of nodes with corresponding weights by identifying the nonzero elements in the associated node vector. After executing the full workflow, we therefore obtain for each task a list of nodes which can be sorted according to their weights. The higher the weight of the node, the more relevant this node is. As such, we can also retrace which nodes were important in each stage of the workflow.", "text-hash": 2523894108122369766, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/115", - "hash": 16505790528099785698, "orig": "4 | CLOUD DESIGN AND DEPLOYMENT", "prov": [ { @@ -83841,13 +83693,13 @@ } ], "sref": "#/texts/112", + "subj_hash": 16505790528099785698, "text": "4 | CLOUD DESIGN AND DEPLOYMENT", "text-hash": 4262729847538649369, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/116", - "hash": 14738723905055920039, "orig": "The primary deployment target for the CPS is a cloud environment orchestrated via Kubernetes. We package the full platform assets with a Helm chart for quick deployment on multiple setups. For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes.", "prov": [ { @@ -83855,13 +83707,13 @@ } ], "sref": "#/texts/113", + "subj_hash": 14738723905055920039, "text": "The primary deployment target for the CPS is a cloud environment orchestrated via Kubernetes. We package the full platform assets with a Helm chart for quick deployment on multiple setups. For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes.", "text-hash": 1485721651435830494, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/117", - "hash": 5699550326698755904, "orig": "In Figure 5, we show the high-level cloud design of the CPS. The platform allows to manage and instrument the corpus processing in a multitenant fashion, that is, it handles multiple knowledge ingestion pipelines and it serves multiple knowledge graphs. We call each unit a Knowledge Graph Space (KGS), which consists of a dedicated instance of the graph engine, a dedicated MongoDB database and a bucket on a cloud object store (COS). A dashboard allows each project owner to manage the access and the usage of resources. The KGS can be launched into multiple flavors to optimally balance the utilization of the cluster. These flavors range from a virtual machine with small amount of memory to a full dedicated node including hardware acceleration with GPUs. Once a KGS is created, it can be paused and rescaled without loss of data or downtime.", "prov": [ { @@ -83869,13 +83721,13 @@ } ], "sref": "#/texts/114", + "subj_hash": 5699550326698755904, "text": "In Figure 5, we show the high-level cloud design of the CPS. The platform allows to manage and instrument the corpus processing in a multitenant fashion, that is, it handles multiple knowledge ingestion pipelines and it serves multiple knowledge graphs. We call each unit a Knowledge Graph Space (KGS), which consists of a dedicated instance of the graph engine, a dedicated MongoDB database and a bucket on a cloud object store (COS). A dashboard allows each project owner to manage the access and the usage of resources. The KGS can be launched into multiple flavors to optimally balance the utilization of the cluster. These flavors range from a virtual machine with small amount of memory to a full dedicated node including hardware acceleration with GPUs. Once a KGS is created, it can be paused and rescaled without loss of data or downtime.", "text-hash": 10750023430231115131, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/118", - "hash": 11609131422778723150, "orig": "For the KG creation pipeline, we implemented an asynchronous compute scheme we already use in our CCS solution. 1 The system is exposed to the user via an API frontend which communicates to the compute workers through a message broker and a result backend. The workers operate on the data, which is hosted on a NoSQL database and a cloud object store for data blobs. These workers are dynamically scaled by the cloud orchestrator to best match the current load of the platform.", "prov": [ { @@ -83883,13 +83735,13 @@ } ], "sref": "#/texts/115", + "subj_hash": 11609131422778723150, "text": "For the KG creation pipeline, we implemented an asynchronous compute scheme we already use in our CCS solution. 1 The system is exposed to the user via an API frontend which communicates to the compute workers through a message broker and a result backend. The workers operate on the data, which is hosted on a NoSQL database and a cloud object store for data blobs. These workers are dynamically scaled by the cloud orchestrator to best match the current load of the platform.", "text-hash": 9163968380151462261, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/119", - "hash": 788128893109726279, "orig": "The processing of the KG creation typically starts with the user submitting the DF to the frontend API. The DAG of operations is then interpreted as described in the previous section and fine-grained tasks are submitted to the broker, for example, the whole corpus is split in many independent chunks. The user receives an overall status from the API and is notified when the DF processing has completed.", "prov": [ { @@ -83897,13 +83749,13 @@ } ], "sref": "#/texts/116", + "subj_hash": 788128893109726279, "text": "The processing of the KG creation typically starts with the user submitting the DF to the frontend API. The DAG of operations is then interpreted as described in the previous section and fine-grained tasks are submitted to the broker, for example, the whole corpus is split in many independent chunks. The user receives an overall status from the API and is notified when the DF processing has completed.", "text-hash": 15724564631854553726, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/120", - "hash": 7029344862946908483, "orig": "The KG data are distributed between three storage solutions: a NoSQL database, a cloud object storage (COS) and the KGS. Each node is represented as a document in a NoSQL database which contains all the properties attached to the node, for example, the text of a paragraph. If there is a binary object attached to the node, for example, the PDF document or an image, this is stored on the COS. The KGS contains only the minimal information needed to execute the queries, that is, the connectivity of the graph and the properties which are indexed for filtering and search.", "prov": [ { @@ -83911,13 +83763,13 @@ } ], "sref": "#/texts/117", + "subj_hash": 7029344862946908483, "text": "The KG data are distributed between three storage solutions: a NoSQL database, a cloud object storage (COS) and the KGS. Each node is represented as a document in a NoSQL database which contains all the properties attached to the node, for example, the text of a paragraph. If there is a binary object attached to the node, for example, the PDF document or an image, this is stored on the COS. The KGS contains only the minimal information needed to execute the queries, that is, the connectivity of the graph and the properties which are indexed for filtering and search.", "text-hash": 13806805648097199994, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/121", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -83925,13 +83777,13 @@ } ], "sref": "#/texts/118", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/122", - "hash": 2144926686518491811, "orig": "11of15", "prov": [ { @@ -83939,13 +83791,13 @@ } ], "sref": "#/texts/119", + "subj_hash": 2144926686518491811, "text": "11of15", "text-hash": 16380805707549272026, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/124", - "hash": 18333396269095847693, "orig": "The KGS is exposed to the user via a REST API which is able to aggregate results collected from the different storage sources. To ensure decent performance when serving queries of multiple users, the graph engine can be dynamically scaled horizontally. Most workflow queries execute fast enough such that they can be responded from a synchronous request. Others, especially the graph analytics computations, are more expensive and return large amounts of data. Thus, these queries are executed through an asynchronous API and the results are paginated and streamed back to the user on completion.", "prov": [ { @@ -83953,13 +83805,13 @@ } ], "sref": "#/texts/120", + "subj_hash": 18333396269095847693, "text": "The KGS is exposed to the user via a REST API which is able to aggregate results collected from the different storage sources. To ensure decent performance when serving queries of multiple users, the graph engine can be dynamically scaled horizontally. Most workflow queries execute fast enough such that they can be responded from a synchronous request. Others, especially the graph analytics computations, are more expensive and return large amounts of data. Thus, these queries are executed through an asynchronous API and the results are paginated and streamed back to the user on completion.", "text-hash": 5024699355629880632, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/125", - "hash": 4030998538427149966, "orig": "5 | CASE STUDY: OIL AND GAS EXPLORATION", "prov": [ { @@ -83967,13 +83819,13 @@ } ], "sref": "#/texts/121", + "subj_hash": 4030998538427149966, "text": "5 | CASE STUDY: OIL AND GAS EXPLORATION", "text-hash": 956984534850296757, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/126", - "hash": 10295608624766759271, "orig": "Oil and gas exploration is a complex, technical field of expertise. Unfortunately, the data of many geological processes and entities is scattered across databases (public and proprietary) and corpora of documents, where it is often deeply embedded in text, tables, and figures. This is a serious impediment for efficient exploration of new oil and gas opportunities. For example, geographic information of geological structures can be found in NaturalEarthData, \u2021\u2021\u2021 while their history, evolution, and components (eg, formations with their age, rock-composition, and depth) are discussed in reports (governmental and proprietary) and scientific articles. As such, experts in oil and gas exploration often need to read many documents in order to find all the information of a certain geographic area and get a good understanding of its underlying geology.", "prov": [ { @@ -83981,13 +83833,13 @@ } ], "sref": "#/texts/122", + "subj_hash": 10295608624766759271, "text": "Oil and gas exploration is a complex, technical field of expertise. Unfortunately, the data of many geological processes and entities is scattered across databases (public and proprietary) and corpora of documents, where it is often deeply embedded in text, tables, and figures. This is a serious impediment for efficient exploration of new oil and gas opportunities. For example, geographic information of geological structures can be found in NaturalEarthData, \u2021\u2021\u2021 while their history, evolution, and components (eg, formations with their age, rock-composition, and depth) are discussed in reports (governmental and proprietary) and scientific articles. As such, experts in oil and gas exploration often need to read many documents in order to find all the information of a certain geographic area and get a good understanding of its underlying geology.", "text-hash": 6212506812498931614, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/127", - "hash": 10633780781731536747, "orig": "The main tasks of the experts working in oil and gas exploration are to identify potential new exploration sites. This is typically done by describing a basin or one of its sub-regions. In practice, ' describing a basin ' boils down to identifying all geological formations with their properties in the basin and investigating if these formations constitute a petroleum system. 19 In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal. The source is the rock formation in which the oil or gas was created. Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas. In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal. Each one of these components is comprised of one or more formations, with a certain age and rock composition. To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints. For example, the reservoir", "prov": [ { @@ -83995,13 +83847,13 @@ } ], "sref": "#/texts/123", + "subj_hash": 10633780781731536747, "text": "The main tasks of the experts working in oil and gas exploration are to identify potential new exploration sites. This is typically done by describing a basin or one of its sub-regions. In practice, ' describing a basin ' boils down to identifying all geological formations with their properties in the basin and investigating if these formations constitute a petroleum system. 19 In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal. The source is the rock formation in which the oil or gas was created. Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas. In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal. Each one of these components is comprised of one or more formations, with a certain age and rock composition. To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints. For example, the reservoir", "text-hash": 8189171326047604114, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/128", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -84009,13 +83861,13 @@ } ], "sref": "#/texts/124", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/129", - "hash": 1080447728722590413, "orig": "12", "prov": [ { @@ -84023,13 +83875,13 @@ } ], "sref": "#/texts/125", + "subj_hash": 1080447728722590413, "text": "12", "text-hash": 15441160910541481976, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/130", - "hash": 4361549257087816853, "orig": "of 15", "prov": [ { @@ -84037,13 +83889,13 @@ } ], "sref": "#/texts/126", + "subj_hash": 4361549257087816853, "text": "of 15", "text-hash": 329104161717916080, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/132", - "hash": 10195664788154887804, "orig": "formation has to have a lower depth than the seal formation. Another example of such constraints is that the age of the seal and reservoir has to be older than the source.", "prov": [ { @@ -84051,13 +83903,13 @@ } ], "sref": "#/texts/127", + "subj_hash": 10195664788154887804, "text": "formation has to have a lower depth than the seal formation. Another example of such constraints is that the age of the seal and reservoir has to be older than the source.", "text-hash": 5965659969661688967, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/133", - "hash": 7538054744015619336, "orig": "In order for the CPS platform to help the oil and gas explorationalists in their day-to-day job effectively, it needs to meet two objectives. On the one hand, it needs to create a consistent Knowledge Graph from a document corpus. This Knowledge Graph has to contain all geological formations with their respective properties (eg, geographical locations, depth, age, and rock composition). On the other hand, CPS needs to provide fast query responses, such that one can automatically retrieve potential components of petroleum systems and apply the constraints to filter out promising candidates.", "prov": [ { @@ -84065,13 +83917,13 @@ } ], "sref": "#/texts/128", + "subj_hash": 7538054744015619336, "text": "In order for the CPS platform to help the oil and gas explorationalists in their day-to-day job effectively, it needs to meet two objectives. On the one hand, it needs to create a consistent Knowledge Graph from a document corpus. This Knowledge Graph has to contain all geological formations with their respective properties (eg, geographical locations, depth, age, and rock composition). On the other hand, CPS needs to provide fast query responses, such that one can automatically retrieve potential components of petroleum systems and apply the constraints to filter out promising candidates.", "text-hash": 13307027925001159475, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/134", - "hash": 12426662601736619109, "orig": "During the development and implementation of custom NLU annotators in CPS for oil and gas exploration, the client team worked hand in hand with the IBM Research team to set up a controlled accuracy benchmark in which the key capabilities of the CPS can be quantified. The goal of the benchmark was to test the entire pipeline depicted in Figure 6, that is, from PDF document ingestion to a final, queryable KG. The key components of this specific pipeline are,", "prov": [ { @@ -84079,13 +83931,13 @@ } ], "sref": "#/texts/129", + "subj_hash": 12426662601736619109, "text": "During the development and implementation of custom NLU annotators in CPS for oil and gas exploration, the client team worked hand in hand with the IBM Research team to set up a controlled accuracy benchmark in which the key capabilities of the CPS can be quantified. The goal of the benchmark was to test the entire pipeline depicted in Figure 6, that is, from PDF document ingestion to a final, queryable KG. The key components of this specific pipeline are,", "text-hash": 8341863300316693152, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/135", - "hash": 4162783521620221579, "orig": "1. the conversion of PDF documents into JSON through CCS,", "prov": [ { @@ -84093,13 +83945,13 @@ } ], "sref": "#/texts/130", + "subj_hash": 4162783521620221579, "text": "1. the conversion of PDF documents into JSON through CCS,", "text-hash": 527957687390948274, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/136", - "hash": 5135259059216244866, "orig": "2. the creation of the KG in the CPS from the JSON documents, and", "prov": [ { @@ -84107,13 +83959,13 @@ } ], "sref": "#/texts/131", + "subj_hash": 5135259059216244866, "text": "2. the creation of the KG in the CPS from the JSON documents, and", "text-hash": 11300804242294087097, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/137", - "hash": 16998817296948099535, "orig": "3. the querying of the KG served by CPS to identify petroleum systems elements with their properties.", "prov": [ { @@ -84121,13 +83973,13 @@ } ], "sref": "#/texts/132", + "subj_hash": 16998817296948099535, "text": "3. the querying of the KG served by CPS to identify petroleum systems elements with their properties.", "text-hash": 4121058581451712246, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/138", - "hash": 1205649569241141618, "orig": "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset. The advantage of using this dataset for an accuracy benchmark is that each report includes two parts. One part is verbose text describing the history, evolution, and composition of the fields. The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline. The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties. Therefore, we ingest these reports into CCS and extract both text and tables. Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark.", "prov": [ { @@ -84135,13 +83987,13 @@ } ], "sref": "#/texts/133", + "subj_hash": 1205649569241141618, "text": "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset. The advantage of using this dataset for an accuracy benchmark is that each report includes two parts. One part is verbose text describing the history, evolution, and composition of the fields. The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline. The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties. Therefore, we ingest these reports into CCS and extract both text and tables. Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark.", "text-hash": 17333577132913364873, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/139", - "hash": 12257840490666828590, "orig": "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages. This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure. These numbers are in line with those reported in our previous works. 1 Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer.", "prov": [ { @@ -84149,13 +84001,13 @@ } ], "sref": "#/texts/134", + "subj_hash": 12257840490666828590, "text": "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages. This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure. These numbers are in line with those reported in our previous works. 1 Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer.", "text-hash": 8803415231465414997, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/140", - "hash": 7040847965650746591, "orig": "In step (2), we create the Knowledge Graph by executing a DF that will generate all the entities and relationships relevant to the geology domain. Our language annotator models trained for geology extract geographic areas, geological structures (eg, basins), formations, ages, rocks, petroleum systems, and their elements (PSE) (eg, seal, source, and reservoir). Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs). The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores. Eventually, the KG included 679 296 edges connecting 116 662 nodes.", "prov": [ { @@ -84163,13 +84015,13 @@ } ], "sref": "#/texts/135", + "subj_hash": 7040847965650746591, "text": "In step (2), we create the Knowledge Graph by executing a DF that will generate all the entities and relationships relevant to the geology domain. Our language annotator models trained for geology extract geographic areas, geological structures (eg, basins), formations, ages, rocks, petroleum systems, and their elements (PSE) (eg, seal, source, and reservoir). Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs). The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores. Eventually, the KG included 679 296 edges connecting 116 662 nodes.", "text-hash": 13799731378750663142, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/141", - "hash": 7927601225025519287, "orig": "In step (3), we query the Knowledge Graph using a tailored evaluation workflow. This workflow allows us to identify PSEs and their connected properties in the Knowledge Graph, for example, their age, formation and rock", "prov": [ { @@ -84177,13 +84029,13 @@ } ], "sref": "#/texts/136", + "subj_hash": 7927601225025519287, "text": "In step (3), we query the Knowledge Graph using a tailored evaluation workflow. This workflow allows us to identify PSEs and their connected properties in the Knowledge Graph, for example, their age, formation and rock", "text-hash": 13120217128072555470, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/142", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -84191,13 +84043,13 @@ } ], "sref": "#/texts/137", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/143", - "hash": 1080447728722590402, "orig": "13", "prov": [ { @@ -84205,13 +84057,13 @@ } ], "sref": "#/texts/138", + "subj_hash": 1080447728722590402, "text": "13", "text-hash": 15441160910541481977, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/144", - "hash": 4361549257087816853, "orig": "of 15", "prov": [ { @@ -84219,13 +84071,13 @@ } ], "sref": "#/texts/139", + "subj_hash": 4361549257087816853, "text": "of 15", "text-hash": 329104161717916080, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/147", - "hash": 8207961846673301043, "orig": "composition. In Figure 7, we visualize the DAG of this workflow. The final node weights are accumulated throughout the branches on the workflow and represent the relevance score of each node.", "prov": [ { @@ -84233,13 +84085,13 @@ } ], "sref": "#/texts/140", + "subj_hash": 8207961846673301043, "text": "composition. In Figure 7, we visualize the DAG of this workflow. The final node weights are accumulated throughout the branches on the workflow and represent the relevance score of each node.", "text-hash": 14933956665806015562, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/148", - "hash": 11998199584890640594, "orig": "To evaluate the correctness of the predicted PSE properties, we follow the standard practice of reporting the top-k accuracy. This is computed as the percentage in which any of the k highest ranked answers matches the expected answer, over all documents. In Table 1, we show the top-1, top-2, top-3, and top-5 accuracy for all properties of each petroleum system element. One can make two distinct observations. First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision). Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall). Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory.", "prov": [ { @@ -84247,13 +84099,13 @@ } ], "sref": "#/texts/141", + "subj_hash": 11998199584890640594, "text": "To evaluate the correctness of the predicted PSE properties, we follow the standard practice of reporting the top-k accuracy. This is computed as the percentage in which any of the k highest ranked answers matches the expected answer, over all documents. In Table 1, we show the top-1, top-2, top-3, and top-5 accuracy for all properties of each petroleum system element. One can make two distinct observations. First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision). Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall). Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory.", "text-hash": 9121677663017059817, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/149", - "hash": 16446129547721407877, "orig": "6 | CONCLUSIONS", "prov": [ { @@ -84261,13 +84113,13 @@ } ], "sref": "#/texts/142", + "subj_hash": 16446129547721407877, "text": "6 | CONCLUSIONS", "text-hash": 4326952903809379008, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/150", - "hash": 6720443978031524294, "orig": "With the introduction of the CPS platform, we demonstrate substantial benefit for domain experts and data scientists in exercising deep exploration of published knowledge in a fully integrated, yet modular cloud solution. CPS seamlessly connects to the CSS, complementing it with a highly scalable, automated pipeline to build consistent domain knowledge models and an intuitive, powerful approach to explorational queries and graph-scale analytics. This is accomplished through three fundamental design considerations: (1) We do not require manual data curation or annotation; (2) We built a scalable, efficient architecture to support the ingestion, processing and query workloads, all embedded in", "prov": [ { @@ -84275,13 +84127,13 @@ } ], "sref": "#/texts/143", + "subj_hash": 6720443978031524294, "text": "With the introduction of the CPS platform, we demonstrate substantial benefit for domain experts and data scientists in exercising deep exploration of published knowledge in a fully integrated, yet modular cloud solution. CPS seamlessly connects to the CSS, complementing it with a highly scalable, automated pipeline to build consistent domain knowledge models and an intuitive, powerful approach to explorational queries and graph-scale analytics. This is accomplished through three fundamental design considerations: (1) We do not require manual data curation or annotation; (2) We built a scalable, efficient architecture to support the ingestion, processing and query workloads, all embedded in", "text-hash": 11733208797674542845, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/151", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -84289,13 +84141,13 @@ } ], "sref": "#/texts/144", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/152", - "hash": 2144926730621142072, "orig": "14of15", "prov": [ { @@ -84303,13 +84155,13 @@ } ], "sref": "#/texts/145", + "subj_hash": 2144926730621142072, "text": "14of15", "text-hash": 16380805732317250115, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/153", - "hash": 14222671032550229818, "orig": "a single platform; and (3) We expose the capabilities through an intuitively consumable API and complementary UI tools.", "prov": [ { @@ -84317,13 +84169,13 @@ } ], "sref": "#/texts/146", + "subj_hash": 14222671032550229818, "text": "a single platform; and (3) We expose the capabilities through an intuitively consumable API and complementary UI tools.", "text-hash": 1925144237473465665, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/154", - "hash": 17486770941839589126, "orig": "In our oil and gas case study, we successfully verified our solution for a real-world application with the help of subject matter experts from a client team. Currently, CCS and CPS are actively used in more than five client engagements, most notably in the oil and gas industry as well as in the material science industry.", "prov": [ { @@ -84331,13 +84183,13 @@ } ], "sref": "#/texts/147", + "subj_hash": 17486770941839589126, "text": "In our oil and gas case study, we successfully verified our solution for a real-world application with the help of subject matter experts from a client team. Currently, CCS and CPS are actively used in more than five client engagements, most notably in the oil and gas industry as well as in the material science industry.", "text-hash": 5943448246547541309, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/155", - "hash": 16574813224778118841, "orig": "Future work will focus on processing public repositories such as the arXiv.org library, USPTO, and PubMed in order to make their content available to deep data exploration.", "prov": [ { @@ -84345,13 +84197,13 @@ } ], "sref": "#/texts/148", + "subj_hash": 16574813224778118841, "text": "Future work will focus on processing public repositories such as the arXiv.org library, USPTO, and PubMed in order to make their content available to deep data exploration.", "text-hash": 4472913868502496196, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/156", - "hash": 3356142343274371864, "orig": "DATA AVAILABILITY STATEMENT", "prov": [ { @@ -84359,13 +84211,13 @@ } ], "sref": "#/texts/149", + "subj_hash": 3356142343274371864, "text": "DATA AVAILABILITY STATEMENT", "text-hash": 17772737780533561635, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/157", - "hash": 4778022085288441371, "orig": "Data subject to third party restrictions.", "prov": [ { @@ -84373,13 +84225,13 @@ } ], "sref": "#/texts/150", + "subj_hash": 4778022085288441371, "text": "Data subject to third party restrictions.", "text-hash": 11662592888764396578, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/158", - "hash": 4361549257598904601, "orig": "ORCID", "prov": [ { @@ -84387,13 +84239,13 @@ } ], "sref": "#/texts/151", + "subj_hash": 4361549257598904601, "text": "ORCID", "text-hash": 329104162230294308, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/159", - "hash": 3523281823889115814, "orig": "Peter W. J. Staar https://orcid.org/0000-0002-8088-0823 Michele Dolfi https://orcid.org/0000-0001-7216-8505 Christoph Auer https://orcid.org/0000-0001-5761-0422", "prov": [ { @@ -84401,13 +84253,13 @@ } ], "sref": "#/texts/152", + "subj_hash": 3523281823889115814, "text": "Peter W. J. Staar https://orcid.org/0000-0002-8088-0823 Michele Dolfi https://orcid.org/0000-0001-7216-8505 Christoph Auer https://orcid.org/0000-0001-5761-0422", "text-hash": 1167445296370300893, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/160", - "hash": 8500729849894221215, "orig": "ENDNOTES", "prov": [ { @@ -84415,13 +84267,13 @@ } ], "sref": "#/texts/153", + "subj_hash": 8500729849894221215, "text": "ENDNOTES", "text-hash": 14650266124350583462, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/161", - "hash": 7813503946963688644, "orig": "* For example, ElasticSearch (https://www.elastic.co) and ApacheLucene (https://lucene.apache.org).", "prov": [ { @@ -84429,13 +84281,13 @@ } ], "sref": "#/texts/154", + "subj_hash": 7813503946963688644, "text": "* For example, ElasticSearch (https://www.elastic.co) and ApacheLucene (https://lucene.apache.org).", "text-hash": 12950565807350876671, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/162", - "hash": 9230987401345399746, "orig": "\u2020 Most language entities from a technical field are typically represented in a very specific, rigorous way that can be easily captured by regular expressions. We found that in practice, regular expressions often outperform DL models, since we can simply encode these representations.", "prov": [ { @@ -84443,13 +84295,13 @@ } ], "sref": "#/texts/155", + "subj_hash": 9230987401345399746, "text": "\u2020 Most language entities from a technical field are typically represented in a very specific, rigorous way that can be easily captured by regular expressions. We found that in practice, regular expressions often outperform DL models, since we can simply encode these representations.", "text-hash": 6930355155738437881, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/163", - "hash": 1997735398126013155, "orig": "\u2021 https://www.nltk.org", "prov": [ { @@ -84457,13 +84309,13 @@ } ], "sref": "#/texts/156", + "subj_hash": 1997735398126013155, "text": "\u2021 https://www.nltk.org", "text-hash": 16829787344811603994, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/164", - "hash": 13566764974477978642, "orig": "\u00a7 We follow the standard JSON-schema for references.", "prov": [ { @@ -84471,13 +84323,13 @@ } ], "sref": "#/texts/157", + "subj_hash": 13566764974477978642, "text": "\u00a7 We follow the standard JSON-schema for references.", "text-hash": 9498574747519310377, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/165", - "hash": 4925537010788978399, "orig": "\u00b6 A rather simple similarity metric is to perform a fuzzy comparison of the names of the newly found entities (ie, the name field found in Listing 1). A more sophisticated approach is to use word embeddings to identify if two concepts are similar.", "prov": [ { @@ -84485,13 +84337,13 @@ } ], "sref": "#/texts/158", + "subj_hash": 4925537010788978399, "text": "\u00b6 A rather simple similarity metric is to perform a fuzzy comparison of the names of the newly found entities (ie, the name field found in Listing 1). A more sophisticated approach is to use word embeddings to identify if two concepts are similar.", "text-hash": 11235784383716113382, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/166", - "hash": 16552665876195410077, "orig": "** For example Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb.", "prov": [ { @@ -84499,13 +84351,13 @@ } ], "sref": "#/texts/159", + "subj_hash": 16552665876195410077, "text": "** For example Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb.", "text-hash": 4287966239864749480, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/167", - "hash": 17579390613842440572, "orig": "\u2020\u2020 This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html).", "prov": [ { @@ -84513,13 +84365,13 @@ } ], "sref": "#/texts/160", + "subj_hash": 17579390613842440572, "text": "\u2020\u2020 This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html).", "text-hash": 5855266272999108487, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/168", - "hash": 722212543953276862, "orig": "\u2021\u2021 We chose Neo4J as a reference since it is currently the most popular graph database solution, see https://db-engines.com/en/ranking_ trend/graph+dbms", "prov": [ { @@ -84527,13 +84379,13 @@ } ], "sref": "#/texts/161", + "subj_hash": 722212543953276862, "text": "\u2021\u2021 We chose Neo4J as a reference since it is currently the most popular graph database solution, see https://db-engines.com/en/ranking_ trend/graph+dbms", "text-hash": 15713827668903361733, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/169", - "hash": 11085577343317113173, "orig": "\u00a7\u00a7 http://graph500.org/", "prov": [ { @@ -84541,13 +84393,13 @@ } ], "sref": "#/texts/162", + "subj_hash": 11085577343317113173, "text": "\u00a7\u00a7 http://graph500.org/", "text-hash": 7449211522826545008, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/170", - "hash": 1792096630133661292, "orig": "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", "prov": [ { @@ -84555,13 +84407,13 @@ } ], "sref": "#/texts/163", + "subj_hash": 1792096630133661292, "text": "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", "text-hash": 16747146533825186967, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/171", - "hash": 11462638369524745676, "orig": "*** We assume the weight can be represented by a float value.", "prov": [ { @@ -84569,13 +84421,13 @@ } ], "sref": "#/texts/164", + "subj_hash": 11462638369524745676, "text": "*** We assume the weight can be represented by a float value.", "text-hash": 7288340874592977655, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/172", - "hash": 16611805225457383637, "orig": "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", "prov": [ { @@ -84583,13 +84435,13 @@ } ], "sref": "#/texts/165", + "subj_hash": 16611805225457383637, "text": "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", "text-hash": 4512570954370983408, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/173", - "hash": 1531505125666754945, "orig": "\u2021\u2021\u2021 https://www.naturalearthdata.com/", "prov": [ { @@ -84597,13 +84449,13 @@ } ], "sref": "#/texts/166", + "subj_hash": 1531505125666754945, "text": "\u2021\u2021\u2021 https://www.naturalearthdata.com/", "text-hash": 16922240937803157180, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/174", - "hash": 15684389308320953629, "orig": "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/", "prov": [ { @@ -84611,13 +84463,13 @@ } ], "sref": "#/texts/167", + "subj_hash": 15684389308320953629, "text": "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/", "text-hash": 2845896203864732456, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/175", - "hash": 14590754343934702701, "orig": "REFERENCES", "prov": [ { @@ -84625,13 +84477,13 @@ } ], "sref": "#/texts/168", + "subj_hash": 14590754343934702701, "text": "REFERENCES", "text-hash": 1858797456585454232, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/176", - "hash": 10480452763767134455, "orig": "1. Staar Peter WJ, Michele D, Christoph A, Costas B. Corpus conversion service: a machine learning platform to ingest documents at scale. KDD '18. New York, NY: ACM; 2018:774-782.", "prov": [ { @@ -84639,13 +84491,13 @@ } ], "sref": "#/texts/169", + "subj_hash": 10480452763767134455, "text": "1. Staar Peter WJ, Michele D, Christoph A, Costas B. Corpus conversion service: a machine learning platform to ingest documents at scale. KDD '18. New York, NY: ACM; 2018:774-782.", "text-hash": 7982224532612302350, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/177", - "hash": 11866471329779366855, "orig": "2. Staar Peter WJ, Kl BP, Roxana I, et al. Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance. Chicago, IL: IEEE; 2016:812-821.", "prov": [ { @@ -84653,13 +84505,13 @@ } ], "sref": "#/texts/170", + "subj_hash": 11866471329779366855, "text": "2. Staar Peter WJ, Kl BP, Roxana I, et al. Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance. Chicago, IL: IEEE; 2016:812-821.", "text-hash": 8969674542364551422, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/178", - "hash": 6016885898370676469, "orig": "3. Matteo M, Christoph A, Val'ery W, et al. An information extraction and knowledge graph platform for accelerating biochemical discoveries. ArXiv.abs/1907.08400; 2019.", "prov": [ { @@ -84667,13 +84519,13 @@ } ], "sref": "#/texts/171", + "subj_hash": 6016885898370676469, "text": "3. Matteo M, Christoph A, Val'ery W, et al. An information extraction and knowledge graph platform for accelerating biochemical discoveries. ArXiv.abs/1907.08400; 2019.", "text-hash": 12797055744904705040, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/179", - "hash": 13946275785662847920, "orig": "4. Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019). Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10. https://doi. org/10.2118/197610-MS.", "prov": [ { @@ -84681,13 +84533,13 @@ } ], "sref": "#/texts/172", + "subj_hash": 13946275785662847920, "text": "4. Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019). Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10. https://doi. org/10.2118/197610-MS.", "text-hash": 2278118371277588683, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/180", - "hash": 7693798302433367973, "orig": "5. Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016.", "prov": [ { @@ -84695,13 +84547,13 @@ } ], "sref": "#/texts/173", + "subj_hash": 7693798302433367973, "text": "5. Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016.", "text-hash": 13426003943449777376, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/181", - "hash": 3109792572574236398, "orig": "6. Chiu Jason PC, Eric N. Named entity recognition with bidirectional LSTM-CNNs. TACL. 2016;4:357-370.", "prov": [ { @@ -84709,13 +84561,13 @@ } ], "sref": "#/texts/174", + "subj_hash": 3109792572574236398, "text": "6. Chiu Jason PC, Eric N. Named entity recognition with bidirectional LSTM-CNNs. TACL. 2016;4:357-370.", "text-hash": 17942512882695875605, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/182", - "hash": 8111170387462350170, "orig": "7. Matthew H, Ines M. spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear. 2017.", "prov": [ { @@ -84723,13 +84575,13 @@ } ], "sref": "#/texts/175", + "subj_hash": 8111170387462350170, "text": "7. Matthew H, Ines M. spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear. 2017.", "text-hash": 15035325662489879393, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/183", - "hash": 14682702346227170925, "orig": "8. Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!), a significant petroleum system, northern Grand Banks area, offshore eastern Canada. Am Assoc Pet Geol Bull. 2005;89(9):1203-1237.", "prov": [ { @@ -84737,13 +84589,13 @@ } ], "sref": "#/texts/176", + "subj_hash": 14682702346227170925, "text": "8. Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!), a significant petroleum system, northern Grand Banks area, offshore eastern Canada. Am Assoc Pet Geol Bull. 2005;89(9):1203-1237.", "text-hash": 1825488956803771544, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/184", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -84751,13 +84603,13 @@ } ], "sref": "#/texts/177", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/185", - "hash": 11430385775112165283, "orig": "9. Estrada E. Subgraph centrality in complex networks. Phys Rev E. 2005;71(5):056103.", "prov": [ { @@ -84765,13 +84617,13 @@ } ], "sref": "#/texts/178", + "subj_hash": 11430385775112165283, "text": "9. Estrada E. Subgraph centrality in complex networks. Phys Rev E. 2005;71(5):056103.", "text-hash": 7383629567386653914, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/186", - "hash": 5825495964576843004, "orig": "10. Estrada Ernesto, Higham Desmond J. (2010). Network Properties Revealed through Matrix Functions. SIAM Review, 52, (4), 696-714. http://dx.doi.org/10.1137/090761070.", "prov": [ { @@ -84779,13 +84631,13 @@ } ], "sref": "#/texts/179", + "subj_hash": 5825495964576843004, "text": "10. Estrada Ernesto, Higham Desmond J. (2010). Network Properties Revealed through Matrix Functions. SIAM Review, 52, (4), 696-714. http://dx.doi.org/10.1137/090761070.", "text-hash": 12713726337853489671, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/187", - "hash": 5698421097735371040, "orig": "11. Labs Redis. Benchmarking RedisGraph 1.0. 2019.", "prov": [ { @@ -84793,13 +84645,13 @@ } ], "sref": "#/texts/180", + "subj_hash": 5698421097735371040, "text": "11. Labs Redis. Benchmarking RedisGraph 1.0. 2019.", "text-hash": 10746649133789046619, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/188", - "hash": 5870535063942256428, "orig": "12. TigerGraph. Real-Time Deep Link Analytics. 2018.", "prov": [ { @@ -84807,13 +84659,13 @@ } ], "sref": "#/texts/181", + "subj_hash": 5870535063942256428, "text": "12. TigerGraph. Real-Time Deep Link Analytics. 2018.", "text-hash": 12596629408176592215, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/189", - "hash": 18196767266655606709, "orig": "13. Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra. Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011.", "prov": [ { @@ -84821,13 +84673,13 @@ } ], "sref": "#/texts/182", + "subj_hash": 18196767266655606709, "text": "13. Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra. Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011.", "text-hash": 4940703957630358736, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/190", - "hash": 3623403683642367845, "orig": "14. Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015). Graphs, Matrices, and the GraphBLAS: Seven Good Reasons. Procedia Computer Science, 51, 2453-2462. http://dx.doi.org/10.1016/j.procs.2015.05.353.", "prov": [ { @@ -84835,13 +84687,13 @@ } ], "sref": "#/texts/183", + "subj_hash": 3623403683642367845, "text": "14. Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015). Graphs, Matrices, and the GraphBLAS: Seven Good Reasons. Procedia Computer Science, 51, 2453-2462. http://dx.doi.org/10.1016/j.procs.2015.05.353.", "text-hash": 1288017376570396064, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/191", - "hash": 13936866850854297069, "orig": "15. Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications. Int J High Perform Comput Appl. 2011;25 (4):496-509.", "prov": [ { @@ -84849,13 +84701,13 @@ } ], "sref": "#/texts/184", + "subj_hash": 13936866850854297069, "text": "15. Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications. Int J High Perform Comput Appl. 2011;25 (4):496-509.", "text-hash": 2215522210708998936, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/192", - "hash": 8497015665124263236, "orig": "16. Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS. 2016 IEEE HPEC. 2016; 1-9.", "prov": [ { @@ -84863,13 +84715,13 @@ } ], "sref": "#/texts/185", + "subj_hash": 8497015665124263236, "text": "16. Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS. 2016 IEEE HPEC. 2016; 1-9.", "text-hash": 14644960259055240063, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/193", - "hash": 15947529491299956047, "orig": "17. Ariful A, Mathias J, Aydin B, Ng Esmond G. The reverse Cuthill-McKee algorithm in distributed-memory. 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2017: 22-31.", "prov": [ { @@ -84877,13 +84729,13 @@ } ], "sref": "#/texts/186", + "subj_hash": 15947529491299956047, "text": "17. Ariful A, Mathias J, Aydin B, Ng Esmond G. The reverse Cuthill-McKee algorithm in distributed-memory. 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2017: 22-31.", "text-hash": 2515131343544103798, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/194", - "hash": 14843401725435831033, "orig": "18. Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices. 2005 Pakistan Section Multitopic Conference. 2005 1-7.", "prov": [ { @@ -84891,13 +84743,13 @@ } ], "sref": "#/texts/187", + "subj_hash": 14843401725435831033, "text": "18. Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices. 2005 Pakistan Section Multitopic Conference. 2005 1-7.", "text-hash": 1389998498969001988, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/195", - "hash": 16676439669743530711, "orig": "19. Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997.", "prov": [ { @@ -84905,13 +84757,13 @@ } ], "sref": "#/texts/188", + "subj_hash": 16676439669743530711, "text": "19. Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997.", "text-hash": 4375808543141490670, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/196", - "hash": 2986547206451163051, "orig": "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora. Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", "prov": [ { @@ -84919,13 +84771,13 @@ } ], "sref": "#/texts/189", + "subj_hash": 2986547206451163051, "text": "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora. Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", "text-hash": 17781974298360978642, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/197", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "prov": [ { @@ -84933,6 +84785,7 @@ } ], "sref": "#/texts/190", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", "text-hash": 4975885909619128714, "type": "paragraph" diff --git a/tests/data/texts/references.nlp.jsonl b/tests/data/texts/references.nlp.jsonl index 0e900436..45fb5b21 100644 --- a/tests/data/texts/references.nlp.jsonl +++ b/tests/data/texts/references.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 14523797031010145779, "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, null, null, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, null, null, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, null, null, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, null, null, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, null, null, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, null, null, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, null, null, 74, 120, 74, 120, 24, 32, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, null, null, 122, 128, 122, 128, 33, 34, true, "Nature", "Nature"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 4183773491823524238, "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, null, null, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, null, null, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, null, null, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, null, null, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, null, null, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, null, null, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, null, null, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, null, null, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, null, null, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, null, null, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, null, null, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, null, null, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, null, null, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, null, null, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, null, null, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, null, null, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, null, null, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, null, null, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, null, null, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, null, null, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, null, null, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, null, null, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14523797031010145779, "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, null, null, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, null, null, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, null, null, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, null, null, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, null, null, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, null, null, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, null, null, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, null, null, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, null, null, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, null, null, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, null, null, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, null, null, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, null, null, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, null, null, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4183773491823524238, "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/semantics.nlp.jsonl b/tests/data/texts/semantics.nlp.jsonl index dab1249f..80ea6157 100644 --- a/tests/data/texts/semantics.nlp.jsonl +++ b/tests/data/texts/semantics.nlp.jsonl @@ -1,7 +1,7 @@ -{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 7759316032128614217, "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", 7759316032128614217, "TEXT", "#", "reference", 0.48]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 14339562343989983509, "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", 14339562343989983509, "TEXT", "#", "meta-data", 0.88]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 18143996061359107703, "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", 18143996061359107703, "TEXT", "#", "meta-data", 0.71]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 11035282656876697300, "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", 11035282656876697300, "TEXT", "#", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 14993488697470108654, "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, null, null, 447, 449, 447, 449, 81, 83, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, null, null, 599, 602, 599, 602, 107, 110, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", 14993488697470108654, "TEXT", "#", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 14523797031010145779, "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, null, null, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, null, null, 95, 97, 95, 97, 26, 28, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, null, null, 129, 132, 129, 132, 34, 37, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, null, null, 134, 136, 134, 136, 38, 40, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, null, null, 138, 142, 138, 142, 41, 45, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "#", "hash": 4183773491823524238, "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, null, null, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, null, null, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, null, null, 223, 225, 222, 224, 64, 66, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, null, null, 227, 233, 226, 232, 67, 73, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, null, null, 235, 239, 234, 238, 74, 78, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", 7759316032128614217, "TEXT", "#", "reference", 0.48]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 7759316032128614217, "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", 14339562343989983509, "TEXT", "#", "meta-data", 0.88]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14339562343989983509, "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", 18143996061359107703, "TEXT", "#", "meta-data", 0.71]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 18143996061359107703, "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", 11035282656876697300, "TEXT", "#", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 11035282656876697300, "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, null, null, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, null, null, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", 14993488697470108654, "TEXT", "#", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14993488697470108654, "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, null, null, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, null, null, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, null, null, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, null, null, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, null, null, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14523797031010145779, "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, null, null, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, null, null, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, null, null, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, null, null, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, null, null, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4183773491823524238, "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/terms.nlp.jsonl b/tests/data/texts/terms.nlp.jsonl index 2054336e..b0c73917 100644 --- a/tests/data/texts/terms.nlp.jsonl +++ b/tests/data/texts/terms.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, null, null, 0, 177, 0, 164, 0, 38, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, null, null, 7, 31, 7, 26, 1, 9, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, null, null, 16, 26, 16, 23, 4, 7, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206575305750373, 3269040892355287555, null, null, 16, 25, 16, 22, 4, 6, true, "[f\u0281\u0251\u0303s", "[f\u0281\u0251\u0303s"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, null, null, 27, 30, 24, 25, 7, 8, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, null, null, 48, 63, 43, 58, 12, 14, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, null, null, 64, 122, 59, 109, 14, 24, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, null, null, 73, 95, 68, 88, 17, 19, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, null, null, 96, 106, 89, 98, 19, 21, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, null, null, 107, 121, 99, 108, 21, 23, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, null, null, 123, 127, 110, 114, 25, 29, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, null, null, 124, 126, 111, 113, 26, 28, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, null, null, 128, 130, 115, 117, 29, 30, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, null, null, 133, 140, 120, 127, 31, 32, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, null, null, 141, 158, 128, 145, 32, 34, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, null, null, 159, 161, 146, 148, 34, 35, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, null, null, 162, 176, 149, 163, 35, 37, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, null, null, 170, 176, 157, 163, 36, 37, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, null, null, 178, 375, 165, 362, 38, 74, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, null, null, 186, 194, 173, 181, 40, 41, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, null, null, 195, 211, 182, 198, 41, 43, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, null, null, 204, 227, 191, 214, 42, 45, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, null, null, 216, 227, 203, 214, 44, 45, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, null, null, 228, 234, 215, 221, 45, 47, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, null, null, 235, 243, 222, 230, 47, 48, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 820203855428083856, 16279894764651307170, null, null, 252, 280, 239, 267, 50, 55, true, "Atlantic, Pacific and Indian", "Atlantic, Pacific and Indian"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, null, null, 252, 260, 239, 247, 50, 51, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, null, null, 262, 269, 249, 256, 52, 53, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 4553045173532721202, 17291436396596241777, null, null, 274, 287, 261, 274, 54, 56, true, "Indian oceans", "Indian oceans"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, null, null, 281, 293, 268, 280, 55, 60, true, "oceans,[XII]", "oceans,[XII]"], ["parenthesis", "square brackets", 9818235231875948258, "TEXT", "#", 1.0, 329104147687597164, 12284735790511259080, null, null, 288, 293, 275, 280, 57, 60, true, "[XII]", "[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895542235, 10796895691287030884, null, null, 289, 292, 276, 279, 58, 59, true, "XII", "XII"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 14650940714797320124, 6236592394333508229, null, null, 292, 300, 279, 287, 59, 61, true, "] giving", "] giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, null, null, 308, 314, 295, 301, 63, 65, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, null, null, 315, 361, 302, 348, 65, 70, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, null, null, 362, 368, 349, 355, 70, 72, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, null, null, 369, 374, 356, 361, 72, 73, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, null, null, 376, 637, 363, 624, 74, 125, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, null, null, 376, 410, 363, 397, 74, 78, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, null, null, 389, 395, 376, 382, 75, 76, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, null, null, 411, 415, 398, 402, 78, 79, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, null, null, 416, 438, 403, 425, 79, 82, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 79, 80, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 79, 80, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 81, 82, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 81, 82, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, null, null, 439, 445, 426, 432, 82, 84, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, null, null, 446, 451, 433, 438, 84, 85, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 86, 87, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 86, 87, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, null, null, 461, 467, 448, 454, 87, 89, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 92, 93, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 92, 93, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, null, null, 492, 498, 479, 485, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, null, null, 505, 521, 492, 508, 97, 100, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 97, 98, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 97, 98, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, null, null, 515, 521, 502, 508, 99, 100, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, null, null, 522, 528, 509, 515, 100, 102, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, null, null, 541, 558, 528, 545, 105, 108, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 105, 106, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 105, 106, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 107, 108, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 107, 108, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, null, null, 559, 565, 546, 552, 108, 110, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, null, null, 566, 571, 553, 558, 110, 111, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, null, null, 579, 594, 566, 581, 114, 116, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, null, null, 595, 603, 582, 590, 116, 118, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 118, 120, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 118, 120, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, null, null, 619, 625, 606, 612, 120, 122, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, null, null, 626, 636, 613, 623, 122, 124, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, null, null, 638, 961, 625, 948, 125, 183, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, null, null, 642, 659, 629, 646, 126, 128, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, null, null, 660, 667, 647, 654, 128, 129, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, null, null, 668, 676, 655, 663, 129, 131, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, null, null, 677, 682, 664, 669, 131, 132, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, null, null, 683, 689, 670, 676, 132, 134, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 134, 136, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 134, 136, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, null, null, 709, 717, 696, 704, 137, 139, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 139, 141, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 139, 141, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, null, null, 736, 742, 723, 729, 141, 143, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 143, 145, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 143, 145, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 147, 149, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 147, 149, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, null, null, 778, 798, 765, 785, 150, 152, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, null, null, 799, 806, 786, 793, 152, 153, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, null, null, 807, 820, 794, 807, 153, 155, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, null, null, 821, 823, 808, 810, 155, 156, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, null, null, 824, 864, 811, 851, 156, 163, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 156, 158, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 156, 158, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, null, null, 839, 851, 826, 838, 159, 161, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, null, null, 856, 864, 843, 851, 162, 163, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, null, null, 865, 871, 852, 858, 163, 165, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, null, null, 872, 886, 859, 873, 165, 167, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, null, null, 892, 910, 879, 897, 169, 172, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, null, null, 916, 928, 903, 915, 174, 176, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, null, null, 929, 931, 916, 918, 176, 177, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 177, 178, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 177, 178, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 180, 182, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 180, 182, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, null, null, 962, 1384, 949, 1371, 183, 289, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, null, null, 966, 991, 953, 978, 184, 187, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, null, null, 992, 1020, 979, 1007, 187, 194, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, null, null, 998, 1000, 985, 987, 189, 190, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, null, null, 1007, 1019, 994, 1006, 191, 193, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, null, null, 1021, 1025, 1008, 1012, 194, 195, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, null, null, 1028, 1036, 1015, 1023, 196, 197, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, null, null, 1037, 1041, 1024, 1028, 197, 198, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, null, null, 1042, 1044, 1029, 1031, 198, 199, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, null, null, 1045, 1052, 1032, 1039, 199, 206, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, null, null, 1053, 1056, 1040, 1043, 206, 208, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486414, 16516410147586311652, null, null, 1053, 1055, 1040, 1042, 206, 207, true, "km", "km"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235162, 2654033242220620585, null, null, 1055, 1056, 1042, 1043, 207, 208, true, "2", "2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, null, null, 1057, 1072, 1044, 1059, 208, 219, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, null, null, 1058, 1065, 1045, 1052, 209, 216, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, null, null, 1066, 1071, 1053, 1058, 216, 218, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, null, null, 1077, 1081, 1064, 1068, 220, 221, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, null, null, 1084, 1100, 1071, 1087, 222, 224, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, null, null, 1101, 1103, 1088, 1090, 224, 225, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, null, null, 1104, 1108, 1091, 1095, 225, 226, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, null, null, 1109, 1111, 1096, 1098, 226, 228, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, null, null, 1112, 1119, 1099, 1106, 228, 229, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, null, null, 1120, 1122, 1107, 1109, 229, 230, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, null, null, 1123, 1125, 1110, 1112, 230, 231, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, null, null, 1126, 1133, 1113, 1120, 231, 232, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, null, null, 1134, 1145, 1121, 1132, 232, 243, true, "2023.[5][8]", "2023.[5][8]"], ["numval", "year", 9818235231875948258, "TEXT", "#", 1.0, 389609625548777251, 4871157181485963100, null, null, 1134, 1138, 1121, 1125, 232, 236, true, "2023", "2023"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577901, 10796892691399633238, null, null, 1139, 1142, 1126, 1129, 237, 240, true, "[5]", "[5]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235157, 2654033131002543179, null, null, 1140, 1141, 1127, 1128, 238, 239, true, "5", "5"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577838, 10796892702691935623, null, null, 1142, 1145, 1129, 1132, 240, 243, true, "[8]", "[8]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235152, 2654033132467492508, null, null, 1143, 1144, 1130, 1131, 241, 242, true, "8", "8"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, null, null, 1146, 1152, 1133, 1139, 243, 244, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, null, null, 1153, 1155, 1140, 1142, 244, 245, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14782540711164886662, 14111360077134393327, null, null, 1158, 1170, 1145, 1157, 246, 248, true, "unitary semi", "unitary semi"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, null, null, 1166, 1183, 1153, 1170, 247, 250, true, "semi-presidential", "semi-presidential"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9493572096187311884, 17586523526652496832, null, null, 1171, 1192, 1158, 1179, 249, 251, true, "presidential republic", "presidential republic"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, null, null, 1193, 1197, 1180, 1184, 251, 252, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, null, null, 1202, 1209, 1189, 1196, 253, 254, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, null, null, 1210, 1212, 1197, 1199, 254, 255, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, null, null, 1213, 1218, 1200, 1205, 255, 256, true, "Paris", "Paris"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, null, null, 1224, 1233, 1211, 1220, 258, 261, true, "countrys", "country's"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263873511, null, null, 1224, 1231, 1211, 1218, 258, 259, true, "country", "country"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13491731564569135959, 5310634626438687925, null, null, 1232, 1246, 1219, 1233, 260, 263, true, "s largest city", "s largest city"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, null, null, 1251, 1286, 1238, 1273, 264, 269, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, null, null, 1269, 1286, 1256, 1273, 267, 269, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, null, null, 1288, 1311, 1275, 1298, 270, 274, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, null, null, 1312, 1319, 1299, 1306, 274, 275, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, null, null, 1320, 1383, 1307, 1370, 275, 288, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, null, null, 1320, 1329, 1307, 1316, 275, 276, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, null, null, 1331, 1335, 1318, 1322, 277, 278, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, null, null, 1337, 1345, 1324, 1332, 279, 280, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, null, null, 1347, 1352, 1334, 1339, 281, 282, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, null, null, 1354, 1362, 1341, 1349, 283, 284, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, null, null, 1364, 1374, 1351, 1361, 285, 286, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, null, null, 1379, 1383, 1366, 1370, 287, 288, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.82]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, null, null, 0, 188, 0, 188, 0, 40, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, null, null, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, null, null, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, null, null, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301128091, null, null, 24, 33, 24, 33, 5, 6, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587921185, null, null, 34, 41, 34, 41, 6, 7, true, "pairing", "pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, null, null, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, null, null, 45, 53, 45, 53, 8, 11, true, "two-band", "two-band"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206570221100871, 2911818818181444888, null, null, 49, 55, 49, 55, 10, 12, true, "band s", "band s"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, null, null, 54, 60, 54, 60, 11, 14, true, "s-wave", "s-wave"], ["term", "enum-term-mark-2", 4522339299074192207, "TEXT", "#", 1.0, 8560127426779937860, 4026994879422986240, null, null, 56, 66, 56, 66, 13, 16, true, "wave and d", "wave and d"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625633602560, 14144633872330801396, null, null, 56, 60, 56, 60, 13, 14, true, "wave", "wave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, null, null, 65, 71, 65, 71, 15, 18, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5267005535915851615, 13852357345485708038, null, null, 67, 87, 67, 87, 17, 19, true, "wave superconductors", "wave superconductors"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, null, null, 88, 92, 88, 92, 19, 20, true, "with", "with"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, null, null, 93, 96, 93, 96, 20, 23, true, "D4h", "D4h"], ["numval", "ival", 4522339299074192207, "TEXT", "#", 1.0, 17767354399704235156, 8513040951015345484, null, null, 94, 95, 94, 95, 21, 22, true, "4", "4"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 2516792725790519961, 10765065347046652233, null, null, 95, 105, 95, 105, 22, 24, true, "h symmetry", "h symmetry"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, null, null, 106, 113, 106, 113, 24, 26, true, "in both", "in both"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, null, null, 114, 127, 114, 127, 26, 29, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183561878, null, null, 114, 118, 114, 118, 26, 27, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1366921581602115232, 15058186165846257397, null, null, 119, 137, 119, 137, 28, 30, true, "reversal invariant", "reversal invariant"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, null, null, 146, 148, 146, 148, 32, 33, true, "as", "as"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, null, null, 149, 162, 149, 162, 33, 36, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183567675, null, null, 149, 153, 149, 153, 33, 34, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16155708024079339904, 14846007814114510811, null, null, 154, 171, 154, 171, 35, 37, true, "reversal symmetry", "reversal symmetry"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, null, null, 172, 180, 172, 180, 37, 38, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, null, null, 181, 187, 181, 187, 38, 39, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, null, null, 189, 384, 189, 384, 40, 75, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, null, null, 193, 201, 193, 201, 41, 42, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, null, null, 202, 204, 202, 204, 42, 43, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, null, null, 205, 214, 205, 214, 43, 44, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, null, null, 215, 244, 215, 244, 44, 47, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, null, null, 249, 264, 249, 264, 48, 50, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, null, null, 265, 271, 265, 271, 50, 52, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, null, null, 272, 286, 272, 286, 52, 53, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, null, null, 288, 293, 288, 293, 54, 55, true, "nodes", "nodes"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, null, null, 298, 309, 298, 309, 56, 60, true, "(dis)appear", "(dis)appear"], ["parenthesis", "round brackets", 4522339299074192207, "TEXT", "#", 1.0, 329104053577713079, 7302082272979819201, null, null, 298, 303, 298, 303, 56, 59, true, "(dis)", "(dis)"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 12178341415895452094, 8713100074317547395, null, null, 299, 302, 299, 302, 57, 58, true, "dis", "dis"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 16381206574684919940, 8627590102959499799, null, null, 303, 309, 303, 309, 59, 60, true, "appear", "appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, null, null, 311, 316, 311, 316, 61, 62, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, null, null, 322, 327, 322, 327, 64, 65, true, "leave", "leave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, null, null, 328, 341, 328, 341, 65, 68, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4859188827321755536, 9887725278734779219, null, null, 333, 351, 333, 351, 67, 69, true, "symmetry locations", "symmetry locations"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, null, null, 357, 374, 357, 374, 70, 72, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, null, null, 375, 383, 375, 383, 72, 74, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, null, null, 385, 594, 385, 594, 75, 114, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, null, null, 398, 404, 398, 404, 77, 79, true, "in the", "in the"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, null, null, 405, 411, 405, 411, 79, 82, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 3545604367994270661, 11829255560935036292, null, null, 407, 416, 407, 416, 81, 83, true, "wave case", "wave case"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, null, null, 421, 425, 421, 425, 85, 86, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, null, null, 426, 430, 426, 430, 86, 87, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, null, null, 440, 454, 440, 454, 89, 91, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, null, null, 455, 475, 455, 475, 91, 93, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, null, null, 481, 490, 481, 490, 94, 95, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, null, null, 491, 498, 491, 498, 95, 96, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, null, null, 499, 508, 499, 508, 96, 97, true, "increases", "increases"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, null, null, 515, 526, 515, 526, 99, 102, true, "zero-energy", "zero-energy"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1885602650026083434, 12476719833465444023, null, null, 520, 534, 520, 534, 101, 103, true, "energy Andreev", "energy Andreev"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104159325585799, 66191664906118763, null, null, 535, 540, 535, 540, 103, 104, true, "bound", "bound"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433796974, null, null, 541, 547, 541, 547, 104, 105, true, "states", "states"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, null, null, 548, 555, 548, 555, 105, 107, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, null, null, 560, 570, 560, 570, 108, 109, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, null, null, 571, 573, 571, 573, 109, 110, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, null, null, 574, 593, 574, 593, 110, 113, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.87], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.97]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} +{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, null, null, 0, 177, 0, 164, 0, 37, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, null, null, 7, 31, 7, 26, 1, 9, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, null, null, 16, 26, 16, 23, 4, 7, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206575305750373, 3269040892355287555, null, null, 16, 25, 16, 22, 4, 6, true, "[f\u0281\u0251\u0303s", "[f\u0281\u0251\u0303s"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, null, null, 27, 30, 24, 25, 7, 8, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, null, null, 48, 63, 43, 58, 12, 14, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, null, null, 64, 122, 59, 109, 14, 24, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, null, null, 73, 95, 68, 88, 17, 19, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, null, null, 96, 106, 89, 98, 19, 21, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, null, null, 107, 121, 99, 108, 21, 23, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, null, null, 123, 127, 110, 114, 25, 28, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, null, null, 124, 126, 111, 113, 26, 27, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, null, null, 128, 130, 115, 117, 28, 29, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, null, null, 133, 140, 120, 127, 30, 31, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, null, null, 141, 158, 128, 145, 31, 33, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, null, null, 159, 161, 146, 148, 33, 34, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, null, null, 162, 176, 149, 163, 34, 36, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, null, null, 170, 176, 157, 163, 35, 36, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, null, null, 178, 375, 165, 362, 37, 73, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, null, null, 186, 194, 173, 181, 39, 40, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, null, null, 195, 211, 182, 198, 40, 42, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, null, null, 204, 227, 191, 214, 41, 44, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, null, null, 216, 227, 203, 214, 43, 44, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, null, null, 228, 234, 215, 221, 44, 46, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, null, null, 235, 243, 222, 230, 46, 47, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 820203855428083856, 16279894764651307170, null, null, 252, 280, 239, 267, 49, 54, true, "Atlantic, Pacific and Indian", "Atlantic, Pacific and Indian"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, null, null, 252, 260, 239, 247, 49, 50, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, null, null, 262, 269, 249, 256, 51, 52, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 4553045173532721202, 17291436396596241777, null, null, 274, 287, 261, 274, 53, 55, true, "Indian oceans", "Indian oceans"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, null, null, 281, 293, 268, 280, 54, 59, true, "oceans,[XII]", "oceans,[XII]"], ["parenthesis", "square brackets", 9818235231875948258, "TEXT", "#", 1.0, 329104147687597164, 12284735790511259080, null, null, 288, 293, 275, 280, 56, 59, true, "[XII]", "[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895542235, 10796895691287030884, null, null, 289, 292, 276, 279, 57, 58, true, "XII", "XII"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 14650940714797320124, 6236592394333508229, null, null, 292, 300, 279, 287, 58, 60, true, "] giving", "] giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, null, null, 308, 314, 295, 301, 62, 64, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, null, null, 315, 361, 302, 348, 64, 69, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, null, null, 362, 368, 349, 355, 69, 71, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, null, null, 369, 374, 356, 361, 71, 72, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, null, null, 376, 637, 363, 624, 73, 124, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, null, null, 376, 410, 363, 397, 73, 77, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, null, null, 389, 395, 376, 382, 74, 75, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, null, null, 411, 415, 398, 402, 77, 78, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, null, null, 416, 438, 403, 425, 78, 81, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, null, null, 439, 445, 426, 432, 81, 83, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, null, null, 446, 451, 433, 438, 83, 84, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, null, null, 461, 467, 448, 454, 86, 88, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, null, null, 492, 498, 479, 485, 92, 94, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, null, null, 505, 521, 492, 508, 96, 99, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, null, null, 515, 521, 502, 508, 98, 99, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, null, null, 522, 528, 509, 515, 99, 101, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, null, null, 541, 558, 528, 545, 104, 107, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, null, null, 559, 565, 546, 552, 107, 109, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, null, null, 566, 571, 553, 558, 109, 110, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, null, null, 579, 594, 566, 581, 113, 115, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, null, null, 595, 603, 582, 590, 115, 117, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, null, null, 619, 625, 606, 612, 119, 121, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, null, null, 626, 636, 613, 623, 121, 123, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, null, null, 638, 961, 625, 948, 124, 182, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, null, null, 642, 659, 629, 646, 125, 127, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, null, null, 660, 667, 647, 654, 127, 128, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, null, null, 668, 676, 655, 663, 128, 130, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, null, null, 677, 682, 664, 669, 130, 131, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, null, null, 683, 689, 670, 676, 131, 133, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, null, null, 709, 717, 696, 704, 136, 138, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, null, null, 736, 742, 723, 729, 140, 142, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, null, null, 778, 798, 765, 785, 149, 151, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, null, null, 799, 806, 786, 793, 151, 152, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, null, null, 807, 820, 794, 807, 152, 154, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, null, null, 821, 823, 808, 810, 154, 155, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, null, null, 824, 864, 811, 851, 155, 162, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, null, null, 839, 851, 826, 838, 158, 160, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, null, null, 856, 864, 843, 851, 161, 162, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, null, null, 865, 871, 852, 858, 162, 164, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, null, null, 872, 886, 859, 873, 164, 166, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, null, null, 892, 910, 879, 897, 168, 171, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, null, null, 916, 928, 903, 915, 173, 175, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, null, null, 929, 931, 916, 918, 175, 176, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, null, null, 962, 1384, 949, 1371, 182, 276, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, null, null, 966, 991, 953, 978, 183, 186, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, null, null, 992, 1020, 979, 1007, 186, 193, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, null, null, 998, 1000, 985, 987, 188, 189, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, null, null, 1007, 1019, 994, 1006, 190, 192, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, null, null, 1021, 1025, 1008, 1012, 193, 194, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, null, null, 1028, 1036, 1015, 1023, 195, 196, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, null, null, 1037, 1041, 1024, 1028, 196, 197, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, null, null, 1042, 1044, 1029, 1031, 197, 198, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, null, null, 1045, 1052, 1032, 1039, 198, 201, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, null, null, 1053, 1056, 1040, 1043, 201, 203, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486414, 16516410147586311652, null, null, 1053, 1055, 1040, 1042, 201, 202, true, "km", "km"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235162, 2654033242220620585, null, null, 1055, 1056, 1042, 1043, 202, 203, true, "2", "2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, null, null, 1057, 1072, 1044, 1059, 203, 210, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, null, null, 1058, 1065, 1045, 1052, 204, 207, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, null, null, 1066, 1071, 1053, 1058, 207, 209, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, null, null, 1077, 1081, 1064, 1068, 211, 212, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, null, null, 1084, 1100, 1071, 1087, 213, 215, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, null, null, 1101, 1103, 1088, 1090, 215, 216, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, null, null, 1104, 1108, 1091, 1095, 216, 217, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, null, null, 1109, 1111, 1096, 1098, 217, 218, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, null, null, 1112, 1119, 1099, 1106, 218, 219, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, null, null, 1120, 1122, 1107, 1109, 219, 220, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, null, null, 1123, 1125, 1110, 1112, 220, 221, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, null, null, 1126, 1133, 1113, 1120, 221, 222, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, null, null, 1134, 1145, 1121, 1132, 222, 230, true, "2023.[5][8]", "2023.[5][8]"], ["numval", "year", 9818235231875948258, "TEXT", "#", 1.0, 389609625548777251, 4871157181485963100, null, null, 1134, 1138, 1121, 1125, 222, 223, true, "2023", "2023"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577901, 10796892691399633238, null, null, 1139, 1142, 1126, 1129, 224, 227, true, "[5]", "[5]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235157, 2654033131002543179, null, null, 1140, 1141, 1127, 1128, 225, 226, true, "5", "5"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577838, 10796892702691935623, null, null, 1142, 1145, 1129, 1132, 227, 230, true, "[8]", "[8]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235152, 2654033132467492508, null, null, 1143, 1144, 1130, 1131, 228, 229, true, "8", "8"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, null, null, 1146, 1152, 1133, 1139, 230, 231, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, null, null, 1153, 1155, 1140, 1142, 231, 232, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14782540711164886662, 14111360077134393327, null, null, 1158, 1170, 1145, 1157, 233, 235, true, "unitary semi", "unitary semi"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, null, null, 1166, 1183, 1153, 1170, 234, 237, true, "semi-presidential", "semi-presidential"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9493572096187311884, 17586523526652496832, null, null, 1171, 1192, 1158, 1179, 236, 238, true, "presidential republic", "presidential republic"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, null, null, 1193, 1197, 1180, 1184, 238, 239, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, null, null, 1202, 1209, 1189, 1196, 240, 241, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, null, null, 1210, 1212, 1197, 1199, 241, 242, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, null, null, 1213, 1218, 1200, 1205, 242, 243, true, "Paris", "Paris"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, null, null, 1224, 1233, 1211, 1220, 245, 248, true, "countrys", "country's"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263873511, null, null, 1224, 1231, 1211, 1218, 245, 246, true, "country", "country"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13491731564569135959, 5310634626438687925, null, null, 1232, 1246, 1219, 1233, 247, 250, true, "s largest city", "s largest city"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, null, null, 1251, 1286, 1238, 1273, 251, 256, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, null, null, 1269, 1286, 1256, 1273, 254, 256, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, null, null, 1288, 1311, 1275, 1298, 257, 261, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, null, null, 1312, 1319, 1299, 1306, 261, 262, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, null, null, 1320, 1383, 1307, 1370, 262, 275, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, null, null, 1320, 1329, 1307, 1316, 262, 263, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, null, null, 1331, 1335, 1318, 1322, 264, 265, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, null, null, 1337, 1345, 1324, 1332, 266, 267, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, null, null, 1347, 1352, 1334, 1339, 268, 269, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, null, null, 1354, 1362, 1341, 1349, 270, 271, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, null, null, 1364, 1374, 1351, 1361, 272, 273, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, null, null, 1379, 1383, 1366, 1370, 274, 275, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.82]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 9818235231875948258, "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} +{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, null, null, 0, 188, 0, 188, 0, 40, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, null, null, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, null, null, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, null, null, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301128091, null, null, 24, 33, 24, 33, 5, 6, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587921185, null, null, 34, 41, 34, 41, 6, 7, true, "pairing", "pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, null, null, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, null, null, 45, 53, 45, 53, 8, 11, true, "two-band", "two-band"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206570221100871, 2911818818181444888, null, null, 49, 55, 49, 55, 10, 12, true, "band s", "band s"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, null, null, 54, 60, 54, 60, 11, 14, true, "s-wave", "s-wave"], ["term", "enum-term-mark-2", 4522339299074192207, "TEXT", "#", 1.0, 8560127426779937860, 4026994879422986240, null, null, 56, 66, 56, 66, 13, 16, true, "wave and d", "wave and d"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625633602560, 14144633872330801396, null, null, 56, 60, 56, 60, 13, 14, true, "wave", "wave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, null, null, 65, 71, 65, 71, 15, 18, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5267005535915851615, 13852357345485708038, null, null, 67, 87, 67, 87, 17, 19, true, "wave superconductors", "wave superconductors"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, null, null, 88, 92, 88, 92, 19, 20, true, "with", "with"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, null, null, 93, 96, 93, 96, 20, 23, true, "D4h", "D4h"], ["numval", "ival", 4522339299074192207, "TEXT", "#", 1.0, 17767354399704235156, 8513040951015345484, null, null, 94, 95, 94, 95, 21, 22, true, "4", "4"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 2516792725790519961, 10765065347046652233, null, null, 95, 105, 95, 105, 22, 24, true, "h symmetry", "h symmetry"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, null, null, 106, 113, 106, 113, 24, 26, true, "in both", "in both"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, null, null, 114, 127, 114, 127, 26, 29, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183561878, null, null, 114, 118, 114, 118, 26, 27, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1366921581602115232, 15058186165846257397, null, null, 119, 137, 119, 137, 28, 30, true, "reversal invariant", "reversal invariant"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, null, null, 146, 148, 146, 148, 32, 33, true, "as", "as"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, null, null, 149, 162, 149, 162, 33, 36, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183567675, null, null, 149, 153, 149, 153, 33, 34, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16155708024079339904, 14846007814114510811, null, null, 154, 171, 154, 171, 35, 37, true, "reversal symmetry", "reversal symmetry"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, null, null, 172, 180, 172, 180, 37, 38, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, null, null, 181, 187, 181, 187, 38, 39, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, null, null, 189, 384, 189, 384, 40, 75, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, null, null, 193, 201, 193, 201, 41, 42, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, null, null, 202, 204, 202, 204, 42, 43, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, null, null, 205, 214, 205, 214, 43, 44, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, null, null, 215, 244, 215, 244, 44, 47, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, null, null, 249, 264, 249, 264, 48, 50, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, null, null, 265, 271, 265, 271, 50, 52, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, null, null, 272, 286, 272, 286, 52, 53, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, null, null, 288, 293, 288, 293, 54, 55, true, "nodes", "nodes"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, null, null, 298, 309, 298, 309, 56, 60, true, "(dis)appear", "(dis)appear"], ["parenthesis", "round brackets", 4522339299074192207, "TEXT", "#", 1.0, 329104053577713079, 7302082272979819201, null, null, 298, 303, 298, 303, 56, 59, true, "(dis)", "(dis)"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 12178341415895452094, 8713100074317547395, null, null, 299, 302, 299, 302, 57, 58, true, "dis", "dis"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 16381206574684919940, 8627590102959499799, null, null, 303, 309, 303, 309, 59, 60, true, "appear", "appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, null, null, 311, 316, 311, 316, 61, 62, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, null, null, 322, 327, 322, 327, 64, 65, true, "leave", "leave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, null, null, 328, 341, 328, 341, 65, 68, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4859188827321755536, 9887725278734779219, null, null, 333, 351, 333, 351, 67, 69, true, "symmetry locations", "symmetry locations"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, null, null, 357, 374, 357, 374, 70, 72, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, null, null, 375, 383, 375, 383, 72, 74, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, null, null, 385, 594, 385, 594, 75, 114, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, null, null, 398, 404, 398, 404, 77, 79, true, "in the", "in the"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, null, null, 405, 411, 405, 411, 79, 82, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 3545604367994270661, 11829255560935036292, null, null, 407, 416, 407, 416, 81, 83, true, "wave case", "wave case"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, null, null, 421, 425, 421, 425, 85, 86, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, null, null, 426, 430, 426, 430, 86, 87, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, null, null, 440, 454, 440, 454, 89, 91, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, null, null, 455, 475, 455, 475, 91, 93, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, null, null, 481, 490, 481, 490, 94, 95, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, null, null, 491, 498, 491, 498, 95, 96, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, null, null, 499, 508, 499, 508, 96, 97, true, "increases", "increases"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, null, null, 515, 526, 515, 526, 99, 102, true, "zero-energy", "zero-energy"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1885602650026083434, 12476719833465444023, null, null, 520, 534, 520, 534, 101, 103, true, "energy Andreev", "energy Andreev"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104159325585799, 66191664906118763, null, null, 535, 540, 535, 540, 103, 104, true, "bound", "bound"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433796974, null, null, 541, 547, 541, 547, 104, 105, true, "states", "states"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, null, null, 548, 555, 548, 555, 105, 107, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, null, null, 560, 570, 560, 570, 108, 109, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, null, null, 571, 573, 571, 573, 109, 110, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, null, null, 574, 593, 574, 593, 110, 113, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.87], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.97]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4522339299074192207, "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} diff --git a/tests/data/texts/test_02A_text_01.jsonl b/tests/data/texts/test_02A_text_01.jsonl index fd5d3ffe..9f1f1038 100644 --- a/tests/data/texts/test_02A_text_01.jsonl +++ b/tests/data/texts/test_02A_text_01.jsonl @@ -1 +1 @@ -{"applied-models": ["cite", "expression", "language", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "#", "hash": 253473544312511038, "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, null, null, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, null, null, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, null, null, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"applied_models": ["cite", "expression", "language", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "#", "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, null, null, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, null, null, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, null, null, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 253473544312511038, "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} diff --git a/tests/data/texts/test_02B_text_01.jsonl b/tests/data/texts/test_02B_text_01.jsonl index 7f30acab..65eb53cd 100644 --- a/tests/data/texts/test_02B_text_01.jsonl +++ b/tests/data/texts/test_02B_text_01.jsonl @@ -1 +1 @@ -{"dloc": "#", "hash": 253473544312511038, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 253473544312511038, "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} diff --git a/tests/test_nlp.py b/tests/test_nlp.py index cf8a406a..d64837c3 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -147,14 +147,14 @@ def test_03B(): with open("./tests/data/docs/1806.02284.json") as fr: doc = json.load(fr) - filters = ["applied-models", "properties"] + filters = ["applied_models", "properties"] model = init_nlp_model("sentence;language;term;reference", filters) res = model.apply_on_doc(doc) res = round_floats(res) - for label in ["dloc", "applied-models", + for label in ["dloc", "applied_models", "description", "body", "meta", "page-elements", "texts", "tables", "figures", "properties"]: @@ -180,7 +180,7 @@ def test_03C(): res = model.apply_on_doc(doc) res = round_floats(res) - extract_references_from_doc(res) + #extract_references_from_doc(res) fw = open(target, "w") fw.write(json.dumps(res, indent=2)+"\n") From 640e3e516ac4511cecc936a50b9bbff55e3cc8ea Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Wed, 29 Nov 2023 11:25:25 +0100 Subject: [PATCH 18/22] small updates Signed-off-by: Peter Staar --- .../tooling/structs/elements/text_element.h | 31 +- tests/data/docs/1806.02284.nlp.json | 290 ++++++------ tests/data/docs/doc_01.nlp.json | 442 +++++++++--------- tests/data/texts/references.nlp.jsonl | 4 +- tests/data/texts/semantics.nlp.jsonl | 14 +- tests/data/texts/terms.nlp.jsonl | 4 +- tests/data/texts/test_02A_text_01.jsonl | 2 +- tests/data/texts/test_02B_text_01.jsonl | 2 +- 8 files changed, 397 insertions(+), 392 deletions(-) diff --git a/src/andromeda/tooling/structs/elements/text_element.h b/src/andromeda/tooling/structs/elements/text_element.h index 232eee44..2adbd38c 100644 --- a/src/andromeda/tooling/structs/elements/text_element.h +++ b/src/andromeda/tooling/structs/elements/text_element.h @@ -11,8 +11,13 @@ namespace andromeda typedef std::tuple candidate_type; - const static inline std::string char_tokens_lbl = "char-tokens"; - const static inline std::string word_tokens_lbl = "word-tokens"; + const static inline std::string text_lbl = "text"; + const static inline std::string orig_lbl = "orig"; + + const static inline std::string text_hash_lbl = "text_hash"; + + const static inline std::string char_tokens_lbl = "char_tokens"; + const static inline std::string word_tokens_lbl = "word_tokens"; public: @@ -116,15 +121,15 @@ namespace andromeda { nlohmann::json elem = nlohmann::json::object({}); - elem["text"] = text; - elem["orig"] = orig; + elem[text_lbl] = text; + elem[orig_lbl] = orig; - elem["text-hash"] = text_hash; + elem[text_hash_lbl] = text_hash; // in the default setting, word-tokens will not be dumped - if(filters.count("word-tokens")) + if(filters.count(word_tokens_lbl)) { - elem["word-tokens"] = andromeda::to_json(word_tokens, text); + elem[word_tokens_lbl] = andromeda::to_json(word_tokens, text); } return elem; @@ -136,14 +141,14 @@ namespace andromeda this->clear(); - if(elem.count("orig")) + if(elem.count(orig_lbl)) { - auto ctext = elem.at("orig").get(); + auto ctext = elem.at(orig_lbl).get(); result = set_text(ctext); } - else if(elem.count("text")) + else if(elem.count(text_lbl)) { - auto ctext = elem.at("text").get(); + auto ctext = elem.at(text_lbl).get(); result = set_text(ctext); } else @@ -154,9 +159,9 @@ namespace andromeda return false; } - if(elem.count("word-tokens")) + if(elem.count(word_tokens_lbl)) { - const nlohmann::json& json_word_tokens = elem.at("word-tokens"); + const nlohmann::json& json_word_tokens = elem.at(word_tokens_lbl); andromeda::from_json(word_tokens, json_word_tokens); } diff --git a/tests/data/docs/1806.02284.nlp.json b/tests/data/docs/1806.02284.nlp.json index 67011a98..c39f002e 100644 --- a/tests/data/docs/1806.02284.nlp.json +++ b/tests/data/docs/1806.02284.nlp.json @@ -442,7 +442,7 @@ "sref": "#/figures/0/captions/0", "subj_hash": 16535999405521191333, "text": "Figure 1: A diagram of the conversion pipeline in the Corpus Conversion Service platform. It consists of 5 components: (1) Parsing of the document and its contained bitmap images, (2) Annotating the text of the parsed documents with layout semantic labels, (3) Training models based on the ground-truth acquired by the annotations, (4) Applying machine learned models on the parsed documents to determine the layout semantic label of each cell and finally (5) Assembling the document into a structured data format (e. g. JSON). The main conversion pipeline is depicted in blue and allows you to process and convert documents at scale into a structured data format. The green and orange sections can be used optionally, in order to process scanned documents (green) or train new models based on human annotation (orange).", - "text-hash": 9615465947839001361, + "text_hash": 9615465947839001361, "type": "caption" } ], @@ -473,7 +473,7 @@ "sref": "#/figures/1/captions/0", "subj_hash": 9115121388992506886, "text": "Figure 3: The labelled cells annotated on the title page of a poster abstract about the CCS [11]. Here, the title, authors, affiliation, subtitle, main-text, caption and picture labels are represented respectively as red, green, purple, dark-red, yellow, orange and ivory.", - "text-hash": 17324714532994059892, + "text_hash": 17324714532994059892, "type": "caption" } ], @@ -504,7 +504,7 @@ "sref": "#/figures/2/captions/0", "subj_hash": 14775249782836392461, "text": "Figure 2: The cells obtained for the title page of a poster abstract about the CCS [11] after the parsing stage. During the parsing, we extract all bounding boxes of the text (or cells) in such a way that they all have: (1) a maximum width, (2) are only single line and (3) split into multiple cells in case of listidentifiers, multi-columns or crossing vertical lines (such as in tables).", - "text-hash": 6754994759646241897, + "text_hash": 6754994759646241897, "type": "caption" } ], @@ -535,7 +535,7 @@ "sref": "#/figures/3/captions/0", "subj_hash": 7479698582664857938, "text": "Figure 4: The annotation rate of pages for two different collections (Physical Review B and Elsevier papers) as a function of the number of annotated pages. As one can observe, the mean annotation rate is increasing after each training (depicted by a vertical dashed red line). After the first training, the human annotator is presented a pre-annotated page, using the predictions from the latest model. As the predictions become better with increasing size of the ground-truth, less corrections need to be made and hence more pages can be annotated in similar time intervals.", - "text-hash": 504280783932681152, + "text_hash": 504280783932681152, "type": "caption" } ], @@ -566,7 +566,7 @@ "sref": "#/figures/4/captions/0", "subj_hash": 17801697261174341699, "text": "Figure 5: A typical image of a parsed PDF page that is fed to the default models. In red, we show the detection of the tables combined with the confidence of the model. The results displayed here originate from the YOLOv2 model.", - "text-hash": 8628591081653072559, + "text_hash": 8628591081653072559, "type": "caption" } ], @@ -597,7 +597,7 @@ "sref": "#/figures/5/captions/0", "subj_hash": 3206590615695639432, "text": "Figure 6: Diagram of the architecture of our platform. The architecture is composed from 4 layers: an interface layer with REST-API and frontend, an orchestration layer with a message broker and results backend, a compute layer consisting out of a variable number of asynchronous workers and finally a storage layer providing a NoSQL database and an object store. The NoSQL database stores the queryable meta-data of each file that is stored in the object store.", - "text-hash": 4488590919374042342, + "text_hash": 4488590919374042342, "type": "paragraph" } ], @@ -628,7 +628,7 @@ "sref": "#/figures/6/captions/0", "subj_hash": 6667504298804810757, "text": "Figure 7: Evolution of number of users and number of PDF pages on the platform. The jumps in the number of pages originates from big ingestions of documents performed by some users. This proves that the CCS platform is also able to accomodate these short burst of extreme activity.", - "text-hash": 14863303056159196785, + "text_hash": 14863303056159196785, "type": "caption" } ], @@ -659,7 +659,7 @@ "sref": "#/figures/7/captions/0", "subj_hash": 16175086861512378818, "text": "Figure 8: Speedup in the pipeline components as a function of the number of worker nodes (each with four cores, running four local worker processes).", - "text-hash": 9976536719025941296, + "text_hash": 9976536719025941296, "type": "caption" } ], @@ -742,7 +742,7 @@ "sref": "#/footnotes/0", "subj_hash": 13109829297289816265, "text": "Permission to make digital or hard copies of all or part of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for components of this work owned by others than the author(s) must be honored. Abstracting with credit is permitted. To copy otherwise, or republish, to post on servers or to redistribute to lists, requires prior specific permission and/or a fee. Request permissions from permissions@acm.org.", - "text-hash": 13032800243621120549, + "text_hash": 13032800243621120549, "type": "footnote" }, { @@ -756,7 +756,7 @@ "sref": "#/footnotes/1", "subj_hash": 6056950725387475159, "text": "KDD \u201918, August 19-23, 2018, London, United Kingdom", - "text-hash": 15473297532078357059, + "text_hash": 15473297532078357059, "type": "footnote" }, { @@ -770,7 +770,7 @@ "sref": "#/footnotes/2", "subj_hash": 82667377498161992, "text": "\u00a9 2018 Copyright held by the owner/author(s). Publication rights licensed to ACM. ACM ISBN 978-1-4503-5552-0/18/08...$15.00", - "text-hash": 3001373187661149606, + "text_hash": 3001373187661149606, "type": "footnote" }, { @@ -784,7 +784,7 @@ "sref": "#/footnotes/3", "subj_hash": 4157740687705909538, "text": "https://doi.org/10.1145/3219819.3219834", - "text-hash": 3547103316902677392, + "text_hash": 3547103316902677392, "type": "footnote" }, { @@ -798,7 +798,7 @@ "sref": "#/footnotes/4", "subj_hash": 11592315251976452419, "text": "$^{1}$This number originates from a keynote talk by Phil Ydens, Adobe\u2019s VP Engineering for Document Cloud. A video of the presentation can be found here: https://youtu.be/ 5Axw6OGPYHw", - "text-hash": 14549584251446631343, + "text_hash": 14549584251446631343, "type": "footnote" }, { @@ -812,7 +812,7 @@ "sref": "#/footnotes/5", "subj_hash": 14606262418347792388, "text": "$^{2}$This is clearly the case on the popular arXiv scientific online repository: https://arxiv. org/help/stats/2012_by_area/index", - "text-hash": 7221931865252575858, + "text_hash": 7221931865252575858, "type": "footnote" }, { @@ -826,7 +826,7 @@ "sref": "#/footnotes/6", "subj_hash": 7599391434737032939, "text": "$^{3}$https://www.xpdfreader.com", - "text-hash": 104933780092600391, + "text_hash": 104933780092600391, "type": "footnote" }, { @@ -840,7 +840,7 @@ "sref": "#/footnotes/7", "subj_hash": 9645151231942484724, "text": "$^{4}$http://tabula.technology/", - "text-hash": 11894228156061308002, + "text_hash": 11894228156061308002, "type": "footnote" }, { @@ -854,7 +854,7 @@ "sref": "#/footnotes/8", "subj_hash": 4601317523235901886, "text": "$^{5}$https://www.abbyy.com/", - "text-hash": 3391629868238619420, + "text_hash": 3391629868238619420, "type": "footnote" }, { @@ -868,7 +868,7 @@ "sref": "#/footnotes/9", "subj_hash": 1678429643964197526, "text": "$^{6}$https://www.nuance.com/", - "text-hash": 1693441792396921860, + "text_hash": 1693441792396921860, "type": "footnote" }, { @@ -882,7 +882,7 @@ "sref": "#/footnotes/10", "subj_hash": 9599864648545137978, "text": "$^{7}$https://www.ibm.com/us-en/marketplace/data-capture-and-imaging", - "text-hash": 11939931591922575256, + "text_hash": 11939931591922575256, "type": "footnote" }, { @@ -896,7 +896,7 @@ "sref": "#/footnotes/11", "subj_hash": 11599600757439696813, "text": "$^{8}$a line of text might be printed character-by-character, word-by-word or the entire text snippet.", - "text-hash": 14551310605717713161, + "text_hash": 14551310605717713161, "type": "footnote" }, { @@ -910,7 +910,7 @@ "sref": "#/footnotes/12", "subj_hash": 8672351490975826115, "text": "$^{9}$http://qpdf.sourceforge.net/", - "text-hash": 17478669388996915759, + "text_hash": 17478669388996915759, "type": "footnote" }, { @@ -924,7 +924,7 @@ "sref": "#/footnotes/13", "subj_hash": 13163501967272675186, "text": "$^{10}$It is important to notice that there is no restriction on the number of labels nor the semantic meaning of these labels. The only limitation one has is that the set of semantic labels needs to be consistent across the dataset, but this is evidently true for any type of ML algorithm.", - "text-hash": 13266614683838167520, + "text_hash": 13266614683838167520, "type": "footnote" }, { @@ -938,7 +938,7 @@ "sref": "#/footnotes/14", "subj_hash": 16307739621375260129, "text": "$^{11}$All the data is coming from the bulk data download https://arxiv.org/help/bulk_data_s3", - "text-hash": 10131428201408538445, + "text_hash": 10131428201408538445, "type": "footnote" }, { @@ -952,7 +952,7 @@ "sref": "#/footnotes/15", "subj_hash": 16584453941359713372, "text": "$^{12}$https://journals.aps.org/prb", - "text-hash": 9846388834475228858, + "text_hash": 9846388834475228858, "type": "footnote" }, { @@ -966,7 +966,7 @@ "sref": "#/footnotes/16", "subj_hash": 7152618592130781617, "text": "$^{13}$https://www.openapis.org/", - "text-hash": 831347610428179229, + "text_hash": 831347610428179229, "type": "footnote" }, { @@ -980,7 +980,7 @@ "sref": "#/footnotes/17", "subj_hash": 6593099618554401757, "text": "$^{14}$https://www.rabbitmq.com/", - "text-hash": 15235037228412732729, + "text_hash": 15235037228412732729, "type": "footnote" }, { @@ -994,7 +994,7 @@ "sref": "#/footnotes/18", "subj_hash": 7200807455610600839, "text": "$^{15}$https://www.redis.io/", - "text-hash": 782710111840296691, + "text_hash": 782710111840296691, "type": "footnote" }, { @@ -1008,7 +1008,7 @@ "sref": "#/footnotes/19", "subj_hash": 1602196689966359724, "text": "$^{16}$http://www.celeryproject.org/", - "text-hash": 1778492971410642442, + "text_hash": 1778492971410642442, "type": "footnote" }, { @@ -1022,7 +1022,7 @@ "sref": "#/footnotes/20", "subj_hash": 4503261997707320357, "text": "$^{17}$https://www.mongodb.com/", - "text-hash": 3489272016069066385, + "text_hash": 3489272016069066385, "type": "footnote" }, { @@ -1036,7 +1036,7 @@ "sref": "#/footnotes/21", "subj_hash": 2838531283607966593, "text": "$^{18}$https://kubernetes.io/", - "text-hash": 5145030134774826221, + "text_hash": 5145030134774826221, "type": "footnote" }, { @@ -1050,7 +1050,7 @@ "sref": "#/footnotes/22", "subj_hash": 3398848297472714606, "text": "$^{19}$ibm.biz/privatecloud", - "text-hash": 4585077909629360588, + "text_hash": 4585077909629360588, "type": "footnote" }, { @@ -1064,7 +1064,7 @@ "sref": "#/footnotes/23", "subj_hash": 6724984968154270143, "text": "$^{20}$We don\u2019t show the number of documents, since the number of pages in a document can range from 1 to well above 1000. Consequently, the number of pages is a more robust metric to measure the scaling with regard to the corpus size.", - "text-hash": 14814952417700014875, + "text_hash": 14814952417700014875, "type": "footnote" } ], @@ -94054,7 +94054,7 @@ "sref": "#/tables/0/captions/0", "subj_hash": 9160199179916979172, "text": "Table 1: Time-to-solution and performance results for the Faster RCNN and YOLOv2 models. The training of the models was done on 25000 PDF pages. The prediction (per page) and performance numbers (Recall= \u211b and Precision= \ud835\udcab) were obtained on 5000 page images, where the prediction confidence cutoff was tuned to yield the maximum F1 score for each. All time-to-solution measurements for training were obtained on a POWER8 node with a single Pascal P100 GPU.", - "text-hash": 17279509228359814482, + "text_hash": 17279509228359814482, "type": "paragraph" } ], @@ -94663,7 +94663,7 @@ "sref": "#/tables/1/captions/0", "subj_hash": 18354136439820865774, "text": "Table 3: Comparison for two different journal templates showing the aggregated precision and recall averaged over all labels. Each model has been independently trained on a dataset of 400 pages each. The results show that the ML algorithm proves to perform very well for the multiple document templates, simply by providing a different dataset to train on.", - "text-hash": 8085176655901164108, + "text_hash": 8085176655901164108, "type": "paragraph" } ], @@ -97523,7 +97523,7 @@ "sref": "#/texts/0", "subj_hash": 7377574370756688828, "text": "arXiv:1806.02284v1 [cs.DL] 24 May 2018", - "text-hash": 605943372629925146, + "text_hash": 605943372629925146, "type": "paragraph" }, { @@ -97537,7 +97537,7 @@ "sref": "#/texts/1", "subj_hash": 10227328696767902037, "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", - "text-hash": 11303007895399162817, + "text_hash": 11303007895399162817, "type": "title" }, { @@ -97551,7 +97551,7 @@ "sref": "#/texts/2", "subj_hash": 8770494724746327817, "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", - "text-hash": 17380979703907035493, + "text_hash": 17380979703907035493, "type": "paragraph" }, { @@ -97565,7 +97565,7 @@ "sref": "#/texts/3", "subj_hash": 18258237174351515285, "text": "taa,dol,cau,bek@zurich.ibm.com", - "text-hash": 7883794643982446593, + "text_hash": 7883794643982446593, "type": "paragraph" }, { @@ -97579,7 +97579,7 @@ "sref": "#/texts/4", "subj_hash": 5704354110496947297, "text": "IBM Research", - "text-hash": 16114797969310195405, + "text_hash": 16114797969310195405, "type": "paragraph" }, { @@ -97593,7 +97593,7 @@ "sref": "#/texts/5", "subj_hash": 11056873211244709904, "text": "Rueschlikon, Switzerland", - "text-hash": 10483037511456664190, + "text_hash": 10483037511456664190, "type": "paragraph" }, { @@ -97607,7 +97607,7 @@ "sref": "#/texts/6", "subj_hash": 11788868678004267702, "text": "ABSTRACT", - "text-hash": 14650435066888584228, + "text_hash": 14650435066888584228, "type": "subtitle-level-1" }, { @@ -97621,7 +97621,7 @@ "sref": "#/texts/7", "subj_hash": 3624246356859711021, "text": "1 INTRODUCTION", - "text-hash": 4359834464932974729, + "text_hash": 4359834464932974729, "type": "subtitle-level-1" }, { @@ -97635,7 +97635,7 @@ "sref": "#/texts/8", "subj_hash": 17999848460847860039, "text": "Over the past few decades, the amount of scientific articles and technical literature has increased exponentially in size. Consequently, there is a great need for systems that can ingest these documents at scale and make the contained knowledge discoverable. Unfortunately, both the format of these documents (e.g. the PDF format or bitmap images) as well as the presentation of the data (e.g. complex tables) make the extraction of qualitative and quantitive data extremely challenging. In this paper, we present a modular, cloud-based platform to ingest documents at scale. This platform, called the Corpus Conversion Service (CCS), implements a pipeline which allows users to parse and annotate documents (i.e. collect ground-truth), train machine-learning classification algorithms and ultimately convert any type of PDF or bitmap-documents to a structured content representation format. We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", - "text-hash": 8142196169563728819, + "text_hash": 8142196169563728819, "type": "paragraph" }, { @@ -97649,7 +97649,7 @@ "sref": "#/texts/9", "subj_hash": 14387482728083328702, "text": "ACM Reference Format:", - "text-hash": 7430992009485070364, + "text_hash": 7430992009485070364, "type": "subtitle-level-1" }, { @@ -97663,7 +97663,7 @@ "sref": "#/texts/10", "subj_hash": 11222145795862225841, "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas. 2018. Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.. In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23, 2018, London, United Kingdom. ACM, New York, NY, USA, 9 pages. https://doi.org/10. 1145/3219819.3219834", - "text-hash": 10605881125688857885, + "text_hash": 10605881125688857885, "type": "paragraph" }, { @@ -97677,7 +97677,7 @@ "sref": "#/texts/11", "subj_hash": 16923207262044929933, "text": "It is estimated that there are roughly 2.5 trillion PDF documents currently in circulation$^{1}$. These documents range from manuals for appliances, annual reports of companies, all the way to research papers, detailing a specific scientific discovery. It is needless to say that valuable qualitative and quantitative information is contained in many of them. However, content encoded in PDF is by its nature reduced to streams of printing instructions purposed to faithfully present a pleasing visual layout. Both the data representation and the enormous variability of layouts across these documents make it extremely challenging to access content and transform it into a representation that enables knowledge discovery. In addition to the sheer current quantity of documents, the submission rate of published documents in the scientific domain is also growing exponentially$^{2}$. This poses a real problem, since more and more information published in the PDF documents is going dark. In order to make the content of these documents searchable (e.g. find me a phase-diagram of material XYZ), one needs essentially two components. First, you need to ingest documents from a variety of formats (with the PDF format being the most prevalent one) and convert these documents to structured data files with a structured format such as JSON or XML. Second, you need a query engine that is able to deal with a large variety of concepts (documents, images, authors, tables, etc) extracted from these documents and put these into context.", - "text-hash": 9516638039579926761, + "text_hash": 9516638039579926761, "type": "paragraph" }, { @@ -97691,7 +97691,7 @@ "sref": "#/texts/12", "subj_hash": 3749305213430885773, "text": "In this paper, we focus entirely on the first component, the ingestion of documents and their conversion into structured data files. The solution we propose is thought of as a platform, which at its core has trainable machine learning algorithms. This platform, called Corpus Conversion Service (CCS), consists out of a set of microservices organized in five main components. Each of these microservices can be consumed by its own REST API. This approach not only allows us to build complex pipelines to process documents automatically, but also allows us to develop new microservices against the platform. In order to make this platform scalable, all microservices are integrated through asynchronous communication protocols, which gives us many benefits: It allows to do proper resource management, eliminates strong dependencies and makes the platform robust against single task failures.", - "text-hash": 3945867624210419433, + "text_hash": 3945867624210419433, "type": "paragraph" }, { @@ -97705,7 +97705,7 @@ "sref": "#/texts/13", "subj_hash": 3409470577915009676, "text": "To obtain a thorough understanding of what our platform can do and how well it performs, we have structured this paper as follows: In Section 2, we briefly review the current state-of-the-art document", - "text-hash": 4583103017707584490, + "text_hash": 4583103017707584490, "type": "paragraph" }, { @@ -97719,7 +97719,7 @@ "sref": "#/texts/14", "subj_hash": 17187299362680072378, "text": "processing solutions. In Section 3, we present the design of the platform and its components. In Section 4, we discuss the architecture, the deployment methods, and how well the platform scales with regard to volume (both in users and content) and compute resources, respectively. Finally, in Section 5, we discuss the open questions w.r.t. research and possible next steps in the development of the platform.", - "text-hash": 9243393324994873880, + "text_hash": 9243393324994873880, "type": "paragraph" }, { @@ -97733,7 +97733,7 @@ "sref": "#/texts/15", "subj_hash": 697648145931166262, "text": "2 STATE OF THE ART", - "text-hash": 2385816824895853732, + "text_hash": 2385816824895853732, "type": "subtitle-level-1" }, { @@ -97747,7 +97747,7 @@ "sref": "#/texts/16", "subj_hash": 7935233310532930917, "text": "The task of converting PDF documents and automatic content reconstruction has been an outstanding problem for over three decades [3, 4]. Broadly speaking, there are two types of approaches to this problem. In the first approach, documents are converted with the goal to represent the content as close as possible to the original visual layout of the document. This can be done through a conversion from PDF towards HTML or MS Word for example. The second approach attempts to convert the document into a format that can be easily processed programmatically, i.e. a representation of the document which is not preserving the layout, yet contains all the content from the original document in a structured format. For example, this could be a JSON/XML file with a particular schema. Since our Corpus Conversion Service is thought of as a first step towards a knowledge discovery platform for documents, we have opted for the second approach in our solution.", - "text-hash": 57757550267838417, + "text_hash": 57757550267838417, "type": "paragraph" }, { @@ -97761,7 +97761,7 @@ "sref": "#/texts/17", "subj_hash": 2762070725424637531, "text": "Many solutions have already been developed that tackle the problem of document conversion. There are well known open-source programs such as Xpdf 3 and Tabula$^{4}$. There are also proprietary solutions, such as Abby$^{5}$, Nuance 6 or DataCap$^{7}$. In contrast to the open-source solutions, all three proprietary solutions support also", - "text-hash": 5230489225511983287, + "text_hash": 5230489225511983287, "type": "paragraph" }, { @@ -97775,7 +97775,7 @@ "sref": "#/texts/18", "subj_hash": 7536915191196259776, "text": "extraction from scanned documents. Besides the well known opensource and proprietary solutions, there are also countless academic solutions as well as libraries. For example, the challenge of segmenting complex page layouts is actively addressed by recurring competitions posed by ICDAR, as in Ref. [1] and previous editions.", - "text-hash": 167221319977518894, + "text_hash": 167221319977518894, "type": "paragraph" }, { @@ -97789,7 +97789,7 @@ "sref": "#/texts/19", "subj_hash": 11495493007651807568, "text": "3 PLATFORM DESIGN", - "text-hash": 10322960049580053438, + "text_hash": 10322960049580053438, "type": "subtitle-level-1" }, { @@ -97803,7 +97803,7 @@ "sref": "#/texts/20", "subj_hash": 7650015170039242996, "text": "Given the plethora of existing solutions, we would like to point out how our solution differs from these, and thus approaches the problem of document conversion in a new way.", - "text-hash": 333520156392116834, + "text_hash": 333520156392116834, "type": "paragraph" }, { @@ -97817,7 +97817,7 @@ "sref": "#/texts/21", "subj_hash": 14959508657858158650, "text": "The key idea is that we do not write any rule-based conversion algorithms, but rather utilize generic machine learning algorithms which produce models that can be easily and quickly trained on ground-truth acquired via human annotation. This flexible mechanism allows us to adapt very quickly to certain templates of documents, achieve very accurate results and ultimately eliminates the time-consuming and costly tuning of traditional rule-based conversion algorithms. This approach is in stark contrast to the previously mentioned state of the art conversion systems, which are all rulebased.", - "text-hash": 6868109665737773720, + "text_hash": 6868109665737773720, "type": "paragraph" }, { @@ -97831,7 +97831,7 @@ "sref": "#/texts/22", "subj_hash": 10379300903412882972, "text": "While the approach of swapping rule based solutions with machine learning solutions might appear very natural in the current era of artificial intelligence, it has some serious consequences with regard to its design. First of all, one can not think anymore at the level of a single document. Rather, one should think at the level of a collection of documents (or a corpus of documents). A machine learned model for a single document is not very useful, but a machine learned model for a certain type of documents (e.g. scientific articles, patents, regulations, contracts, etc.) obviously is. This is the first big distinction between the current existing solutions and ours: Existing solutions take one document at a time (no matter its origin) and convert it to a desired output format. Our solution can ingest an entire collection of documents and build machine learned models on top of that. Of course, once the the model is trained, one can convert documents one at a time, too.", - "text-hash": 11150916691880738938, + "text_hash": 11150916691880738938, "type": "paragraph" }, { @@ -97845,7 +97845,7 @@ "sref": "#/texts/23", "subj_hash": 4994395008195818594, "text": "A second discriminator between the existing solutions and ours is that we need to provide the tools to gather ground-truth, since no model can be trained without it. Hence, not only do we need the ability to manage collections of documents, we also need the ability for people to annotate documents and store these annotations in an efficient way. These annotations are then used as ground-truth data to train models. It is clear then that ML models add an extra level of complexity: One has to provide the ability to store a collection of documents, annotate these documents, store the annotations, train a model and ultimately apply this model on unseen documents. For the authors of this paper, it was therefore evident that our solution cannot be a monolithic application. It fits much better the concept of a cloud-based platform that can execute the previously mentioned tasks in an efficient and scalable way.", - "text-hash": 16536368219630364368, + "text_hash": 16536368219630364368, "type": "paragraph" }, { @@ -97859,7 +97859,7 @@ "sref": "#/texts/24", "subj_hash": 4203835122307823579, "text": "3.1 Components", - "text-hash": 3789103236857293111, + "text_hash": 3789103236857293111, "type": "subtitle-level-1" }, { @@ -97873,7 +97873,7 @@ "sref": "#/texts/25", "subj_hash": 13520362244078084911, "text": "Our platform implements a processing pipeline to ingest, manage, parse, annotate, train and eventually convert the data contained in any type of format (scanned or programmatically created PDF, bitmap images, Word documents, etc.) into a structured data format (e.g. JSON or XML).", - "text-hash": 12910497814715733387, + "text_hash": 12910497814715733387, "type": "paragraph" }, { @@ -97887,7 +97887,7 @@ "sref": "#/texts/26", "subj_hash": 1749622367305947670, "text": "This processing pipeline is formed by five components as depicted in Figure 1: (1) parsing of documents into an internal format optimised for ML, (2) Annotation of the label ground-truth in parsed documents (3) training ML models from the acquired annotations, (4) applying the custom ML model(s), (5) assembling the document(s) into a structured data format. If a trained model is available, only components 1, 4 and 5 are needed to convert the documents. If no template-specific machine learned model is available yet, we provide two additional components 2 and 3, that allow users to gather ground-truth and train custom models. It is important to note that the platform comes with default models, so annotation and training are advised to retrieve the best quality output, yet they are optional.", - "text-hash": 1334541935326461060, + "text_hash": 1334541935326461060, "type": "paragraph" }, { @@ -97901,7 +97901,7 @@ "sref": "#/texts/27", "subj_hash": 11083736481641202939, "text": "Let us now elaborate on what each of the five components deliver in the rest of this section.", - "text-hash": 10456209429844276823, + "text_hash": 10456209429844276823, "type": "paragraph" }, { @@ -97915,7 +97915,7 @@ "sref": "#/texts/28", "subj_hash": 15403141463083979171, "text": "3.2 Parsing of Documents", - "text-hash": 6127225399482532623, + "text_hash": 6127225399482532623, "type": "subtitle-level-1" }, { @@ -97929,7 +97929,7 @@ "sref": "#/texts/29", "subj_hash": 12234429517419341922, "text": "In the parsing component of the pipeline, we solve the following straightforward but non-trivial task: Find the bounding boxes of all text-snippets that appear on each PDF page. For simplicity, we will refer to the bounding boxes of the text-snippets as cells in the remainder of the paper. There are two reasons why we are interested in these cells. First, they provide us with the crucial geometric features which are later used in the machine learning models to determine the layout semantic label. Second, the concept of a cell can be easily transferred to scanned documents. In Figure 2, we show the cells obtained from an example PDF page after the parsing stage.", - "text-hash": 13908173772261346000, + "text_hash": 13908173772261346000, "type": "paragraph" }, { @@ -97943,7 +97943,7 @@ "sref": "#/texts/30", "subj_hash": 16957857111665886816, "text": "While the task of finding the cells might appear intuitive from a conceptual point of view, it is not in practice, since there does not exist a unique, precise definition of the cells. This lack of a precise definition has its origins not only in the ISO-standard 8 detailing the PDF document code but also in the variability of the quality of PDFs. Older PDFs which were created from scanned images using OCR typically return cells for each word, while more recent PDFs allow us to create cells for full text-lines. This variability in the geometric features of the cell (e.g. the width of the cell) can negatively impact the performance of later machine learning models. As a consequence, we reduce the variability of the geometric features as much as possible. The more consistent and homogeneous the geometric features of a cell are, the better the machine learning algorithms can do predictions.", - "text-hash": 9481411723883903182, + "text_hash": 9481411723883903182, "type": "paragraph" }, { @@ -97957,7 +97957,7 @@ "sref": "#/texts/31", "subj_hash": 10390915169360946497, "text": "For programmatic PDFs, the text cells are contructed from raw streams of symbols and transforms defined in the PDF document. This operation relies on the iterators provided by the QPDF library$^{9}$.", - "text-hash": 11149022357700220845, + "text_hash": 11149022357700220845, "type": "paragraph" }, { @@ -97971,7 +97971,7 @@ "sref": "#/texts/32", "subj_hash": 15254383206256494278, "text": "For scanned PDFs, we use a two step approach to find the cells by first running all bitmap resources in the PDF through an OCR engine and then merging the extracted text-snippets from the images with the remaining cells from the programmatically created content. Eventually, all the created cells and line paths are stored in an internal JSON format, which also keeps references to the bitmap resources embedded in the PDF document. From this point, all further processing does not need to distinguish between scanned or programmatic sources.", - "text-hash": 6573226034038831156, + "text_hash": 6573226034038831156, "type": "paragraph" }, { @@ -97985,7 +97985,7 @@ "sref": "#/texts/33", "subj_hash": 17759618186065566858, "text": "3.3 Ground-truth gathering through human-annotation", - "text-hash": 8679681341332585960, + "text_hash": 8679681341332585960, "type": "subtitle-level-1" }, { @@ -97999,7 +97999,7 @@ "sref": "#/texts/34", "subj_hash": 11638821473906997927, "text": "In this component, we collect ground-truth for the custom machine learning models to be trained on. Representative ground-truth data is of paramount importance to obtain machine learned models with excellent recall and precision. Unfortunately, it is often very hard to obtain lots of representative ground-truth data, primarily due the the enormous variability across the layout of documents. As a consequence, the concept of annotators for documents were incorporated into the platform from the very beginning. The purpose of these annotators is two-fold.", - "text-hash": 14503768930839698451, + "text_hash": 14503768930839698451, "type": "paragraph" }, { @@ -98013,7 +98013,7 @@ "sref": "#/texts/35", "subj_hash": 13020065077657899116, "text": "First and foremost, the annotators on the platform allow us to gather ground-truth at scale using a crowd-sourcing approach. In each annotation task, we retrieve the original PDF page and its associated parsed components, containing the cells (see Figure 2). We then ask the (human) annotator to assign each cell a layout semantic label. Examples of semantic labels are: Title, Abstract, Authors, Subtitle, Text, Table, Figure, List, etc$^{10}$. In the annotator tool, each layout semantic label is visually represented by a colour. By assigning a colour to each semantic label, the task of semantic annotation is translated into a colouring-task, as can be seen in Figure 3. Since humans are very efficient in visual recognition, this task comes very natural to us. The required time spent to annotate a single page starting from the parsing output has shown to average at 30 seconds over various annotation campaigns.", - "text-hash": 13130850271187616458, + "text_hash": 13130850271187616458, "type": "paragraph" }, { @@ -98027,7 +98027,7 @@ "sref": "#/texts/36", "subj_hash": 10103841011442966464, "text": "The second purpose of the annotators is to visually inspect the quality of our machine learned models. The goal of the models is to emulate the action of the annotators, i.e. to assign a layout semantic label to each cell. Clearly, the result of a prediction for each page can therefore be displayed as if it were an annotated page. This allows the users to directly inspect the results of the models on unseen pages. A direct consequence of this inspection capability in the annotators is that the annotation task can be transformed easily into a correction task, i.e. the human annotators only need to correct the incorrectly predicted labels. Of course, as the models become better over time, the number of corrections needed to be made become less and less. This allows us to significantly reduce the annotation time per document. Since annotations are typically created by professionals with a high hourly rate, the colouring technique allowed us to significantly reduce the cost of ground-truth gathering.", - "text-hash": 11435379797753757998, + "text_hash": 11435379797753757998, "type": "paragraph" }, { @@ -98041,7 +98041,7 @@ "sref": "#/texts/37", "subj_hash": 10982401368140758581, "text": "In Figure 3, we show the annotation-rate in number-of-annotatedpages per minute. The vertical red lines indicate that a training was performed on the annotated pages, and a new, improved model is", - "text-hash": 10548529097098469537, + "text_hash": 10548529097098469537, "type": "paragraph" }, { @@ -98055,7 +98055,7 @@ "sref": "#/texts/38", "subj_hash": 887751753527930563, "text": "used from that point to predict the labels. Since the corrections become less and less, the rate of annotation goes up. It is needless to say that this inter-leaving of training models (based on annotated ground-truth) and annotation benefits directly from our platform approach, since each task (submitting page-annotations, training the model, applying the model for predicting the labels) comes down to an asynchronous call to a microservice. The accelerated annotation leads to a speed-up of a factor of 10 for ground-truth collection.", - "text-hash": 2205427981859754031, + "text_hash": 2205427981859754031, "type": "paragraph" }, { @@ -98069,7 +98069,7 @@ "sref": "#/texts/39", "subj_hash": 4695688617288377564, "text": "3.4 Machine Learning: Training models & Applying models", - "text-hash": 16834670239362291258, + "text_hash": 16834670239362291258, "type": "subtitle-level-1" }, { @@ -98083,7 +98083,7 @@ "sref": "#/texts/40", "subj_hash": 3275001812318455279, "text": "In the CCS, there are essentially two types of machine-learning models. On the one hand, we have default models, which are designed to be layout independent. They take a raster image of the page to identify and locate basic objects, such as tables, figures, formulas, etc. On the other hand, we also support the training of custom, templatespecific models, which are designed to specialize on a particular layout template and allow us to convert and extract the data out of documents with very high precision and recall. They will classify each cell in the page with regard to their layout semantic label.", - "text-hash": 4429706140044408651, + "text_hash": 4429706140044408651, "type": "paragraph" }, { @@ -98097,7 +98097,7 @@ "sref": "#/texts/41", "subj_hash": 15354930767839681193, "text": "3.4.1 Metrics. Before discussing the performance of the models, let us first define the precision and recall metrics used to evaluate the results. The first observation is that the output of a machine learned model is exactly the same of what a human annotator would produce, i.e. it will assign a text cell a semantic label. The correctness of this label is what we aim to measure with the recall and precision metrics. The second observation is that we deal with a", - "text-hash": 6184852591532473349, + "text_hash": 6184852591532473349, "type": "paragraph" }, { @@ -98111,7 +98111,7 @@ "sref": "#/texts/42", "subj_hash": 6337233386759158728, "text": "multi-class classification problem, i.e. we don't have only two labels, but many possible semantic labels, hence the performance result will be the average of the recall and precision for each label.", - "text-hash": 15490331838172880166, + "text_hash": 15490331838172880166, "type": "paragraph" }, { @@ -98125,7 +98125,7 @@ "sref": "#/texts/43", "subj_hash": 2249972239307071508, "text": "The recall (= \u211b) and precision (= \ud835\udcab) for a given label on a page is defined by the standard formulas", - "text-hash": 1131271437908497026, + "text_hash": 1131271437908497026, "type": "paragraph" }, { @@ -98139,7 +98139,7 @@ "sref": "#/texts/44", "subj_hash": 12383805870947794174, "text": "\u211b = t$_{p}$ t$_{p}$ + f$_{p}$ , \ud835\udcab = t$_{p}$ t$_{p}$ + f$_{n}$ , (1)", - "text-hash": 14055366495763095132, + "text_hash": 14055366495763095132, "type": "equation" }, { @@ -98153,7 +98153,7 @@ "sref": "#/texts/45", "subj_hash": 7053654953998543393, "text": "where t$_{p}$, f$_{p}$ and f$_{n}$ represent respectively true positive, false positive and false negative predicted labels.", - "text-hash": 642098605774556301, + "text_hash": 642098605774556301, "type": "paragraph" }, { @@ -98167,7 +98167,7 @@ "sref": "#/texts/46", "subj_hash": 15921044595687116426, "text": "3.4.2 Default Models. The aim of the default models is to identify specific, ubiquitous objects in documents. Examples of such objects are tables, figures with their captions, mathematical formulas, etc. Due to the high variability in both the document layout as well as in the representation of these objects, we need very robust object detection methods. Currently, the most robust methods for detecting objects are deep neural networks such as R-CNNs (and their derivatives Fast-and Faster-R-CNN) [5, 6, 10], the YOLO architecture [8, 9] and the SSD networks [7]. On our platform, we have the Faster-R-CNN [10] and the YOLOv2 [9] networks available as individual microservices, both for training and predictions.", - "text-hash": 5618307884355612648, + "text_hash": 5618307884355612648, "type": "paragraph" }, { @@ -98181,7 +98181,7 @@ "sref": "#/texts/47", "subj_hash": 12234068400463628788, "text": "In this paper, we will focus only on the detection of table objects, but the same principles described in the following analysis are also applied for other type of objects.", - "text-hash": 13907813772802190178, + "text_hash": 13907813772802190178, "type": "paragraph" }, { @@ -98195,7 +98195,7 @@ "sref": "#/texts/48", "subj_hash": 4628466594790006384, "text": "The networks available on our platform have been trained on arXiv data$^{11}$. We have annotated 30000 PDF pages and know the", - "text-hash": 16911352314006995166, + "text_hash": 16911352314006995166, "type": "paragraph" }, { @@ -98209,7 +98209,7 @@ "sref": "#/texts/49", "subj_hash": 9651706913678711778, "text": "location of at least one table on each page. From these 30000 pages, we have used 25000 pages as training data and kept the other 5000 pages for evaluation. Due to the large size of the dataset, we did not need to employ any data-augmentation technique, which is usually necessary for object-detection or image-classification algorithms.", - "text-hash": 11888191065829014864, + "text_hash": 11888191065829014864, "type": "paragraph" }, { @@ -98223,7 +98223,7 @@ "sref": "#/texts/50", "subj_hash": 1363251178266051349, "text": "We do not locate the table directly on the image of the original PDF page but rather on an image representation of the parsed PDF page with cell boxes. The reasoning behind this is to reduce the variability between all input PDF pages as much as possible and thus increase the effectiveness of the deep neural networks. An example of such an image can be seen in Figure 5. The red bounding boxes around the tables are a result of the prediction using YOLOv2 and are absent in the image on which the model predicts. Note that the visualisation of the text cells visible in Figure 5 does not include any text of the original document, but only its geometrical definition. This is important when one compares for example Asian documents with Japanese, Chinese or Korean characters versus European languages with the roman alphabet. We do not want the deep neural network to focus on the specific characters, but rather on the layout of the cells in the page.", - "text-hash": 2009046567395259777, + "text_hash": 2009046567395259777, "type": "paragraph" }, { @@ -98237,7 +98237,7 @@ "sref": "#/texts/51", "subj_hash": 18259197018396996238, "text": "Let us now discuss both deep neural network training microservices on the platform. In Table 1, we show the time-to-solution for training and predicting a single page as well as the performance in terms of recall and precision. In the training phase, we ensure that both algorithms ran each 100 epochs, i.e. all 25000 page images were fed to the network 100 times. We observe that the out-ofthe-box Faster R-CNN from Tensorflow does not implement any batching during the training phase, while YOLOv2 batches 8 images at a time, thanks to an image resizing which is automatically applied. We believe that this is the main origin for the discrepancy of time-to-solution for the training phase. The same holds true for the prediction. Therefore, from the point of view of the platform, the YOLOv2 architecture seems better suited for deployment, as it allows to have a much higher throughput (\u2248 10 pages/sec/node).", - "text-hash": 7883278994224882668, + "text_hash": 7883278994224882668, "type": "paragraph" }, { @@ -98251,7 +98251,7 @@ "sref": "#/texts/52", "subj_hash": 14663676516964431047, "text": "For the performance analysis, let us outline one pre-processing stage which is needed before computing the metrics described previously. The object-detection networks predict a set of bounding boxes with a confidence level between 0 and 1. We use these bounding boxes to associate with each cell a label, which is in this particular case either Table or Not-Table, depending on whether they overlap", - "text-hash": 7164504172498806323, + "text_hash": 7164504172498806323, "type": "paragraph" }, { @@ -98265,7 +98265,7 @@ "sref": "#/texts/53", "subj_hash": 4577067829072175096, "text": "Table 2: Performance results for the template specific model of the Physical Review B journals. The confusion matrix highlights the huge imbalance between the number of text cells with different labels. The usage of ensemble machine learning methods allows to achieve a very high accuracy over all label types.", - "text-hash": 3406859306294395222, + "text_hash": 3406859306294395222, "type": "paragraph" }, { @@ -98279,7 +98279,7 @@ "sref": "#/texts/54", "subj_hash": 2569392033451362672, "text": "with the predicted bounding box. The corresponding recall and precision are then computed for this dual-class classification problem. In order to do a fair comparison of the two networks, we optimise the precision and recall metrics with regard to the predicted confidence. For YOLOv2 we observe that the recall goes down and the precision goes up as the confidence is increased, obtaining a maximum F1 score of 98.7% at a confidence level of 0. 5. The Faster R-CNN method is also performing quite well, but has slightly lower precision and recall numbers. We believe this originates from the selective search algorithm which is used to determine regions of interest. The images we feed it are not typical photographic images (made with a camera) but layout visualisations. The selective search algorithm in Faster R-CNN might not be optimal for such type of objects.", - "text-hash": 5414143675771382750, + "text_hash": 5414143675771382750, "type": "paragraph" }, { @@ -98293,7 +98293,7 @@ "sref": "#/texts/55", "subj_hash": 14539041145469267811, "text": "3.4.3 Template specific Models. The goal of template specific models is to obtain a better extraction quality by specializing the model on a specific template. This is necessary in many technical fields, where the accuracy of the extracted data is of paramount importance. Furthermore, many technical documents in a specific field typically appear in a certain template and it often makes sense to take advantage of this template to improve extraction quality.", - "text-hash": 6991735551340401103, + "text_hash": 6991735551340401103, "type": "paragraph" }, { @@ -98307,7 +98307,7 @@ "sref": "#/texts/56", "subj_hash": 8607014065143641201, "text": "For an algorithm to fit in the interactive platform design we identified a few key requirements. First, it is crucial that the model can generate good results with a limited set of pages. In practice this means the algorithm needs to perform well for 100-400 annotated pages, or the equivalent of a couple of man-hours for annotation. Second it must be robust against extreme imbalance of the labeled data. It is clear that cells of the label Title will be much more uncommon than cells with the label of Text. Last, the model needs to be very quick in training and predicting, since it will support the interactive annotation process.", - "text-hash": 17832237182951286493, + "text_hash": 17832237182951286493, "type": "paragraph" }, { @@ -98321,7 +98321,7 @@ "sref": "#/texts/57", "subj_hash": 1994904537764312371, "text": "For these reasons, we chose random forest [2] as a machine learning algorithm for template specific models. Random forest algorithms are known to be trained fast and can produce very accurate results on limited, but relatively structured data. In our case,", - "text-hash": 1377511684573734815, + "text_hash": 1377511684573734815, "type": "paragraph" }, { @@ -98335,7 +98335,7 @@ "sref": "#/texts/58", "subj_hash": 7742256726079628058, "text": "this structure originates of course from the template. Furthermore, random forest is an ensemble method, meaning that they learn on the distribution function of the features, and not individual dataelements. As a consequence, they are typically more robust against imbalance of the labeled data, since the distribution functions are renormalised.", - "text-hash": 250119056806139256, + "text_hash": 250119056806139256, "type": "paragraph" }, { @@ -98349,7 +98349,7 @@ "sref": "#/texts/59", "subj_hash": 8810233123818174294, "text": "The random forest method is applied to each cell of the page based on a feature vector representing all of its properties. For example, the feature vector contains information as the page number, the size of the text cell, its position, as well as the distance from the neighbouring cells. Additionally to pure geometrical information we include the text style (normal, italic, or bold) and some text statistics, as the fraction of numeric characters. We then improve the obtained results by performing subsequent iterations with other random forest methods, which operate on an enlarged feature space including the previously predicted labels of the neighbourhood around the current cell.", - "text-hash": 17619932035192809924, + "text_hash": 17619932035192809924, "type": "paragraph" }, { @@ -98363,7 +98363,7 @@ "sref": "#/texts/60", "subj_hash": 16446711449286912460, "text": "It is important to realize that almost all of these features are purely geometrical. This allows us to apply exactly the same machine learning methods on both scanned and programmatic PDF documents.", - "text-hash": 9704353849744984874, + "text_hash": 9704353849744984874, "type": "paragraph" }, { @@ -98377,7 +98377,7 @@ "sref": "#/texts/61", "subj_hash": 9558434107504657973, "text": "In Table 2, we illustrate the performance results of the models for a particular scientific journal, Physical Review B$^{12}$. We randomly chose 100 open-access papers and annotated 400 pages of them with 6 semantic labels. Tables 2 shows the confusion matrix between the true and the predicted labels as well as the derived recall and precision metrics for each label. We observe that the recall and precision numbers are excellent, with most of them above 99%. This is not surprising, since we are building models that specialise for a particular template.", - "text-hash": 11971893452237256865, + "text_hash": 11971893452237256865, "type": "paragraph" }, { @@ -98391,7 +98391,7 @@ "sref": "#/texts/62", "subj_hash": 18349896906192842040, "text": "Moreover, the same ML algorithm proves to perform very well on different document templates, as is evident from the numbers shown in Table 3, simply by providing it with different datasets to train on. The latter is the power of our platform: we can re-use the same machine-learning algorithm to generate different models solely based on the data gathered by the annotation on the platform. We do not need to define rules and heuristics or update code in order to deal with new types of documents. We only need to gather more data.", - "text-hash": 8080940474762743702, + "text_hash": 8080940474762743702, "type": "paragraph" }, { @@ -98405,7 +98405,7 @@ "sref": "#/texts/63", "subj_hash": 10082834006373808153, "text": "3.5 Assembly", - "text-hash": 11736313095563614837, + "text_hash": 11736313095563614837, "type": "subtitle-level-1" }, { @@ -98419,7 +98419,7 @@ "sref": "#/texts/64", "subj_hash": 15253541252152665681, "text": "In this component, we build a structured data file in JSON or XML format, which contains all the text and objects (e.g. tables) from the original document, retaining the layout semantics. This structured data file is constructed by assembling all the cells from the parsed file", - "text-hash": 6565628665194191037, + "text_hash": 6565628665194191037, "type": "paragraph" }, { @@ -98433,7 +98433,7 @@ "sref": "#/texts/65", "subj_hash": 3904142170608486950, "text": "Listing 1: Excerpt from the JSON output of the Corpus Conversion Service after conversion of this paper.", - "text-hash": 4079383948124449940, + "text_hash": 4079383948124449940, "type": "paragraph" }, { @@ -98447,7 +98447,7 @@ "sref": "#/texts/66", "subj_hash": 6410818076508661508, "text": "{ 'description ': { 'title ': 'Corpus Conversion Service: A machine learning platform to ingest documents at scale. ',, \u2192 'abstract ': 'Over the past few decades, the amount of scientific articles [...] ',, \u2192 'affiliations ': 'IBM Research Rueschlikon, Switzerland ', 'authors ': 'Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas ', \u2192 }, 'main-text ': [{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...], 'tables ': [{...},...], 'images ': [{...},...] }", - "text-hash": 15129105844666734962, + "text_hash": 15129105844666734962, "type": "paragraph" }, { @@ -98461,7 +98461,7 @@ "sref": "#/texts/67", "subj_hash": 12813875992986832439, "text": "in combination with their associated predicted (or human-annotated) layout semantic labels. It should be noted that no machine learning is used in this component. It is purely rule based and therefore completely deterministic.", - "text-hash": 13337022012432085155, + "text_hash": 13337022012432085155, "type": "paragraph" }, { @@ -98475,7 +98475,7 @@ "sref": "#/texts/68", "subj_hash": 11030869010407626539, "text": "The assembly phase is a two step process. First, one gathers all the cells with their associated layout semantic label and sorts them according to reading order. Then, the text of all cells that have the same label is contracted into a temporary document objects. Third, we build the internal structure of the temporary document objects, based on the information provided by the models. The latter is only applicable for internally structured objects, such as tables. An example of the generated JSON output is shown in Listing 1.", - "text-hash": 10508897272021404039, + "text_hash": 10508897272021404039, "type": "paragraph" }, { @@ -98489,7 +98489,7 @@ "sref": "#/texts/69", "subj_hash": 2142320548375900929, "text": "4 ARCHITECTURE AND ORCHESTRATION OF CLOUD BASED MICROSERVICES", - "text-hash": 950718827856471405, + "text_hash": 950718827856471405, "type": "subtitle-level-1" }, { @@ -98503,7 +98503,7 @@ "sref": "#/texts/70", "subj_hash": 12747011194397783283, "text": "In this section, we describe how the microservices in each of the components of the platform are deployed and orchestrated. Before discussing the technical details, we would like to point out our requirements for the architecture of the platform. These requirements are all related to scaling. Specifically, we would like the platform to scale with the number of documents, the number of users and last but not least the number of cloud based compute resources. In other words, we want a service that can ingest millions of documents, serve potentially thousands of users and scale its compute resources such that the time-to-solution is reasonable at all times for any operation. It is clear that the architecture of such a service is heavily influenced by these requirements.", - "text-hash": 13395059553653450335, + "text_hash": 13395059553653450335, "type": "paragraph" }, { @@ -98517,7 +98517,7 @@ "sref": "#/texts/71", "subj_hash": 174789262945188010, "text": "4.1 Platform layers", - "text-hash": 3197077882590976520, + "text_hash": 3197077882590976520, "type": "subtitle-level-1" }, { @@ -98531,7 +98531,7 @@ "sref": "#/texts/72", "subj_hash": 7228893318503650455, "text": "In Figure 1, we have shown a diagram of our pipeline on the platform to process documents. In Figure 6, we show a sketch of its", - "text-hash": 475277818666452483, + "text_hash": 475277818666452483, "type": "paragraph" }, { @@ -98545,7 +98545,7 @@ "sref": "#/texts/73", "subj_hash": 9230667184712205690, "text": "architecture. As one can observe, we have grouped the service into four layers. These layers are:", - "text-hash": 12309253064221915096, + "text_hash": 12309253064221915096, "type": "paragraph" }, { @@ -98559,7 +98559,7 @@ "sref": "#/texts/74", "subj_hash": 17419815751432442882, "text": "(1) An interface layer which implements a REST-API and a user frontend: The user frontend is an AngularJS application build on top of the REST-API and implements the annotators for ground-truth gathering. The REST-API is built and documented using the OpenAPI specifications 13 and is implemented in Python.", - "text-hash": 8731693174932948592, + "text_hash": 8731693174932948592, "type": "paragraph" }, { @@ -98573,7 +98573,7 @@ "sref": "#/texts/75", "subj_hash": 11194226403360998426, "text": "(2) An orchestration layer that schedules the tasks for the microservices, stores their execution status and final result. The task scheduling is done with the Message Broker RabbitMQ$^{14}$. The results are stored in the in-memory data store Redis$^{15}$. In order to perform certain consecutive tasks (e. g. parsing a PDF page with embedded scanned images requires first a parsing of the programmatic PDF page to extract the images and then an OCR service to extract the cells from these images) we can directly chain tasks, such that subsequent steps are only executed if the previous terminated successfully. This approach allows for a very robust, fault-tolerant service with very little downtime.", - "text-hash": 10633901501381588600, + "text_hash": 10633901501381588600, "type": "paragraph" }, { @@ -98587,7 +98587,7 @@ "sref": "#/texts/76", "subj_hash": 9005324696118733701, "text": "(3) A compute layer that implements the microservices detailed in section 3: Each of the workers in this layer executes the available microservices (e.g. parsing, training, predictions, assembly, etc). In order to scale with regard to resources, we have encapsulated each microservice into a distributed task queue using the Celery library$^{16}$. This allows us to dynamically scale the compute resources, since each worker can be spawned automatically on the cluster and register itself to the broker. The workers are not only consumers of tasks, but may also produce new ones. This is the case for the requests", - "text-hash": 17146307233289309425, + "text_hash": 17146307233289309425, "type": "paragraph" }, { @@ -98601,7 +98601,7 @@ "sref": "#/texts/77", "subj_hash": 8082547756621048511, "text": "operating on the whole corpus. Whenever possible we parallelise the compute-heavy operations at the page (or document) level.", - "text-hash": 18059523399368641563, + "text_hash": 18059523399368641563, "type": "paragraph" }, { @@ -98615,7 +98615,7 @@ "sref": "#/texts/78", "subj_hash": 7791113385466815951, "text": "(4) A storage layer that stores all documents as well as the results from the microservices: The storage layer is composed out of two services: an object-store that stores all documents and processed stages (e. g. the parsed PDF pages, trained models, etc) and a queryable NoSQL database that stores the metadata of each file in the object-store. The object-store allows us to easily scale the storage with regard to the number of processed documents. However, it is not build to be queried efficiently, which is why we put a NoSQL database (in our case we use MongoDB$^{17}$) on top to manage the storage and act as an access-layer.", - "text-hash": 18360382746077681451, + "text_hash": 18360382746077681451, "type": "paragraph" }, { @@ -98629,7 +98629,7 @@ "sref": "#/texts/79", "subj_hash": 2845012065511066307, "text": "By design, all the microservices in the compute layer are stateless, i.e. they don't manage any data, but only operate on it. This allows us to trust the additional stability and data safety concerns to the state-of-the-art tools that we have chosen, such as MongoDB, Redis and RabbitMQ. Being a cloud-based platform, our solution allows for these software assets to be detached from the main deployment and to be served by specialised vendors services which are certified to the latest industry requirements such as data-at-rest encryption, high availability, etc.", - "text-hash": 5147922161190726703, + "text_hash": 5147922161190726703, "type": "paragraph" }, { @@ -98643,7 +98643,7 @@ "sref": "#/texts/80", "subj_hash": 15072914837937068796, "text": "The choice of the services plays also a crucial role in addressing the scaling requirements for the platform. From the sketch (Fig. 6), it is clear that the compute layer has a considerable amount of communication with these external services. During the development we evaluated multiple options and, e. g. we had to replace some services because of inadequate performance or scaling bottlenecks. For example other result-backends didn't offer the auto-cleaning functionality offered by Redis and, before opting for a custom solution mixing MongoDB with an object storage, we evaluated other solutions as", - "text-hash": 6457975667604208730, + "text_hash": 6457975667604208730, "type": "paragraph" }, { @@ -98657,7 +98657,7 @@ "sref": "#/texts/81", "subj_hash": 15263283599394646155, "text": "the GridFS storage, but it didn't fit to the constraints of typical cloud environments.", - "text-hash": 6564180200469858791, + "text_hash": 6564180200469858791, "type": "paragraph" }, { @@ -98671,7 +98671,7 @@ "sref": "#/texts/82", "subj_hash": 11417717357379295278, "text": "4.2 Deployment", - "text-hash": 10410411375713696396, + "text_hash": 10410411375713696396, "type": "subtitle-level-1" }, { @@ -98685,7 +98685,7 @@ "sref": "#/texts/83", "subj_hash": 9031137420247852045, "text": "Our platform is deployable on Kubernetes clusters 18 available on many cloud providers or even on-premise installations, e. g. using the IBM Cloud Private 19 distribution. Depending on the requirements, the storage services are launched inside the same cluster or linked to externally hosted endpoints.", - "text-hash": 17120327512656828009, + "text_hash": 17120327512656828009, "type": "paragraph" }, { @@ -98699,7 +98699,7 @@ "sref": "#/texts/84", "subj_hash": 18436578077535696718, "text": "The common parts of all deployments are the interface and the compute layer. The compute layer is designed for dynamically adapt the number of resources on the current load. For example, more parsing-microservice instances could be spawned when a large document is uploaded and they can automatically scaled down at the end of the task, such that the resources are free for other components, like training and assembling the processed documents.", - "text-hash": 8003240278028347820, + "text_hash": 8003240278028347820, "type": "paragraph" }, { @@ -98713,7 +98713,7 @@ "sref": "#/texts/85", "subj_hash": 11734907767490759865, "text": "The components running in the compute layer are further organized in different queues, such that we can control the fraction of resources allocated for each different component depending on their computational requirements. The parse component is indeed more demanding than the simple annotation components.", - "text-hash": 14704352826439757333, + "text_hash": 14704352826439757333, "type": "paragraph" }, { @@ -98727,7 +98727,7 @@ "sref": "#/texts/86", "subj_hash": 7845460979782401889, "text": "Currently, our main system operates on 5 Kubernetes nodes with 4 CPU cores and 8 GB of main memory each, and additionally one POWER 8 node with four GPUs is dedicated to the deep learning training and prediction tasks. Here, the flexible binding of microservices to specific nodes is a great advantage of the Kubernetes deployment. Moreover, 5 other virtual machines are employed to host the services in the orchestration and store layer.", - "text-hash": 18296438351865061837, + "text_hash": 18296438351865061837, "type": "paragraph" }, { @@ -98741,7 +98741,7 @@ "sref": "#/texts/87", "subj_hash": 17769988780693768120, "text": "4.3 Scaling benchmarks", - "text-hash": 8669715371308316950, + "text_hash": 8669715371308316950, "type": "subtitle-level-1" }, { @@ -98755,7 +98755,7 @@ "sref": "#/texts/88", "subj_hash": 12387489643011067991, "text": "Let us now discuss some scaling results on our platform. As we pointed out in the beginning of the section, our requirements for the platform were scaling with regard to the number of users, the number of processed documents and compute resources. In Figure 7, we show the number of users and the number of processed PDF", - "text-hash": 14043220598855238339, + "text_hash": 14043220598855238339, "type": "paragraph" }, { @@ -98769,7 +98769,7 @@ "sref": "#/texts/89", "subj_hash": 10375772475809458895, "text": "pages 20 as a function of time. As one can see, the number of users and processed PDF pages has been increasing steadily over time since the launch of our service in April 2017. It is however interesting to see that there are sharp steps, indicating that some users have been uploading massive amounts of documents into the service in a very small amount of time. Due to our design, it was not a problem to accommodate these peaks and our service was able to handle these short burst of extreme activity.", - "text-hash": 11451664978555915307, + "text_hash": 11451664978555915307, "type": "paragraph" }, { @@ -98783,7 +98783,7 @@ "sref": "#/texts/90", "subj_hash": 7054726458191881751, "text": "In Figure 8, we show the scaling of the three main pipeline microservices (i.e. the parsing of PDF documents, applying machine learned models and conversion of documents to JSON) on the platform with regard to compute resources. We show this scaling by displaying the speedup versus the number of worker nodes available. Here, we chose to have four workers serving each pipeline microservice, since each worker is running on a node with four cores. As one can observe, the speedup in the parse and ML apply tasks scales linearly with the the number of workers, and thus the nodes. Notably, we can even observe a slightly better-than-linear speedup, which appears due to bandwidth constraints on the baseline with one worker. The speedup on the assemble tasks, in comparison, flattens off sooner, as this task can only be parallelised on the document and not on the page level. The variability in the length of documents is reflected in a load imbalance between the worker nodes, however this averages out with sufficiently large corpus sizes. Consequently, we are able to scale the compute resources in order to keep the time-to-solution constant for any job-size.", - "text-hash": 641132783909312643, + "text_hash": 641132783909312643, "type": "paragraph" }, { @@ -98797,7 +98797,7 @@ "sref": "#/texts/91", "subj_hash": 7794115281016062068, "text": "5 CONCLUSION", - "text-hash": 18347902420476900066, + "text_hash": 18347902420476900066, "type": "subtitle-level-1" }, { @@ -98811,7 +98811,7 @@ "sref": "#/texts/92", "subj_hash": 7038163015905900647, "text": "We have presented a scalable, cloud based platform, which can ingest, parse and annotate documents, and particularly, train & apply advanced machine learning models in order to extract the content of the ingested documents and convert it into a structured data representation.", - "text-hash": 657005981473069779, + "text_hash": 657005981473069779, "type": "paragraph" }, { @@ -98825,7 +98825,7 @@ "sref": "#/texts/93", "subj_hash": 1508626318915838319, "text": "The fundamental design choices in our solution have proven to enable scaling in three elementary ways. First, it can service multiple users concurrently. Second, it can ingest, parse and apply machine learned models on many documents at the same time. Third, it can scale its compute resources for different tasks on the platform according to their respective load so the conversion of documents on the platform is at all times bounded in time, given enough resources.", - "text-hash": 1575427749670982603, + "text_hash": 1575427749670982603, "type": "paragraph" }, { @@ -98839,7 +98839,7 @@ "sref": "#/texts/94", "subj_hash": 17247086344435786796, "text": "In the future, we plan to extend the platform in two major areas. First, we would like to extend the number of microservices, especially with regard to image understanding. The number of types of images is enormous (e.g. line & scatterplot, histograms, pie-charts, geographic maps, etc). The goal here would be to extract the data out of these individual type of images after a successful identification with an image-classifier. Second, we would like to improve the quality and performance of our default models. We strongly believe that the results can be greatly improved since the neural networks we currently use are optimised for photographic images, and not images of parsed document pages (as is shown in Figure 5). To leverage this growing use of deep learning models, we will additionally introduce", - "text-hash": 9192771730962863754, + "text_hash": 9192771730962863754, "type": "paragraph" }, { @@ -98853,7 +98853,7 @@ "sref": "#/texts/95", "subj_hash": 10287541089279789496, "text": "specialised data-parallelism in order to speed up the training and provide interactive user-customisation capabilities.", - "text-hash": 11530911151361059606, + "text_hash": 11530911151361059606, "type": "paragraph" }, { @@ -98867,7 +98867,7 @@ "sref": "#/texts/96", "subj_hash": 7819882792760965882, "text": "ACKNOWLEDGMENTS", - "text-hash": 18322720810464861272, + "text_hash": 18322720810464861272, "type": "subtitle-level-1" }, { @@ -98881,7 +98881,7 @@ "sref": "#/texts/97", "subj_hash": 15983582675278266440, "text": "The authors would like to thank Roxana Istrate and Matthieu Mottet for their contribution to the development of the CCS system.", - "text-hash": 5556222901900980902, + "text_hash": 5556222901900980902, "type": "paragraph" }, { @@ -98895,7 +98895,7 @@ "sref": "#/texts/98", "subj_hash": 12711351442546714716, "text": "This work was supported by the NCCR MARVEL (http://nccr-marvel. ch), funded by the Swiss National Science Foundation. MD was supported by the FORCE project, funded by Horizon 2020 under NMBP-23-2016 call with Grant agreement number 721027 (http://the-force-project.eu).", - "text-hash": 13431247303555599034, + "text_hash": 13431247303555599034, "type": "paragraph" }, { @@ -98909,7 +98909,7 @@ "sref": "#/texts/99", "subj_hash": 1225384713519841338, "text": "REFERENCES", - "text-hash": 1858797456585454232, + "text_hash": 1858797456585454232, "type": "subtitle-level-1" }, { @@ -98923,7 +98923,7 @@ "sref": "#/texts/100", "subj_hash": 1712774266196702392, "text": "[1] A. Antonacopoulos, C. Clausner, C. Papadopoulos, and S. Pletschacher. 2015. ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015. In Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR2015). Nancy, 1151-1155.", - "text-hash": 1659105420801451542, + "text_hash": 1659105420801451542, "type": "paragraph" }, { @@ -98937,7 +98937,7 @@ "sref": "#/texts/101", "subj_hash": 14718288547983000340, "text": "[2] Leo Breiman. 2001. Random Forests. Machine Learning 45, 1 (01 Oct 2001), 5-32. https://doi.org/10.1023/A:1010933404324", - "text-hash": 6812664208788567426, + "text_hash": 6812664208788567426, "type": "paragraph" }, { @@ -98951,7 +98951,7 @@ "sref": "#/texts/102", "subj_hash": 16943780574244090186, "text": "[3] R. Cattoni, T. Coianiz, S. Messelodi, and C. M. Modena. 1998. Geometric layout analysis techniques for document image understanding: a review. Technical Report.", - "text-hash": 9486476535199015848, + "text_hash": 9486476535199015848, "type": "paragraph" }, { @@ -98965,7 +98965,7 @@ "sref": "#/texts/103", "subj_hash": 8004985786049140169, "text": "[4] Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier. 2005. From Legacy Documents to XML: A Conversion Framework. Springer Berlin Heidelberg, Berlin, Heidelberg, 92-103. https://doi.org/10.1007/11551362_9", - "text-hash": 18434854666592634661, + "text_hash": 18434854666592634661, "type": "paragraph" }, { @@ -98979,7 +98979,7 @@ "sref": "#/texts/104", "subj_hash": 12744546813104546377, "text": "[5] Ross Girshick. 2015. Fast R-CNN. In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15). IEEE Computer Society, Washington, DC, USA, 1440-1448. https://doi.org/10.1109/ICCV.2015.169", - "text-hash": 13406949228208477349, + "text_hash": 13406949228208477349, "type": "paragraph" }, { @@ -98993,7 +98993,7 @@ "sref": "#/texts/105", "subj_hash": 16061746189176848219, "text": "[6] Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik. 2013. Rich feature hierarchies for accurate object detection and semantic segmentation. CoRR abs/1311.2524 (2013). arXiv:1311.2524 http://arxiv.org/abs/1311.2524", - "text-hash": 5756829059313082807, + "text_hash": 5756829059313082807, "type": "paragraph" }, { @@ -99007,7 +99007,7 @@ "sref": "#/texts/106", "subj_hash": 11872392946390819176, "text": "[7] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg. 2016. SSD: Single Shot MultiBox Detector. Springer International Publishing, Cham, 21-37. https://doi.org/10. 1007/978-3-319-46448-0_2", - "text-hash": 14270091870781297606, + "text_hash": 14270091870781297606, "type": "paragraph" }, { @@ -99021,7 +99021,7 @@ "sref": "#/texts/107", "subj_hash": 2956849475535726296, "text": "[8] Joseph Redmon, Santosh Kumar Divvala, Ross B. Girshick, and Ali Farhadi. 2016. You Only Look Once: Unified, Real-Time Object Detection. 2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR) (2016), 779-788.", - "text-hash": 4738468948628789302, + "text_hash": 4738468948628789302, "type": "paragraph" }, { @@ -99035,7 +99035,7 @@ "sref": "#/texts/108", "subj_hash": 6623297047995432604, "text": "[9] Joseph Redmon and Ali Farhadi. 2016. YOLO9000: Better, Faster, Stronger. arXiv preprint arXiv:1612.08242 (2016).", - "text-hash": 15195146357792776186, + "text_hash": 15195146357792776186, "type": "paragraph" }, { @@ -99049,7 +99049,7 @@ "sref": "#/texts/109", "subj_hash": 2507285765516108280, "text": "[10] Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun. 2015. Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks. In Advances in Neural Information Processing Systems 28, C. Cortes, N. D. Lawrence, D. D. Lee, M. Sugiyama, and R. Garnett (Eds.). Curran Associates, Inc., 91-99. http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks. pdf", - "text-hash": 5476658171803931478, + "text_hash": 5476658171803931478, "type": "paragraph" }, { @@ -99063,7 +99063,7 @@ "sref": "#/texts/110", "subj_hash": 14905276480471286920, "text": "[11] Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas. 2018. Corpus Conversion Service poster at the SysML conference. http://www.sysml.cc/doc/ 76.pdf", - "text-hash": 6922174983558886886, + "text_hash": 6922174983558886886, "type": "paragraph" } ] diff --git a/tests/data/docs/doc_01.nlp.json b/tests/data/docs/doc_01.nlp.json index 3756a87a..1c6d5450 100644 --- a/tests/data/docs/doc_01.nlp.json +++ b/tests/data/docs/doc_01.nlp.json @@ -669,7 +669,7 @@ "sref": "#/figures/0/captions/0", "subj_hash": 5929648907277899214, "text": "FIGURE1 Schematic of a data flow for the creation of a Knowledge Graph. The data flow consists of three main task types: extraction of document elements (abstracts, paragraphs, tables, figures, etc.), annotation of these elements to detect entities and their relationships and finally aggregation of these entities and their relationships. For every task, we keep complete provenance, such that we can always trace back to a specific document or element that embeds a certain entity or relationship", - "text-hash": 12816755167354360565, + "text_hash": 12816755167354360565, "type": "caption" } ], @@ -716,7 +716,7 @@ "sref": "#/figures/2/captions/0", "subj_hash": 13588295264109661534, "text": "FIGURE 3 The time-to-solution for k-hop graph traversal for Neo4J and our new graph engine. The results were obtained for the graph500 and twitter benchmark graphs. The 10th and 90th percentiles are represented by the shaded regions; the median is shown by the markers", - "text-hash": 9558113653035301733, + "text_hash": 9558113653035301733, "type": "caption" } ], @@ -747,7 +747,7 @@ "sref": "#/figures/3/captions/0", "subj_hash": 5867845979623066511, "text": "FIGURE 4 Visual workflow editor for deep queries in the CPS platform. The interface exhibits a left toolbar to pick specific graph operations, a main drawing area for the workflow DAG and a right panel to inspect and define parameters of each graph operation. Colors indicate different operation types such as input node-retrieval (blue), traversal (red), logical operators (green) and transform functions (yellow). Valid workflows can be executed using the ' play ' button", - "text-hash": 12590315652817418422, + "text_hash": 12590315652817418422, "type": "caption" } ], @@ -794,7 +794,7 @@ "sref": "#/figures/5/captions/0", "subj_hash": 3722064109667835816, "text": "FIGURE5 The architectural design of the CPS platform. On the left, we show the data flow processing architecture orchestrated through an asynchronous REST API. On the right, we sketch the multitenant KG serving facility which provides a dedicated environment for each project", - "text-hash": 1256907401557265619, + "text_hash": 1256907401557265619, "type": "caption" } ], @@ -825,7 +825,7 @@ "sref": "#/figures/6/captions/0", "subj_hash": 5492278710328857395, "text": "FIGURE 6 Sketch of the entire pipeline to perform deep data exploration on large corpora", - "text-hash": 10669134213704159562, + "text_hash": 10669134213704159562, "type": "caption" } ], @@ -856,7 +856,7 @@ "sref": "#/figures/7/captions/0", "subj_hash": 14119822239274862236, "text": "FIGURE 7 The evaluation workflow to identify the petroleum system elements (PSE) in an article and infer its properties. It starts by searching for all petroleum system elements of a certain type (eg, source, reservoir or seal) and a particular report (worktasks 1 and 2). By successive graph traversals (worktasks 3-5, 7-9, 11, 12) along specific edges and logical operations (worktasks 6, 10, 13, 14), we are able to obtain a list of candidate formations (worktask 15), ages (worktask 16) and rocks (worktask 17), ranked by their accumulated weight. Execution of this query takes less than 18 ms on average", - "text-hash": 2397375916393726887, + "text_hash": 2397375916393726887, "type": "paragraph" } ], @@ -985,7 +985,7 @@ "sref": "#/footnotes/0", "subj_hash": 4934591159529761265, "text": "This is an open access article under the terms of the Creative Commons Attribution License, which permits use, distribution and reproduction in any medium, provided the original work is properly cited.", - "text-hash": 11226800603937609484, + "text_hash": 11226800603937609484, "type": "footnote" }, { @@ -999,7 +999,7 @@ "sref": "#/footnotes/1", "subj_hash": 16070682594069297502, "text": "\u00a9 2020 The Authors. Applied AI Letters published by John Wiley & Sons Ltd.", - "text-hash": 2671219352918255461, + "text_hash": 2671219352918255461, "type": "footnote" } ], @@ -77044,7 +77044,7 @@ "sref": "#/page-footers/0", "subj_hash": 12400883656433726216, "text": "Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", - "text-hash": 8372141692634509619, + "text_hash": 8372141692634509619, "type": "page-footer" }, { @@ -77058,7 +77058,7 @@ "sref": "#/page-footers/1", "subj_hash": 10244115652970867690, "text": "wileyonlinelibrary.com/journal/ail2 1of15", - "text-hash": 6196517219334265105, + "text_hash": 6196517219334265105, "type": "page-footer" } ], @@ -77074,7 +77074,7 @@ "sref": "#/page-headers/0", "subj_hash": 1841431076736563689, "text": "Received: 15 September 2020", - "text-hash": 16688788223092401940, + "text_hash": 16688788223092401940, "type": "page-header" }, { @@ -77088,7 +77088,7 @@ "sref": "#/page-headers/1", "subj_hash": 3915126318503464014, "text": "Revised: 23 November 2020", - "text-hash": 1000711515083668085, + "text_hash": 1000711515083668085, "type": "page-header" }, { @@ -77102,7 +77102,7 @@ "sref": "#/page-headers/2", "subj_hash": 1727876228376027809, "text": "Accepted: 25 November 2020", - "text-hash": 17099649843681009628, + "text_hash": 17099649843681009628, "type": "page-header" }, { @@ -77116,7 +77116,7 @@ "sref": "#/page-headers/3", "subj_hash": 4558221577189246496, "text": "DOI: 10.1002/ail2.20", - "text-hash": 348625343742526555, + "text_hash": 348625343742526555, "type": "page-header" }, { @@ -77130,7 +77130,7 @@ "sref": "#/page-headers/4", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77144,7 +77144,7 @@ "sref": "#/page-headers/5", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77158,7 +77158,7 @@ "sref": "#/page-headers/6", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77172,7 +77172,7 @@ "sref": "#/page-headers/7", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77186,7 +77186,7 @@ "sref": "#/page-headers/8", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77200,7 +77200,7 @@ "sref": "#/page-headers/9", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77214,7 +77214,7 @@ "sref": "#/page-headers/10", "subj_hash": 4361549266732238272, "text": "8of15", - "text-hash": 329104147727696635, + "text_hash": 329104147727696635, "type": "page-header" }, { @@ -77228,7 +77228,7 @@ "sref": "#/page-headers/11", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77242,7 +77242,7 @@ "sref": "#/page-headers/12", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77256,7 +77256,7 @@ "sref": "#/page-headers/13", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77270,7 +77270,7 @@ "sref": "#/page-headers/14", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77284,7 +77284,7 @@ "sref": "#/page-headers/15", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77298,7 +77298,7 @@ "sref": "#/page-headers/16", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77312,7 +77312,7 @@ "sref": "#/page-headers/17", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" }, { @@ -77326,7 +77326,7 @@ "sref": "#/page-headers/18", "subj_hash": 8492015887072434396, "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, + "text_hash": 14658966106383255015, "type": "page-header" } ], @@ -80424,7 +80424,7 @@ "sref": "#/tables/0/captions/0", "subj_hash": 8669048055071941045, "text": "TABLE 1 Top-k accuracies validation of KG query results. Numbers represent the fraction in which any of the k highest ranked answers matches the expected answer", - "text-hash": 14400864471075544784, + "text_hash": 14400864471075544784, "type": "caption" } ], @@ -82127,7 +82127,7 @@ "sref": "#/texts/0", "subj_hash": 2144509362215609527, "text": "LETTER", - "text-hash": 16381206540184854990, + "text_hash": 16381206540184854990, "type": "subtitle-level-1" }, { @@ -82141,7 +82141,7 @@ "sref": "#/texts/1", "subj_hash": 16672720454366774824, "text": "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", - "text-hash": 4375081646508065875, + "text_hash": 4375081646508065875, "type": "subtitle-level-1" }, { @@ -82155,7 +82155,7 @@ "sref": "#/texts/2", "subj_hash": 16781763356419781679, "text": "Peter W. J. Staar", - "text-hash": 4049808513512976982, + "text_hash": 4049808513512976982, "type": "subtitle-level-1" }, { @@ -82169,7 +82169,7 @@ "sref": "#/texts/3", "subj_hash": 3352447812305581329, "text": "|", - "text-hash": 17767354399704232748, + "text_hash": 17767354399704232748, "type": "paragraph" }, { @@ -82183,7 +82183,7 @@ "sref": "#/texts/4", "subj_hash": 14877831450145300436, "text": "Michele Dolfi", - "text-hash": 1571808557594152175, + "text_hash": 1571808557594152175, "type": "subtitle-level-1" }, { @@ -82197,7 +82197,7 @@ "sref": "#/texts/5", "subj_hash": 3352447812305581329, "text": "|", - "text-hash": 17767354399704232748, + "text_hash": 17767354399704232748, "type": "paragraph" }, { @@ -82211,7 +82211,7 @@ "sref": "#/texts/6", "subj_hash": 13336841394978214677, "text": "Christoph Auer", - "text-hash": 9737597816447750448, + "text_hash": 9737597816447750448, "type": "paragraph" }, { @@ -82225,7 +82225,7 @@ "sref": "#/texts/7", "subj_hash": 15325526562897377208, "text": "IBM Research, Rueschlikon, Switzerland", - "text-hash": 3204757815416943811, + "text_hash": 3204757815416943811, "type": "paragraph" }, { @@ -82239,7 +82239,7 @@ "sref": "#/texts/8", "subj_hash": 4017434568255781081, "text": "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland. Email: taa@zurich.ibm.com", - "text-hash": 961470147553945060, + "text_hash": 961470147553945060, "type": "paragraph" }, { @@ -82253,7 +82253,7 @@ "sref": "#/texts/9", "subj_hash": 8487024695951375934, "text": "Abstract", - "text-hash": 14650447666970618949, + "text_hash": 14650447666970618949, "type": "subtitle-level-1" }, { @@ -82267,7 +82267,7 @@ "sref": "#/texts/10", "subj_hash": 11695737263227886476, "text": "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data. Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world. Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries. In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS). Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried. To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform. This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities. Both components are tightly integrated and can be easily consumed through REST APIs. Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach. The CPS platform is designed as a modular microservice system operating on Kubernetes clusters. Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry.", - "text-hash": 9356514212507371703, + "text_hash": 9356514212507371703, "type": "paragraph" }, { @@ -82281,7 +82281,7 @@ "sref": "#/texts/11", "subj_hash": 8500733160758672230, "text": "KEYWORDS", - "text-hash": 14650267244735310237, + "text_hash": 14650267244735310237, "type": "subtitle-level-1" }, { @@ -82295,7 +82295,7 @@ "sref": "#/texts/12", "subj_hash": 4452030907228745864, "text": "document processing, knowledge graph, semantic search", - "text-hash": 243147861724212659, + "text_hash": 243147861724212659, "type": "paragraph" }, { @@ -82309,7 +82309,7 @@ "sref": "#/texts/13", "subj_hash": 11913688961435238004, "text": "1 | INTRODUCTION", - "text-hash": 8854903187485535375, + "text_hash": 8854903187485535375, "type": "subtitle-level-1" }, { @@ -82323,7 +82323,7 @@ "sref": "#/texts/14", "subj_hash": 9977041563469582014, "text": "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally. It is self-evident that this number has increased ever since. The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world. The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings. Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable.", - "text-hash": 6468010182398147525, + "text_hash": 6468010182398147525, "type": "paragraph" }, { @@ -82337,7 +82337,7 @@ "sref": "#/texts/15", "subj_hash": 4361549266817300114, "text": "2of15", - "text-hash": 329104147827159977, + "text_hash": 329104147827159977, "type": "paragraph" }, { @@ -82351,7 +82351,7 @@ "sref": "#/texts/16", "subj_hash": 8425126282903547933, "text": "In a previous publication, we presented the corpus conversion service (CCS). 1 The CCS is a scalable cloud service, which leverages state-of-the-art machine learning to convert complex formats (eg, PDF, Word, and Bitmap) into a richly structured JSON representation of their content. As such, the CCS solves the first problem when confronted with a large corpus of documents, that is, make the content of the documents programmatically accessible. Examples of the latter would be ' List all images with their caption from the corpus or list all titles with their publication date. ' The second problem is to obviously search or explore the content of the documents in a large corpus. For this problem, we have developed the corpus processing service (CPS), which we present in this paper. The CPS is intended to create knowledge bases (KBs) from the converted JSON corpus and serve these KBs through in-memory knowledge graph stores. As such, the CPS is the natural extension of the CCS and has as an express purpose to make corpora of documents available for deep data exploration.", - "text-hash": 14716796829201051176, + "text_hash": 14716796829201051176, "type": "paragraph" }, { @@ -82365,7 +82365,7 @@ "sref": "#/texts/17", "subj_hash": 16507313240019459642, "text": "The purpose of CPS is to enable deep data exploration directly on large corpora. Here, we define deep data exploration as the capability to ingest large corpora of documents into a scalable service and detect, extract and combine facts contained in these corpora in order to make new discoveries or support critical decision making. It is key to understand that our goal of creating and querying Knowledge Graphs to enable deep data exploration goes beyond search in the spirit of rank and retrieve. Although search is by no means trivial, many state-of-the art solutions exist for this purpose. * We argue, however, that one needs query capabilities which allow for a combination of extracted facts and a fast, onthe-fly creation of new datasets to enable actual deep data exploration. Those datasets can then be used for further anal-", - "text-hash": 4261190952114998337, + "text_hash": 4261190952114998337, "type": "paragraph" }, { @@ -82379,7 +82379,7 @@ "sref": "#/texts/18", "subj_hash": 7900229969942228522, "text": "ysis, which might lead to new discoveries or support decision making.", - "text-hash": 12931323242585971793, + "text_hash": 12931323242585971793, "type": "paragraph" }, { @@ -82393,7 +82393,7 @@ "sref": "#/texts/19", "subj_hash": 10081303962589804251, "text": "To better distinguish this approach from conventional search, let us consider some example questions:", - "text-hash": 6426882630003520482, + "text_hash": 6426882630003520482, "type": "paragraph" }, { @@ -82407,7 +82407,7 @@ "sref": "#/texts/20", "subj_hash": 12186698460099365002, "text": "a. Definition of high temperature superconductor.", - "text-hash": 8586326920090596785, + "text_hash": 8586326920090596785, "type": "paragraph" }, { @@ -82421,7 +82421,7 @@ "sref": "#/texts/21", "subj_hash": 14190244699299580163, "text": "b. Publications of before year 2010.", - "text-hash": 2034196463390881594, + "text_hash": 2034196463390881594, "type": "paragraph" }, { @@ -82435,7 +82435,7 @@ "sref": "#/texts/22", "subj_hash": 1376279050886549305, "text": "c. Maps of the Permian basin.", - "text-hash": 17379120122282474820, + "text_hash": 17379120122282474820, "type": "paragraph" }, { @@ -82449,7 +82449,7 @@ "sref": "#/texts/23", "subj_hash": 10155628801693924200, "text": "d. Geological formations from the Miocene age with their depth, thickness, geographic location, and composition.", - "text-hash": 6073268612165724563, + "text_hash": 6073268612165724563, "type": "paragraph" }, { @@ -82463,7 +82463,7 @@ "sref": "#/texts/24", "subj_hash": 9107499507097280105, "text": "e. List all high-Tc superconductors with their known crystallographic and material properties?", - "text-hash": 14246074989165808788, + "text_hash": 14246074989165808788, "type": "paragraph" }, { @@ -82477,7 +82477,7 @@ "sref": "#/texts/25", "subj_hash": 7248467870339433322, "text": "Question (a) undoubtedly fits the classic search paradigm, since here one can expect a search engine to find a number sources with exact answers (ie, definitions). Likewise, question (b) can be easily answered through metadata based filter rules on a literature database. Question (c) already requires some extent of domain knowledge to be encoded in a model to accurately classify the relevance of all known maps to the query, at least assuming no manual curation effort has been done. Questions (d) and (e) ultimately impose query capabilities which are clearly infeasible to support through manual curation, and are very unlikely to be answered in any single data source. These questions require the system to return a more complex data structure (eg, a table in which the rows list the formations or materials while the columns contain their respective properties).", - "text-hash": 13592184899010298257, + "text_hash": 13592184899010298257, "type": "paragraph" }, { @@ -82491,7 +82491,7 @@ "sref": "#/texts/26", "subj_hash": 13346892078888080449, "text": "Concluding from the above examples, we define the following qualifying criteria for a system that supports deep data exploration on corpora:", - "text-hash": 9732050976592056956, + "text_hash": 9732050976592056956, "type": "paragraph" }, { @@ -82505,7 +82505,7 @@ "sref": "#/texts/27", "subj_hash": 1118972765223422660, "text": "1. It can answer queries by combining different data elements from different sources into a new data structure.", - "text-hash": 15389200666968750079, + "text_hash": 15389200666968750079, "type": "paragraph" }, { @@ -82519,7 +82519,7 @@ "sref": "#/texts/28", "subj_hash": 324023167304456371, "text": "2. It supports (1) by creating a knowledge model from a controlled, unstructured corpus in a mostly unsupervised way. It may profit from, but not require any manually curated data.", - "text-hash": 15837385157674255818, + "text_hash": 15837385157674255818, "type": "paragraph" }, { @@ -82533,7 +82533,7 @@ "sref": "#/texts/29", "subj_hash": 4651508276868765576, "text": "3. It may restrict supported queries to a specific domain (eg, a technical field).", - "text-hash": 11572955042484278451, + "text_hash": 11572955042484278451, "type": "paragraph" }, { @@ -82547,7 +82547,7 @@ "sref": "#/texts/30", "subj_hash": 3052020526349962744, "text": "To meet the objectives defined earlier, CPS implements and tightly integrates two essential components. The first component is a scalable Knowledge Graph creation pipeline, which is used to automatically process text, tables and images through state-of-the-art segmentation and natural language understanding (NLU) models and extract entities and relationships from them. The second component serves the created KG, enabling users to perform deep queries and advanced graph analytics in real time. 2 This is supported through an underlying, highly optimized graph engine we developed to specifically address requirements for deep data exploration.", - "text-hash": 18009286910191614723, + "text_hash": 18009286910191614723, "type": "paragraph" }, { @@ -82561,7 +82561,7 @@ "sref": "#/texts/31", "subj_hash": 6725501529910185390, "text": "It is worth noting that the CPS platform is a fully functioning cloud application that has been successfully deployed in multiple real-world scenarios in material science 3 and oil and gas industries. 4", - "text-hash": 11737175762912836309, + "text_hash": 11737175762912836309, "type": "paragraph" }, { @@ -82575,7 +82575,7 @@ "sref": "#/texts/32", "subj_hash": 14814111183601762276, "text": "In the remainder of this paper, we discuss in detail the technical aspects and implementation details of the two main components of the CPS. In section 2, we present in depth how the platform extracts facts from corpora at a massive scale. In section 3, we go into detail of designing deep queries and show how we compute them in a very efficient", - "text-hash": 1414786465877142815, + "text_hash": 1414786465877142815, "type": "paragraph" }, { @@ -82589,7 +82589,7 @@ "sref": "#/texts/33", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -82603,7 +82603,7 @@ "sref": "#/texts/34", "subj_hash": 4361549266681704196, "text": "3of15", - "text-hash": 329104147711745343, + "text_hash": 329104147711745343, "type": "paragraph" }, { @@ -82617,7 +82617,7 @@ "sref": "#/texts/35", "subj_hash": 8043608144162608258, "text": "way with our high-performance graph engine. Later, in section 4, we will discuss in detail how both components are deployed and interacting on the cloud. Finally, in section 5, we present the complete system in a real world case study and benchmark its accuracy.", - "text-hash": 13076251584287625657, + "text_hash": 13076251584287625657, "type": "paragraph" }, { @@ -82631,7 +82631,7 @@ "sref": "#/texts/36", "subj_hash": 7159467829896778939, "text": "2 | SCALABLE KNOWLEDGE GRAPH CREATION", - "text-hash": 13901790948575121858, + "text_hash": 13901790948575121858, "type": "subtitle-level-1" }, { @@ -82645,7 +82645,7 @@ "sref": "#/texts/37", "subj_hash": 5617240156952377, "text": "In CPS, a Knowledge Graph is defined as a collection of entities and their relationships forming the graphs nodes and edges. Entities can have a wide variety of types. A basic scenario includes types such as documents, document components, keywords, and authors. In addition, there can be more specific types tied to domain verticals, such as materials and properties in material science, or geological ages, formations, rocks, minerals, structures, etc., for oil and gas exploration. Relationships in the KG are strictly defined between the entities. Similar to the entities, the relationships are typed (' has-material-property ' or ' has-geological-age '). Also, relationships in the KG can be weighted, for example, to represent the trustworthiness of a fact that the relationship represents.", - "text-hash": 16151270992855323972, + "text_hash": 16151270992855323972, "type": "paragraph" }, { @@ -82659,7 +82659,7 @@ "sref": "#/texts/38", "subj_hash": 3276490574487379366, "text": "In typical cases, we start from a collection of documents in different formats. Sometimes, documents are available in semistructured, machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority of cases this does not apply, especially for proprietary documents of companies and organizations. The latter are very often scanned or programmatic PDF documents. Using the CCS, 1 these types of documents are converted into structured JSON files. Those provide easy access to the meta-data (eg, title, abstract, references, authors) and the document body. The latter is structured by subtitles (of various levels), paragraphs, lists, tables (with internal row and column structures), figures, and linked captions. O n c et h ec o r p u si sp r e s n ti nas t r u c t u r e d,m a c h i n e processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely extraction, annotation,and aggregation. The inherent dependencies between these three tasks are defined through a directed acyclic graph (DAG). We willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss the details for each DF task.", - "text-hash": 17496609193730656989, + "text_hash": 17496609193730656989, "type": "paragraph" }, { @@ -82673,7 +82673,7 @@ "sref": "#/texts/39", "subj_hash": 3367451956962330174, "text": "2.1 | DF tasks", - "text-hash": 17765848133863277637, + "text_hash": 17765848133863277637, "type": "subtitle-level-1" }, { @@ -82687,7 +82687,7 @@ "sref": "#/texts/40", "subj_hash": 5509744459704235873, "text": "In Figure 1, we sketch a minimal DF, in which each of the three tasks is used consecutively in order to generate entities and relationships for a generic KG. We will use Figure1toillustratethepurposeandimplementationof each DF task.", - "text-hash": 10647094536020604316, + "text_hash": 10647094536020604316, "type": "paragraph" }, { @@ -82701,7 +82701,7 @@ "sref": "#/texts/41", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -82715,7 +82715,7 @@ "sref": "#/texts/42", "subj_hash": 4361549176688508574, "text": "4of15", - "text-hash": 329104066308221861, + "text_hash": 329104066308221861, "type": "paragraph" }, { @@ -82729,7 +82729,7 @@ "sref": "#/texts/43", "subj_hash": 12374482891052873875, "text": "2.1.1 | Extraction", - "text-hash": 8758905122433574314, + "text_hash": 8758905122433574314, "type": "subtitle-level-1" }, { @@ -82743,7 +82743,7 @@ "sref": "#/texts/44", "subj_hash": 2755397864153233778, "text": "In an extraction task, we generate new data entities (eg, document components) from an original set of source entities (eg, documents). During this process, new links are created which connect these newly generated data entities to their original source entity. Typical examples of such extraction tasks are the extraction of abstracts, paragraphs, tables, or figures from the structured document files.", - "text-hash": 18305914688852125577, + "text_hash": 18305914688852125577, "type": "paragraph" }, { @@ -82757,7 +82757,7 @@ "sref": "#/texts/45", "subj_hash": 4698316471746130896, "text": "From a scalability point of view, this task is embarrassingly parallel, which makes it extremely easy to implement on loosely interconnected environments such as a cloud. We simply iterate in parallel over all source entities in the backend database, extract the desired components and then insert those components as new data entities back into the database. Extraction tasks have no internal synchronization points.", - "text-hash": 11458501594938683627, + "text_hash": 11458501594938683627, "type": "paragraph" }, { @@ -82771,7 +82771,7 @@ "sref": "#/texts/46", "subj_hash": 11827267218358801841, "text": "One particular benefit of this task is to make the query capability on the Knowledge Graph more fine grained by being able to provide provenance information on the result. For example, this would let the user explore all the paragraphs, tables, or figures that embed a certain fact.", - "text-hash": 8932299863639200460, + "text_hash": 8932299863639200460, "type": "paragraph" }, { @@ -82785,7 +82785,7 @@ "sref": "#/texts/47", "subj_hash": 6297710299044869343, "text": "2.1.2 | Annotation", - "text-hash": 12444247655523627494, + "text_hash": 12444247655523627494, "type": "subtitle-level-1" }, { @@ -82799,7 +82799,7 @@ "sref": "#/texts/48", "subj_hash": 7158837349769150986, "text": "In the annotation task, we apply NLU methods to detect language entities and their relationships within a single data entity. Here, data entities can be as simple as a snippet of text (eg, a paragraph) or more complex structures such as tables or figures. The main goal of the annotation task is to obtain all relevant information from the data entity with regard to the domain of the corpus. Since different technical fields require different annotations, our annotation task is modular, allowing language entities to be annotated for material science, oil and gas, or more basic entities (eg, noun phrases, abbreviations, unit and values, etc.).", - "text-hash": 13902418307602972721, + "text_hash": 13902418307602972721, "type": "paragraph" }, { @@ -82813,7 +82813,7 @@ "sref": "#/texts/49", "subj_hash": 1150871476689677866, "text": "From a technical perspective, the language entities are detected and annotated using multiple NLU methods, ranging from complex regular expressions \u2020 to LSTM networks. 5,6 We employ state-of-the-art NLU toolkits such as Spacy 7 or NLTK \u2021 to train and apply custom named entity recognition models. A detailed investigation of these NLU annotators unfortunately goes beyond of the scope of this paper. However, in Figure 2, we show the different types of named (geological) entities found in a paragraph by our oil and gas annotation model.", - "text-hash": 15370812655802342481, + "text_hash": 15370812655802342481, "type": "paragraph" }, { @@ -82827,7 +82827,7 @@ "sref": "#/texts/50", "subj_hash": 5163702913945903725, "text": "In Listing 1, we also show an excerpt of how the annotations (both language entities and relationships) are stored in the backend. It is noteworthy here that relationships are stored as (weighted) links between two entity references. \u00a7 The usage of references reduces data duplication and more importantly ensures that the relationships are always defined between two known entities in the KG. The latter simplifies the aggregation of the relationships significantly, since no new entities need to be created in the KG in order to aggregate the relationships (see section 2.1.4).", - "text-hash": 11348986383696847000, + "text_hash": 11348986383696847000, "type": "paragraph" }, { @@ -82841,7 +82841,7 @@ "sref": "#/texts/51", "subj_hash": 5462319091745771382, "text": "FIGURE 2 Illustration of various detected language entities in a particularly rich snippet of an AAPG abstract. 8 The language entities here are all related to geological concepts in the domain of oil and gas exploration", - "text-hash": 11050304000116997517, + "text_hash": 11050304000116997517, "type": "paragraph" }, { @@ -82855,7 +82855,7 @@ "sref": "#/texts/52", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -82869,7 +82869,7 @@ "sref": "#/texts/53", "subj_hash": 958124839653591304, "text": "LISTING 1 Excerpt of the annotated abstract from an AAPG paper 8 with its original text and the detected entities and relationships. Note that relationships are typed (encoded in the field name) and weighted. The weight reflects the confidence of the language annotation model during extraction. Relationships are always defined on detected entities, and will therefore use references defining a link between two entities", - "text-hash": 15194258930241746739, + "text_hash": 15194258930241746739, "type": "paragraph" }, { @@ -82883,7 +82883,7 @@ "sref": "#/texts/54", "subj_hash": 1448405324616602032, "text": "From a scaling perspective, this task is again embarrassingly parallel. Unlike the extraction task, the annotation task is not creating new data entities, but rather appending new data associated with an existing data entity. We simply apply the desired entity and relationship annotators on all document components (paragraphs, tables, etc.) in parallel by distributing the operations on all available compute resources. Annotation tasks have no internal synchronization points. From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs. Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers.", - "text-hash": 17018759417884348107, + "text_hash": 17018759417884348107, "type": "paragraph" }, { @@ -82897,7 +82897,7 @@ "sref": "#/texts/55", "subj_hash": 2617775076168299948, "text": "2.1.3 | Aggregation of entities", - "text-hash": 18150799209915986647, + "text_hash": 18150799209915986647, "type": "subtitle-level-1" }, { @@ -82911,7 +82911,7 @@ "sref": "#/texts/56", "subj_hash": 13974986056043304735, "text": "The aggregation task for entities is similar to an extraction task, in the sense that we create new entities and link them each to the source they were mentioned in. In addition to extraction, the entity aggregation task also applies a similarity metric \u00b6 between the entities during extraction. This similarity metric will define if two entities refer to the same language concept and thus need to be represented by a single entity in the KG, rather than remaining separated. In Figure 1, we have illustrated the aggregation task for two types of entities across many different document components. These entity types could be for example materials and properties or geological formations and geological ages. The links connecting the new entities to their source entity are weighted according to the frequency of the match, that is, we set a higher weight if the language entity has been found multiple times. From an implementation point of view, the aggregation task for entities is nontrivial. In distributed computing, it corresponds to a reduction operation. Our implementation distributes the iteration of the source elements among all available computational resources. The aggregation is first performed in a local buffer, which is then synchronized with the backend database only when it reaches a maximum size. The synchronization step is a simple atomic update into an existing (or a newly created) database object. The synchronization for updates from each worker task does not collide with the others.", - "text-hash": 2253911354578933030, + "text_hash": 2253911354578933030, "type": "paragraph" }, { @@ -82925,7 +82925,7 @@ "sref": "#/texts/57", "subj_hash": 5985285694705576020, "text": "2.1.4 | Aggregation of relationships", - "text-hash": 12765605759878485615, + "text_hash": 12765605759878485615, "type": "subtitle-level-1" }, { @@ -82939,7 +82939,7 @@ "sref": "#/texts/58", "subj_hash": 11235296141350659290, "text": "The aggregation of relationships introduces new links between the entities that were aggregated in the previous aggregation operation. In Figure 1, this task is depicted as the last operation, where entities with an annotated relationship are explicitly linked together. For example, we create an edge between the Egret-Hibernia Petroleum System and Jeanne D'Arc Basin from Listing 1.", - "text-hash": 7583169921155047905, + "text_hash": 7583169921155047905, "type": "paragraph" }, { @@ -82953,7 +82953,7 @@ "sref": "#/texts/59", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -82967,7 +82967,7 @@ "sref": "#/texts/60", "subj_hash": 4361549266576336732, "text": "6of15", - "text-hash": 329104147615819111, + "text_hash": 329104147615819111, "type": "paragraph" }, { @@ -82981,7 +82981,7 @@ "sref": "#/texts/61", "subj_hash": 5771309285006424458, "text": "Similar to the aggregation of entities, the aggregation task for relationships is a reduction operation. Two independent document components could describe the same relationship between two entities. To minimize the synchronization lookup operation with the backend database, this task also utilizes a local buffer which accumulates the changes to be committed to the KG until the maximum size is reached. This approach allows to distribute the computation among all the source document components and performs very few blocking operations in the backend database.", - "text-hash": 12691372718925440689, + "text_hash": 12691372718925440689, "type": "paragraph" }, { @@ -82995,7 +82995,7 @@ "sref": "#/texts/62", "subj_hash": 5371685212527510397, "text": "2.2 | Data flows", - "text-hash": 11140938221338345864, + "text_hash": 11140938221338345864, "type": "subtitle-level-1" }, { @@ -83009,7 +83009,7 @@ "sref": "#/texts/63", "subj_hash": 7817257645383866853, "text": "The purpose of a DF is to provide an execution plan for the task types detailed above in a meaningful order to generate or update a specific KG. When instantiating a DF, one has the possibility to define in a declarative way:", - "text-hash": 12955841367339550496, + "text_hash": 12955841367339550496, "type": "paragraph" }, { @@ -83023,7 +83023,7 @@ "sref": "#/texts/64", "subj_hash": 2929626768872004841, "text": "1. Which document components should be extracted from a converted corpus to form source entities (eg, extract all paragraphs, tables, figures and captions from the AAPG articles)?", - "text-hash": 17906500337671162388, + "text_hash": 17906500337671162388, "type": "paragraph" }, { @@ -83037,7 +83037,7 @@ "sref": "#/texts/65", "subj_hash": 15879756297712818143, "text": "2. Which annotator model(s) to use on which type of source entity (eg, run the geology or material science annotators on paragraphs)?", - "text-hash": 2573988876245521638, + "text_hash": 2573988876245521638, "type": "paragraph" }, { @@ -83051,7 +83051,7 @@ "sref": "#/texts/66", "subj_hash": 16116531546352845311, "text": "3. Which entity and relationship aggregations to perform on which set of annotated language entities?", - "text-hash": 2702000589258555142, + "text_hash": 2702000589258555142, "type": "paragraph" }, { @@ -83065,7 +83065,7 @@ "sref": "#/texts/67", "subj_hash": 9541434157786316356, "text": "The DFs can thus be seen as blueprints for processing the corpus into a defined graph topology. Notably, our implementation of DFs and their tasks retains the flexibility of processing not only source documents of a well-known data schema such as from CCS, but virtually any structure that can be transformed to a JSON representation, including data entities from precurated databases. We designed the CPS platform to support export and import of DFs on entirely new datasets without the burden of recreating it from scratch.", - "text-hash": 6610972392363355263, + "text_hash": 6610972392363355263, "type": "paragraph" }, { @@ -83079,7 +83079,7 @@ "sref": "#/texts/68", "subj_hash": 997682002692959482, "text": "Our backend engine can exploit the DAG defined through the DF to massively distribute the individual tasks on all compute resources, because independent branches of the DAG each containing a chain of tasks can execute in parallel. The achievable level of parallelism changes throughout the execution. A practical example is a DF which extracts paragraphs and abstracts from all documents in the corpus, then annotates them and finally aggregates all entities. Here, the extraction tasks are distributed only over all documents; then, in the annotation tasks, we increase the parallelism to all document components. Any synchronization points thus can be pushed back into the aggregation tasks.", - "text-hash": 15235788623540001281, + "text_hash": 15235788623540001281, "type": "paragraph" }, { @@ -83093,7 +83093,7 @@ "sref": "#/texts/69", "subj_hash": 11590138063543342276, "text": "3 | DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS", - "text-hash": 9254996552431571455, + "text_hash": 9254996552431571455, "type": "subtitle-level-1" }, { @@ -83107,7 +83107,7 @@ "sref": "#/texts/70", "subj_hash": 16380310806374538602, "text": "We will now look into the requirements to perform deep data exploration on a populated Knowledge Graph. A deep data exploration requires two fundamental capabilities:", - "text-hash": 4676441280076073873, + "text_hash": 4676441280076073873, "type": "paragraph" }, { @@ -83121,7 +83121,7 @@ "sref": "#/texts/71", "subj_hash": 5393976293631695754, "text": "1. perform deep queries on the graph, that is, queries that require multi-hop traversals and", - "text-hash": 11127633169729292465, + "text_hash": 11127633169729292465, "type": "paragraph" }, { @@ -83135,7 +83135,7 @@ "sref": "#/texts/72", "subj_hash": 1988335831916069382, "text": "2. perform graph analytics on the full graph or subsets of it on-the-fly.", - "text-hash": 16834701212347777085, + "text_hash": 16834701212347777085, "type": "paragraph" }, { @@ -83149,7 +83149,7 @@ "sref": "#/texts/73", "subj_hash": 5147764798816678886, "text": "Deep queries are essential to dynamically combine independent facts together in the given query context. This would apply for example to explorational queries aimed to characterize petroleum system elements, as detailed in our case study (see section 5). Graph analytics can further reveal hidden structure in the KG topology. Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation.", - "text-hash": 11297301064675504413, + "text_hash": 11297301064675504413, "type": "paragraph" }, { @@ -83163,7 +83163,7 @@ "sref": "#/texts/74", "subj_hash": 285583876932865368, "text": "Both deep queries and graph analytics have in common that they are inherently expensive to compute on conventional graph databases, due to a rapid expansion of the number of visited nodes as a function of the graph-traversal depth. This is a major obstacle in providing reasonable time-to-solution in the aforementioned cases. Virtually all established graph database products on the market today ** fall victim to this, as was also reported in multiple sources. 11,12 Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform. This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast.", - "text-hash": 16231538415772072803, + "text_hash": 16231538415772072803, "type": "paragraph" }, { @@ -83177,7 +83177,7 @@ "sref": "#/texts/75", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -83191,7 +83191,7 @@ "sref": "#/texts/76", "subj_hash": 4361549257370278754, "text": "7of15", - "text-hash": 329104161989101977, + "text_hash": 329104161989101977, "type": "paragraph" }, { @@ -83205,7 +83205,7 @@ "sref": "#/texts/77", "subj_hash": 13183039880198077038, "text": "In the remaining part of this section, we elaborate on our newly developed graph engine. In section 3.1, we discuss the implementation design. In section 3.2, we discuss performance results and compare it to Neo4J. Later, in section 3.3, we will explain how the deep queries are formulated and evaluated in the graph engine.", - "text-hash": 10251595290936699029, + "text_hash": 10251595290936699029, "type": "paragraph" }, { @@ -83219,7 +83219,7 @@ "sref": "#/texts/78", "subj_hash": 13428900458866068249, "text": "3.1 | Design of the graph engine", - "text-hash": 9938197928077211940, + "text_hash": 9938197928077211940, "type": "subtitle-level-1" }, { @@ -83233,7 +83233,7 @@ "sref": "#/texts/79", "subj_hash": 1430911655724119030, "text": "In computer science, two prevalent implementation schemes for graphs have emerged, one using adjacency lists and one relying on adjacency matrices. 13,14 In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors. \u2020\u2020 The edges are therefore stored as a property of the node. In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples.", - "text-hash": 17396562708416737549, + "text_hash": 17396562708416737549, "type": "paragraph" }, { @@ -83247,7 +83247,7 @@ "sref": "#/texts/80", "subj_hash": 13770706479324480755, "text": "It is commonly known that most graph operations can be translated into matrix-operations using linear algebra. 13 For example, consider the graph-traversal V ! A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W. This can be directly translated into linear algebra as", - "text-hash": 9596444718520353290, + "text_hash": 9596444718520353290, "type": "paragraph" }, { @@ -83261,7 +83261,7 @@ "sref": "#/texts/81", "subj_hash": 11165481757050847950, "text": "w $^{!}$= Av ! with v $^{!}$$_{i}$= 1 if node i \\b V 0 if node i = 2 V , GLYPH \u00f0 1 \u00de", - "text-hash": 7657471412122468341, + "text_hash": 7657471412122468341, "type": "equation" }, { @@ -83275,7 +83275,7 @@ "sref": "#/texts/82", "subj_hash": 9572077971492738329, "text": "and with A being the adjacency matrix representation of the edge A. Translating single graph-traversals into linear algebra operations significantly simplifies the job of deeper graph traversals. For example, to obtain the k-order neighborhood of node set V, one simply needs to evaluate Equation (1) k times recursively, as in", - "text-hash": 6656818579934057252, + "text_hash": 6656818579934057252, "type": "paragraph" }, { @@ -83289,7 +83289,7 @@ "sref": "#/texts/83", "subj_hash": 14951391138799557075, "text": "w $^{!}$= A$^{k}$v $^{!}$= AA \u2026 Av ! GLYPHGLYPH GLYPH GLYPH GLYPH GLYPH : \u00f0 2 \u00de", - "text-hash": 1498163960925914858, + "text_hash": 1498163960925914858, "type": "equation" }, { @@ -83303,7 +83303,7 @@ "sref": "#/texts/84", "subj_hash": 16602156009514813718, "text": "Therefore, deep queries can be implemented efficiently as long as Equation (1) can be evaluated efficiently. Over the past decades, lots of research has been conducted in the High Performance Computing community on the acceleration and parallelization of Equation (1) in the context of graphs. In this context, the matrix A is sparse and the linear operation of Equation (1) is referred to as a sparse matrix vector multiplication (SpMV), for which highly optimized implementations have been developed. 15,16 Notably, most advanced graph-analytical operations can be formulated using SpMV operations. The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w ! is equal to v $^{!}$. In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations.", - "text-hash": 4445641728881669933, + "text_hash": 4445641728881669933, "type": "paragraph" }, { @@ -83317,7 +83317,7 @@ "sref": "#/texts/85", "subj_hash": 7162849562576593449, "text": "Since both deep queries and advanced graph analytics hugely benefit from a fast SpMV kernel, we have opted to design the graph engine in the CPS platform to work entirely with the adjacency matrix format.", - "text-hash": 13884895358995816532, + "text_hash": 13884895358995816532, "type": "paragraph" }, { @@ -83331,7 +83331,7 @@ "sref": "#/texts/86", "subj_hash": 15385417954505503552, "text": "3.2 | Memory architecture and performance optimization", - "text-hash": 3140380205981200763, + "text_hash": 3140380205981200763, "type": "subtitle-level-1" }, { @@ -83345,7 +83345,7 @@ "sref": "#/texts/87", "subj_hash": 10815650641518265876, "text": "Both adjacency lists and adjacency matrices-based graph implementations have specific advantages and disadvantages. The adjacency list format is very well suited for node-centric operations since it exploits data-locality for local graph operations, such as first order traversals. However, it proves suboptimal for global scale graph operations, which are required for deep queries and the advanced graph analytics. Here, one typically has to perform graph-traversals starting from many (or even all) nodes and accumulating the weight in the resulting nodes. In an adjacency list format, this often leads to many cache misses during execution, resulting in low performance. Furthermore, parallelizing global graph-traversals in the adjacency list format suffers significantly from concurrent write conflicts between threads during execution. In the adjacency matrix format, these problems are not encountered. The graph-traversals can be directly translated into a SpMV or even a sparse-matrix sparse-vector multiplication (SpMSpV). It has also been well established how to execute the SpMV effectively in a multithreaded fashion, and how to minimize cache-misses by applying a clever sorting of the tuples list. 17", - "text-hash": 7939832404963099695, + "text_hash": 7939832404963099695, "type": "paragraph" }, { @@ -83359,7 +83359,7 @@ "sref": "#/texts/88", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -83373,7 +83373,7 @@ "sref": "#/texts/89", "subj_hash": 12004249365408683930, "text": "To illustrate the advantages of the adjacency matrix format for our needs, we show the time-to-solution (TTS) for queries with increasing order of traversals for Neo4J \u2021\u2021 and our graph engine in Figure 3. We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges). Two important observations can be made. Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals. With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour. Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals. This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from.", - "text-hash": 9124629550221661345, + "text_hash": 9124629550221661345, "type": "paragraph" }, { @@ -83387,7 +83387,7 @@ "sref": "#/texts/90", "subj_hash": 7223381657047466215, "text": "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO. This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory. In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO. We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers. Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes. *** This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$). Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory.", - "text-hash": 13549646715324792350, + "text_hash": 13549646715324792350, "type": "paragraph" }, { @@ -83401,7 +83401,7 @@ "sref": "#/texts/91", "subj_hash": 15132906055887224772, "text": "3.3 | Formulation and evaluation of deep queries", - "text-hash": 3609048564712975615, + "text_hash": 3609048564712975615, "type": "subtitle-level-1" }, { @@ -83415,7 +83415,7 @@ "sref": "#/texts/92", "subj_hash": 17129434987283608290, "text": "The goal of querying a KG is to answer complex questions. As such, users need to be provided with a functionality to formulate complex queries on the KG and quickly evaluate them.", - "text-hash": 3711217782201102361, + "text_hash": 3711217782201102361, "type": "paragraph" }, { @@ -83429,7 +83429,7 @@ "sref": "#/texts/93", "subj_hash": 10350406469077463155, "text": "In order to avoid imposing a complex query language onto users, we have devised a way to define complex graph queries in a declarative format, which we call a workflow. Workflows are represented as a DAG of operations and are conceptually related to DFs. Unlike the former, the nodes of workflow DAGs do not represent data-transformation tasks, but specific graph operations which mutate an input (or intermediate) set of nodes into another set. We call these operations worktasks. For further convenience, we have developed a graphical user interface (UI) which allows to define such workflows in a visual programming approach (see Figure 4).", - "text-hash": 6157696558870441610, + "text_hash": 6157696558870441610, "type": "paragraph" }, { @@ -83443,7 +83443,7 @@ "sref": "#/texts/94", "subj_hash": 16949854269270315165, "text": "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions. In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design.", - "text-hash": 4111476184068705704, + "text_hash": 4111476184068705704, "type": "paragraph" }, { @@ -83457,7 +83457,7 @@ "sref": "#/texts/95", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -83471,7 +83471,7 @@ "sref": "#/texts/96", "subj_hash": 4361549266593946746, "text": "9of15", - "text-hash": 329104147597527681, + "text_hash": 329104147597527681, "type": "paragraph" }, { @@ -83485,7 +83485,7 @@ "sref": "#/texts/97", "subj_hash": 9802652237802670052, "text": "3.3.1 | Node retrieval", - "text-hash": 6349660887815587103, + "text_hash": 6349660887815587103, "type": "subtitle-level-1" }, { @@ -83499,7 +83499,7 @@ "sref": "#/texts/98", "subj_hash": 5524728206729419689, "text": "This task finds a set of nodes which satisfy certain search criteria. This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property. The task constructs a node vector v $^{!}$, such that", - "text-hash": 10699646946138261716, + "text_hash": 10699646946138261716, "type": "paragraph" }, { @@ -83513,7 +83513,7 @@ "sref": "#/texts/99", "subj_hash": 4043385013945968936, "text": "v $^{!}$$_{i}$= 1 if node i \\b S 0 if node i = 2 S , GLYPH \u00f0 3 \u00de", - "text-hash": 588808569772103507, + "text_hash": 588808569772103507, "type": "equation" }, { @@ -83527,7 +83527,7 @@ "sref": "#/texts/100", "subj_hash": 11778884428660217326, "text": "where S represents the set of nodes that satisfy the search criteria.", - "text-hash": 9277850099981357845, + "text_hash": 9277850099981357845, "type": "paragraph" }, { @@ -83541,7 +83541,7 @@ "sref": "#/texts/101", "subj_hash": 12875050310340408203, "text": "3.3.2 | Graph traversal", - "text-hash": 10555101842315227314, + "text_hash": 10555101842315227314, "type": "subtitle-level-1" }, { @@ -83555,7 +83555,7 @@ "sref": "#/texts/102", "subj_hash": 3785875504044487339, "text": "The simplest type of graph-traversal is the direct graph-traversal. As explained in detail in section 3.1, these can be implemented as a straightforward SpMV operation w $^{!}$= Av $^{!}$. In more advanced types of graph-traversals, we evaluate all paths of different depth. Since the number of paths connecting two nodes might increase exponentially with the pathlength, one typically reduces the contribution of each path by weighting it with the inverse factorial of the path-length. For example, consider the case in which we want to explore deeper, indirect paths as follows,", - "text-hash": 909351913600217042, + "text_hash": 909351913600217042, "type": "paragraph" }, { @@ -83569,7 +83569,7 @@ "sref": "#/texts/103", "subj_hash": 12105626155924658285, "text": "w $^{!}$= A + A 2 2 ! + A 3 3 ! + GLYPH GLYPH GLYPH GLYPH GLYPH v $^{!}$= e$^{A}$- 1 GLYPH GLYPH v $^{!}$: \u00f0 4 \u00de", - "text-hash": 9027673695254677144, + "text_hash": 9027673695254677144, "type": "equation" }, { @@ -83583,7 +83583,7 @@ "sref": "#/texts/104", "subj_hash": 16265612055607243129, "text": "In its most generic case, a graph-traversal can therefore be written down as a matrix-function applied on an edge, that is, w $^{!}$= fA \u00f0 \u00de v $^{!}$. As discussed in detail in previous work, 2 this type of operation can be evaluated extremely efficiently using a recursive Chebyshev polynomial expansion.", - "text-hash": 4579475315408875396, + "text_hash": 4579475315408875396, "type": "paragraph" }, { @@ -83597,7 +83597,7 @@ "sref": "#/texts/105", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -83611,7 +83611,7 @@ "sref": "#/texts/106", "subj_hash": 10252446451495472512, "text": "3.3.3 | Logical operations", - "text-hash": 6188098459342469819, + "text_hash": 6188098459342469819, "type": "subtitle-level-1" }, { @@ -83625,7 +83625,7 @@ "sref": "#/texts/107", "subj_hash": 17011944206067158637, "text": "In logical operations, two sets of nodes are merged into one resulting set, each represented through a node vector. There are three common logical operations, AND, OR, and NOT. In the AND and OR operations, we compute the geometric or the arithmetic mean respectively for each pairwise elements in the vectors. In the NOT operation, we inverse the sign for each element of the input vector.", - "text-hash": 3756558606376352920, + "text_hash": 3756558606376352920, "type": "paragraph" }, { @@ -83639,7 +83639,7 @@ "sref": "#/texts/108", "subj_hash": 16289627123982758705, "text": "3.3.4 | Transform functions", - "text-hash": 4767177430745297228, + "text_hash": 4767177430745297228, "type": "subtitle-level-1" }, { @@ -83653,7 +83653,7 @@ "sref": "#/texts/109", "subj_hash": 13969801897340997317, "text": "Lastly, we implement operations which transform the weights associated with nodes. One such operation renormalizes and ultimately ranks the nodes according to their weight.", - "text-hash": 2263647560089238528, + "text_hash": 2263647560089238528, "type": "paragraph" }, { @@ -83667,7 +83667,7 @@ "sref": "#/texts/110", "subj_hash": 105697770555684555, "text": "With these four types of operations, we can express rich queries to answer complex questions, which can have multiple inputs and outputs. Let us now discuss how a workflow is evaluated within the graph engine. Once a workflow has been submitted, each worktask is initially assigned a vector. These vectors are all initialized to zero (v $^{!}$$_{i}$= 0). Next, the graph will analyze the DAG of worktasks and identify which tasks can be run in parallel. This is achieved by performing a topological sort using depth-first traversal, which yields a list in which each item is a set of tasks that can be executed in parallel. The graph engine then proceeds with the parallel task computations.", - "text-hash": 16051124526605366258, + "text_hash": 16051124526605366258, "type": "paragraph" }, { @@ -83681,7 +83681,7 @@ "sref": "#/texts/111", "subj_hash": 15938840672015995359, "text": "For each task, we obtain a set of nodes with corresponding weights by identifying the nonzero elements in the associated node vector. After executing the full workflow, we therefore obtain for each task a list of nodes which can be sorted according to their weights. The higher the weight of the node, the more relevant this node is. As such, we can also retrace which nodes were important in each stage of the workflow.", - "text-hash": 2523894108122369766, + "text_hash": 2523894108122369766, "type": "paragraph" }, { @@ -83695,7 +83695,7 @@ "sref": "#/texts/112", "subj_hash": 16505790528099785698, "text": "4 | CLOUD DESIGN AND DEPLOYMENT", - "text-hash": 4262729847538649369, + "text_hash": 4262729847538649369, "type": "subtitle-level-1" }, { @@ -83709,7 +83709,7 @@ "sref": "#/texts/113", "subj_hash": 14738723905055920039, "text": "The primary deployment target for the CPS is a cloud environment orchestrated via Kubernetes. We package the full platform assets with a Helm chart for quick deployment on multiple setups. For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes.", - "text-hash": 1485721651435830494, + "text_hash": 1485721651435830494, "type": "paragraph" }, { @@ -83723,7 +83723,7 @@ "sref": "#/texts/114", "subj_hash": 5699550326698755904, "text": "In Figure 5, we show the high-level cloud design of the CPS. The platform allows to manage and instrument the corpus processing in a multitenant fashion, that is, it handles multiple knowledge ingestion pipelines and it serves multiple knowledge graphs. We call each unit a Knowledge Graph Space (KGS), which consists of a dedicated instance of the graph engine, a dedicated MongoDB database and a bucket on a cloud object store (COS). A dashboard allows each project owner to manage the access and the usage of resources. The KGS can be launched into multiple flavors to optimally balance the utilization of the cluster. These flavors range from a virtual machine with small amount of memory to a full dedicated node including hardware acceleration with GPUs. Once a KGS is created, it can be paused and rescaled without loss of data or downtime.", - "text-hash": 10750023430231115131, + "text_hash": 10750023430231115131, "type": "paragraph" }, { @@ -83737,7 +83737,7 @@ "sref": "#/texts/115", "subj_hash": 11609131422778723150, "text": "For the KG creation pipeline, we implemented an asynchronous compute scheme we already use in our CCS solution. 1 The system is exposed to the user via an API frontend which communicates to the compute workers through a message broker and a result backend. The workers operate on the data, which is hosted on a NoSQL database and a cloud object store for data blobs. These workers are dynamically scaled by the cloud orchestrator to best match the current load of the platform.", - "text-hash": 9163968380151462261, + "text_hash": 9163968380151462261, "type": "paragraph" }, { @@ -83751,7 +83751,7 @@ "sref": "#/texts/116", "subj_hash": 788128893109726279, "text": "The processing of the KG creation typically starts with the user submitting the DF to the frontend API. The DAG of operations is then interpreted as described in the previous section and fine-grained tasks are submitted to the broker, for example, the whole corpus is split in many independent chunks. The user receives an overall status from the API and is notified when the DF processing has completed.", - "text-hash": 15724564631854553726, + "text_hash": 15724564631854553726, "type": "paragraph" }, { @@ -83765,7 +83765,7 @@ "sref": "#/texts/117", "subj_hash": 7029344862946908483, "text": "The KG data are distributed between three storage solutions: a NoSQL database, a cloud object storage (COS) and the KGS. Each node is represented as a document in a NoSQL database which contains all the properties attached to the node, for example, the text of a paragraph. If there is a binary object attached to the node, for example, the PDF document or an image, this is stored on the COS. The KGS contains only the minimal information needed to execute the queries, that is, the connectivity of the graph and the properties which are indexed for filtering and search.", - "text-hash": 13806805648097199994, + "text_hash": 13806805648097199994, "type": "paragraph" }, { @@ -83779,7 +83779,7 @@ "sref": "#/texts/118", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -83793,7 +83793,7 @@ "sref": "#/texts/119", "subj_hash": 2144926686518491811, "text": "11of15", - "text-hash": 16380805707549272026, + "text_hash": 16380805707549272026, "type": "paragraph" }, { @@ -83807,7 +83807,7 @@ "sref": "#/texts/120", "subj_hash": 18333396269095847693, "text": "The KGS is exposed to the user via a REST API which is able to aggregate results collected from the different storage sources. To ensure decent performance when serving queries of multiple users, the graph engine can be dynamically scaled horizontally. Most workflow queries execute fast enough such that they can be responded from a synchronous request. Others, especially the graph analytics computations, are more expensive and return large amounts of data. Thus, these queries are executed through an asynchronous API and the results are paginated and streamed back to the user on completion.", - "text-hash": 5024699355629880632, + "text_hash": 5024699355629880632, "type": "paragraph" }, { @@ -83821,7 +83821,7 @@ "sref": "#/texts/121", "subj_hash": 4030998538427149966, "text": "5 | CASE STUDY: OIL AND GAS EXPLORATION", - "text-hash": 956984534850296757, + "text_hash": 956984534850296757, "type": "subtitle-level-1" }, { @@ -83835,7 +83835,7 @@ "sref": "#/texts/122", "subj_hash": 10295608624766759271, "text": "Oil and gas exploration is a complex, technical field of expertise. Unfortunately, the data of many geological processes and entities is scattered across databases (public and proprietary) and corpora of documents, where it is often deeply embedded in text, tables, and figures. This is a serious impediment for efficient exploration of new oil and gas opportunities. For example, geographic information of geological structures can be found in NaturalEarthData, \u2021\u2021\u2021 while their history, evolution, and components (eg, formations with their age, rock-composition, and depth) are discussed in reports (governmental and proprietary) and scientific articles. As such, experts in oil and gas exploration often need to read many documents in order to find all the information of a certain geographic area and get a good understanding of its underlying geology.", - "text-hash": 6212506812498931614, + "text_hash": 6212506812498931614, "type": "paragraph" }, { @@ -83849,7 +83849,7 @@ "sref": "#/texts/123", "subj_hash": 10633780781731536747, "text": "The main tasks of the experts working in oil and gas exploration are to identify potential new exploration sites. This is typically done by describing a basin or one of its sub-regions. In practice, ' describing a basin ' boils down to identifying all geological formations with their properties in the basin and investigating if these formations constitute a petroleum system. 19 In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal. The source is the rock formation in which the oil or gas was created. Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas. In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal. Each one of these components is comprised of one or more formations, with a certain age and rock composition. To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints. For example, the reservoir", - "text-hash": 8189171326047604114, + "text_hash": 8189171326047604114, "type": "paragraph" }, { @@ -83863,7 +83863,7 @@ "sref": "#/texts/124", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -83877,7 +83877,7 @@ "sref": "#/texts/125", "subj_hash": 1080447728722590413, "text": "12", - "text-hash": 15441160910541481976, + "text_hash": 15441160910541481976, "type": "paragraph" }, { @@ -83891,7 +83891,7 @@ "sref": "#/texts/126", "subj_hash": 4361549257087816853, "text": "of 15", - "text-hash": 329104161717916080, + "text_hash": 329104161717916080, "type": "paragraph" }, { @@ -83905,7 +83905,7 @@ "sref": "#/texts/127", "subj_hash": 10195664788154887804, "text": "formation has to have a lower depth than the seal formation. Another example of such constraints is that the age of the seal and reservoir has to be older than the source.", - "text-hash": 5965659969661688967, + "text_hash": 5965659969661688967, "type": "paragraph" }, { @@ -83919,7 +83919,7 @@ "sref": "#/texts/128", "subj_hash": 7538054744015619336, "text": "In order for the CPS platform to help the oil and gas explorationalists in their day-to-day job effectively, it needs to meet two objectives. On the one hand, it needs to create a consistent Knowledge Graph from a document corpus. This Knowledge Graph has to contain all geological formations with their respective properties (eg, geographical locations, depth, age, and rock composition). On the other hand, CPS needs to provide fast query responses, such that one can automatically retrieve potential components of petroleum systems and apply the constraints to filter out promising candidates.", - "text-hash": 13307027925001159475, + "text_hash": 13307027925001159475, "type": "paragraph" }, { @@ -83933,7 +83933,7 @@ "sref": "#/texts/129", "subj_hash": 12426662601736619109, "text": "During the development and implementation of custom NLU annotators in CPS for oil and gas exploration, the client team worked hand in hand with the IBM Research team to set up a controlled accuracy benchmark in which the key capabilities of the CPS can be quantified. The goal of the benchmark was to test the entire pipeline depicted in Figure 6, that is, from PDF document ingestion to a final, queryable KG. The key components of this specific pipeline are,", - "text-hash": 8341863300316693152, + "text_hash": 8341863300316693152, "type": "paragraph" }, { @@ -83947,7 +83947,7 @@ "sref": "#/texts/130", "subj_hash": 4162783521620221579, "text": "1. the conversion of PDF documents into JSON through CCS,", - "text-hash": 527957687390948274, + "text_hash": 527957687390948274, "type": "paragraph" }, { @@ -83961,7 +83961,7 @@ "sref": "#/texts/131", "subj_hash": 5135259059216244866, "text": "2. the creation of the KG in the CPS from the JSON documents, and", - "text-hash": 11300804242294087097, + "text_hash": 11300804242294087097, "type": "paragraph" }, { @@ -83975,7 +83975,7 @@ "sref": "#/texts/132", "subj_hash": 16998817296948099535, "text": "3. the querying of the KG served by CPS to identify petroleum systems elements with their properties.", - "text-hash": 4121058581451712246, + "text_hash": 4121058581451712246, "type": "paragraph" }, { @@ -83989,7 +83989,7 @@ "sref": "#/texts/133", "subj_hash": 1205649569241141618, "text": "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset. The advantage of using this dataset for an accuracy benchmark is that each report includes two parts. One part is verbose text describing the history, evolution, and composition of the fields. The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline. The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties. Therefore, we ingest these reports into CCS and extract both text and tables. Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark.", - "text-hash": 17333577132913364873, + "text_hash": 17333577132913364873, "type": "paragraph" }, { @@ -84003,7 +84003,7 @@ "sref": "#/texts/134", "subj_hash": 12257840490666828590, "text": "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages. This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure. These numbers are in line with those reported in our previous works. 1 Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer.", - "text-hash": 8803415231465414997, + "text_hash": 8803415231465414997, "type": "paragraph" }, { @@ -84017,7 +84017,7 @@ "sref": "#/texts/135", "subj_hash": 7040847965650746591, "text": "In step (2), we create the Knowledge Graph by executing a DF that will generate all the entities and relationships relevant to the geology domain. Our language annotator models trained for geology extract geographic areas, geological structures (eg, basins), formations, ages, rocks, petroleum systems, and their elements (PSE) (eg, seal, source, and reservoir). Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs). The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores. Eventually, the KG included 679 296 edges connecting 116 662 nodes.", - "text-hash": 13799731378750663142, + "text_hash": 13799731378750663142, "type": "paragraph" }, { @@ -84031,7 +84031,7 @@ "sref": "#/texts/136", "subj_hash": 7927601225025519287, "text": "In step (3), we query the Knowledge Graph using a tailored evaluation workflow. This workflow allows us to identify PSEs and their connected properties in the Knowledge Graph, for example, their age, formation and rock", - "text-hash": 13120217128072555470, + "text_hash": 13120217128072555470, "type": "paragraph" }, { @@ -84045,7 +84045,7 @@ "sref": "#/texts/137", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -84059,7 +84059,7 @@ "sref": "#/texts/138", "subj_hash": 1080447728722590402, "text": "13", - "text-hash": 15441160910541481977, + "text_hash": 15441160910541481977, "type": "paragraph" }, { @@ -84073,7 +84073,7 @@ "sref": "#/texts/139", "subj_hash": 4361549257087816853, "text": "of 15", - "text-hash": 329104161717916080, + "text_hash": 329104161717916080, "type": "paragraph" }, { @@ -84087,7 +84087,7 @@ "sref": "#/texts/140", "subj_hash": 8207961846673301043, "text": "composition. In Figure 7, we visualize the DAG of this workflow. The final node weights are accumulated throughout the branches on the workflow and represent the relevance score of each node.", - "text-hash": 14933956665806015562, + "text_hash": 14933956665806015562, "type": "paragraph" }, { @@ -84101,7 +84101,7 @@ "sref": "#/texts/141", "subj_hash": 11998199584890640594, "text": "To evaluate the correctness of the predicted PSE properties, we follow the standard practice of reporting the top-k accuracy. This is computed as the percentage in which any of the k highest ranked answers matches the expected answer, over all documents. In Table 1, we show the top-1, top-2, top-3, and top-5 accuracy for all properties of each petroleum system element. One can make two distinct observations. First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision). Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall). Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory.", - "text-hash": 9121677663017059817, + "text_hash": 9121677663017059817, "type": "paragraph" }, { @@ -84115,7 +84115,7 @@ "sref": "#/texts/142", "subj_hash": 16446129547721407877, "text": "6 | CONCLUSIONS", - "text-hash": 4326952903809379008, + "text_hash": 4326952903809379008, "type": "subtitle-level-1" }, { @@ -84129,7 +84129,7 @@ "sref": "#/texts/143", "subj_hash": 6720443978031524294, "text": "With the introduction of the CPS platform, we demonstrate substantial benefit for domain experts and data scientists in exercising deep exploration of published knowledge in a fully integrated, yet modular cloud solution. CPS seamlessly connects to the CSS, complementing it with a highly scalable, automated pipeline to build consistent domain knowledge models and an intuitive, powerful approach to explorational queries and graph-scale analytics. This is accomplished through three fundamental design considerations: (1) We do not require manual data curation or annotation; (2) We built a scalable, efficient architecture to support the ingestion, processing and query workloads, all embedded in", - "text-hash": 11733208797674542845, + "text_hash": 11733208797674542845, "type": "paragraph" }, { @@ -84143,7 +84143,7 @@ "sref": "#/texts/144", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -84157,7 +84157,7 @@ "sref": "#/texts/145", "subj_hash": 2144926730621142072, "text": "14of15", - "text-hash": 16380805732317250115, + "text_hash": 16380805732317250115, "type": "paragraph" }, { @@ -84171,7 +84171,7 @@ "sref": "#/texts/146", "subj_hash": 14222671032550229818, "text": "a single platform; and (3) We expose the capabilities through an intuitively consumable API and complementary UI tools.", - "text-hash": 1925144237473465665, + "text_hash": 1925144237473465665, "type": "paragraph" }, { @@ -84185,7 +84185,7 @@ "sref": "#/texts/147", "subj_hash": 17486770941839589126, "text": "In our oil and gas case study, we successfully verified our solution for a real-world application with the help of subject matter experts from a client team. Currently, CCS and CPS are actively used in more than five client engagements, most notably in the oil and gas industry as well as in the material science industry.", - "text-hash": 5943448246547541309, + "text_hash": 5943448246547541309, "type": "paragraph" }, { @@ -84199,7 +84199,7 @@ "sref": "#/texts/148", "subj_hash": 16574813224778118841, "text": "Future work will focus on processing public repositories such as the arXiv.org library, USPTO, and PubMed in order to make their content available to deep data exploration.", - "text-hash": 4472913868502496196, + "text_hash": 4472913868502496196, "type": "paragraph" }, { @@ -84213,7 +84213,7 @@ "sref": "#/texts/149", "subj_hash": 3356142343274371864, "text": "DATA AVAILABILITY STATEMENT", - "text-hash": 17772737780533561635, + "text_hash": 17772737780533561635, "type": "subtitle-level-1" }, { @@ -84227,7 +84227,7 @@ "sref": "#/texts/150", "subj_hash": 4778022085288441371, "text": "Data subject to third party restrictions.", - "text-hash": 11662592888764396578, + "text_hash": 11662592888764396578, "type": "paragraph" }, { @@ -84241,7 +84241,7 @@ "sref": "#/texts/151", "subj_hash": 4361549257598904601, "text": "ORCID", - "text-hash": 329104162230294308, + "text_hash": 329104162230294308, "type": "subtitle-level-1" }, { @@ -84255,7 +84255,7 @@ "sref": "#/texts/152", "subj_hash": 3523281823889115814, "text": "Peter W. J. Staar https://orcid.org/0000-0002-8088-0823 Michele Dolfi https://orcid.org/0000-0001-7216-8505 Christoph Auer https://orcid.org/0000-0001-5761-0422", - "text-hash": 1167445296370300893, + "text_hash": 1167445296370300893, "type": "paragraph" }, { @@ -84269,7 +84269,7 @@ "sref": "#/texts/153", "subj_hash": 8500729849894221215, "text": "ENDNOTES", - "text-hash": 14650266124350583462, + "text_hash": 14650266124350583462, "type": "subtitle-level-1" }, { @@ -84283,7 +84283,7 @@ "sref": "#/texts/154", "subj_hash": 7813503946963688644, "text": "* For example, ElasticSearch (https://www.elastic.co) and ApacheLucene (https://lucene.apache.org).", - "text-hash": 12950565807350876671, + "text_hash": 12950565807350876671, "type": "paragraph" }, { @@ -84297,7 +84297,7 @@ "sref": "#/texts/155", "subj_hash": 9230987401345399746, "text": "\u2020 Most language entities from a technical field are typically represented in a very specific, rigorous way that can be easily captured by regular expressions. We found that in practice, regular expressions often outperform DL models, since we can simply encode these representations.", - "text-hash": 6930355155738437881, + "text_hash": 6930355155738437881, "type": "paragraph" }, { @@ -84311,7 +84311,7 @@ "sref": "#/texts/156", "subj_hash": 1997735398126013155, "text": "\u2021 https://www.nltk.org", - "text-hash": 16829787344811603994, + "text_hash": 16829787344811603994, "type": "paragraph" }, { @@ -84325,7 +84325,7 @@ "sref": "#/texts/157", "subj_hash": 13566764974477978642, "text": "\u00a7 We follow the standard JSON-schema for references.", - "text-hash": 9498574747519310377, + "text_hash": 9498574747519310377, "type": "paragraph" }, { @@ -84339,7 +84339,7 @@ "sref": "#/texts/158", "subj_hash": 4925537010788978399, "text": "\u00b6 A rather simple similarity metric is to perform a fuzzy comparison of the names of the newly found entities (ie, the name field found in Listing 1). A more sophisticated approach is to use word embeddings to identify if two concepts are similar.", - "text-hash": 11235784383716113382, + "text_hash": 11235784383716113382, "type": "paragraph" }, { @@ -84353,7 +84353,7 @@ "sref": "#/texts/159", "subj_hash": 16552665876195410077, "text": "** For example Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb.", - "text-hash": 4287966239864749480, + "text_hash": 4287966239864749480, "type": "paragraph" }, { @@ -84367,7 +84367,7 @@ "sref": "#/texts/160", "subj_hash": 17579390613842440572, "text": "\u2020\u2020 This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html).", - "text-hash": 5855266272999108487, + "text_hash": 5855266272999108487, "type": "paragraph" }, { @@ -84381,7 +84381,7 @@ "sref": "#/texts/161", "subj_hash": 722212543953276862, "text": "\u2021\u2021 We chose Neo4J as a reference since it is currently the most popular graph database solution, see https://db-engines.com/en/ranking_ trend/graph+dbms", - "text-hash": 15713827668903361733, + "text_hash": 15713827668903361733, "type": "paragraph" }, { @@ -84395,7 +84395,7 @@ "sref": "#/texts/162", "subj_hash": 11085577343317113173, "text": "\u00a7\u00a7 http://graph500.org/", - "text-hash": 7449211522826545008, + "text_hash": 7449211522826545008, "type": "paragraph" }, { @@ -84409,7 +84409,7 @@ "sref": "#/texts/163", "subj_hash": 1792096630133661292, "text": "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", - "text-hash": 16747146533825186967, + "text_hash": 16747146533825186967, "type": "paragraph" }, { @@ -84423,7 +84423,7 @@ "sref": "#/texts/164", "subj_hash": 11462638369524745676, "text": "*** We assume the weight can be represented by a float value.", - "text-hash": 7288340874592977655, + "text_hash": 7288340874592977655, "type": "paragraph" }, { @@ -84437,7 +84437,7 @@ "sref": "#/texts/165", "subj_hash": 16611805225457383637, "text": "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", - "text-hash": 4512570954370983408, + "text_hash": 4512570954370983408, "type": "paragraph" }, { @@ -84451,7 +84451,7 @@ "sref": "#/texts/166", "subj_hash": 1531505125666754945, "text": "\u2021\u2021\u2021 https://www.naturalearthdata.com/", - "text-hash": 16922240937803157180, + "text_hash": 16922240937803157180, "type": "paragraph" }, { @@ -84465,7 +84465,7 @@ "sref": "#/texts/167", "subj_hash": 15684389308320953629, "text": "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/", - "text-hash": 2845896203864732456, + "text_hash": 2845896203864732456, "type": "paragraph" }, { @@ -84479,7 +84479,7 @@ "sref": "#/texts/168", "subj_hash": 14590754343934702701, "text": "REFERENCES", - "text-hash": 1858797456585454232, + "text_hash": 1858797456585454232, "type": "subtitle-level-1" }, { @@ -84493,7 +84493,7 @@ "sref": "#/texts/169", "subj_hash": 10480452763767134455, "text": "1. Staar Peter WJ, Michele D, Christoph A, Costas B. Corpus conversion service: a machine learning platform to ingest documents at scale. KDD '18. New York, NY: ACM; 2018:774-782.", - "text-hash": 7982224532612302350, + "text_hash": 7982224532612302350, "type": "paragraph" }, { @@ -84507,7 +84507,7 @@ "sref": "#/texts/170", "subj_hash": 11866471329779366855, "text": "2. Staar Peter WJ, Kl BP, Roxana I, et al. Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance. Chicago, IL: IEEE; 2016:812-821.", - "text-hash": 8969674542364551422, + "text_hash": 8969674542364551422, "type": "paragraph" }, { @@ -84521,7 +84521,7 @@ "sref": "#/texts/171", "subj_hash": 6016885898370676469, "text": "3. Matteo M, Christoph A, Val'ery W, et al. An information extraction and knowledge graph platform for accelerating biochemical discoveries. ArXiv.abs/1907.08400; 2019.", - "text-hash": 12797055744904705040, + "text_hash": 12797055744904705040, "type": "paragraph" }, { @@ -84535,7 +84535,7 @@ "sref": "#/texts/172", "subj_hash": 13946275785662847920, "text": "4. Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019). Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10. https://doi. org/10.2118/197610-MS.", - "text-hash": 2278118371277588683, + "text_hash": 2278118371277588683, "type": "paragraph" }, { @@ -84549,7 +84549,7 @@ "sref": "#/texts/173", "subj_hash": 7693798302433367973, "text": "5. Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016.", - "text-hash": 13426003943449777376, + "text_hash": 13426003943449777376, "type": "paragraph" }, { @@ -84563,7 +84563,7 @@ "sref": "#/texts/174", "subj_hash": 3109792572574236398, "text": "6. Chiu Jason PC, Eric N. Named entity recognition with bidirectional LSTM-CNNs. TACL. 2016;4:357-370.", - "text-hash": 17942512882695875605, + "text_hash": 17942512882695875605, "type": "paragraph" }, { @@ -84577,7 +84577,7 @@ "sref": "#/texts/175", "subj_hash": 8111170387462350170, "text": "7. Matthew H, Ines M. spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear. 2017.", - "text-hash": 15035325662489879393, + "text_hash": 15035325662489879393, "type": "paragraph" }, { @@ -84591,7 +84591,7 @@ "sref": "#/texts/176", "subj_hash": 14682702346227170925, "text": "8. Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!), a significant petroleum system, northern Grand Banks area, offshore eastern Canada. Am Assoc Pet Geol Bull. 2005;89(9):1203-1237.", - "text-hash": 1825488956803771544, + "text_hash": 1825488956803771544, "type": "paragraph" }, { @@ -84605,7 +84605,7 @@ "sref": "#/texts/177", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { @@ -84619,7 +84619,7 @@ "sref": "#/texts/178", "subj_hash": 11430385775112165283, "text": "9. Estrada E. Subgraph centrality in complex networks. Phys Rev E. 2005;71(5):056103.", - "text-hash": 7383629567386653914, + "text_hash": 7383629567386653914, "type": "paragraph" }, { @@ -84633,7 +84633,7 @@ "sref": "#/texts/179", "subj_hash": 5825495964576843004, "text": "10. Estrada Ernesto, Higham Desmond J. (2010). Network Properties Revealed through Matrix Functions. SIAM Review, 52, (4), 696-714. http://dx.doi.org/10.1137/090761070.", - "text-hash": 12713726337853489671, + "text_hash": 12713726337853489671, "type": "paragraph" }, { @@ -84647,7 +84647,7 @@ "sref": "#/texts/180", "subj_hash": 5698421097735371040, "text": "11. Labs Redis. Benchmarking RedisGraph 1.0. 2019.", - "text-hash": 10746649133789046619, + "text_hash": 10746649133789046619, "type": "paragraph" }, { @@ -84661,7 +84661,7 @@ "sref": "#/texts/181", "subj_hash": 5870535063942256428, "text": "12. TigerGraph. Real-Time Deep Link Analytics. 2018.", - "text-hash": 12596629408176592215, + "text_hash": 12596629408176592215, "type": "paragraph" }, { @@ -84675,7 +84675,7 @@ "sref": "#/texts/182", "subj_hash": 18196767266655606709, "text": "13. Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra. Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011.", - "text-hash": 4940703957630358736, + "text_hash": 4940703957630358736, "type": "paragraph" }, { @@ -84689,7 +84689,7 @@ "sref": "#/texts/183", "subj_hash": 3623403683642367845, "text": "14. Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015). Graphs, Matrices, and the GraphBLAS: Seven Good Reasons. Procedia Computer Science, 51, 2453-2462. http://dx.doi.org/10.1016/j.procs.2015.05.353.", - "text-hash": 1288017376570396064, + "text_hash": 1288017376570396064, "type": "paragraph" }, { @@ -84703,7 +84703,7 @@ "sref": "#/texts/184", "subj_hash": 13936866850854297069, "text": "15. Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications. Int J High Perform Comput Appl. 2011;25 (4):496-509.", - "text-hash": 2215522210708998936, + "text_hash": 2215522210708998936, "type": "paragraph" }, { @@ -84717,7 +84717,7 @@ "sref": "#/texts/185", "subj_hash": 8497015665124263236, "text": "16. Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS. 2016 IEEE HPEC. 2016; 1-9.", - "text-hash": 14644960259055240063, + "text_hash": 14644960259055240063, "type": "paragraph" }, { @@ -84731,7 +84731,7 @@ "sref": "#/texts/186", "subj_hash": 15947529491299956047, "text": "17. Ariful A, Mathias J, Aydin B, Ng Esmond G. The reverse Cuthill-McKee algorithm in distributed-memory. 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2017: 22-31.", - "text-hash": 2515131343544103798, + "text_hash": 2515131343544103798, "type": "paragraph" }, { @@ -84745,7 +84745,7 @@ "sref": "#/texts/187", "subj_hash": 14843401725435831033, "text": "18. Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices. 2005 Pakistan Section Multitopic Conference. 2005 1-7.", - "text-hash": 1389998498969001988, + "text_hash": 1389998498969001988, "type": "paragraph" }, { @@ -84759,7 +84759,7 @@ "sref": "#/texts/188", "subj_hash": 16676439669743530711, "text": "19. Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997.", - "text-hash": 4375808543141490670, + "text_hash": 4375808543141490670, "type": "paragraph" }, { @@ -84773,7 +84773,7 @@ "sref": "#/texts/189", "subj_hash": 2986547206451163051, "text": "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora. Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", - "text-hash": 17781974298360978642, + "text_hash": 17781974298360978642, "type": "paragraph" }, { @@ -84787,7 +84787,7 @@ "sref": "#/texts/190", "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" } ] diff --git a/tests/data/texts/references.nlp.jsonl b/tests/data/texts/references.nlp.jsonl index 45fb5b21..18a2bdc4 100644 --- a/tests/data/texts/references.nlp.jsonl +++ b/tests/data/texts/references.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, null, null, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, null, null, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, null, null, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, null, null, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, null, null, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, null, null, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, null, null, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, null, null, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, null, null, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14523797031010145779, "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, null, null, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, null, null, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, null, null, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, null, null, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, null, null, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, null, null, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, null, null, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, null, null, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, null, null, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, null, null, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, null, null, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, null, null, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, null, null, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, null, null, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4183773491823524238, "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, null, null, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, null, null, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, null, null, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, null, null, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, null, null, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, null, null, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, null, null, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, null, null, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, null, null, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14523797031010145779, "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text_hash": 18067349248114064711, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, null, null, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, null, null, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, null, null, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, null, null, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, null, null, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, null, null, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, null, null, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, null, null, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, null, null, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, null, null, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, null, null, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, null, null, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, null, null, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, null, null, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4183773491823524238, "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text_hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/semantics.nlp.jsonl b/tests/data/texts/semantics.nlp.jsonl index 80ea6157..441b861a 100644 --- a/tests/data/texts/semantics.nlp.jsonl +++ b/tests/data/texts/semantics.nlp.jsonl @@ -1,7 +1,7 @@ -{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", 7759316032128614217, "TEXT", "#", "reference", 0.48]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 7759316032128614217, "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} -{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", 14339562343989983509, "TEXT", "#", "meta-data", 0.88]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14339562343989983509, "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} -{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", 18143996061359107703, "TEXT", "#", "meta-data", 0.71]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 18143996061359107703, "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} -{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", 11035282656876697300, "TEXT", "#", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 11035282656876697300, "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "text"} -{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, null, null, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, null, null, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", 14993488697470108654, "TEXT", "#", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14993488697470108654, "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} -{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, null, null, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, null, null, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, null, null, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, null, null, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, null, null, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14523797031010145779, "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, null, null, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, null, null, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, null, null, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, null, null, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, null, null, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4183773491823524238, "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", 7759316032128614217, "TEXT", "#", "reference", 0.48]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 7759316032128614217, "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text_hash": 11303007895399162817, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", 14339562343989983509, "TEXT", "#", "meta-data", 0.88]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14339562343989983509, "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text_hash": 17380979703907035493, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", 18143996061359107703, "TEXT", "#", "meta-data", 0.71]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 18143996061359107703, "text": "IBM Research, Rueschlikon, Switzerland", "text_hash": 3204757815416943811, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", 11035282656876697300, "TEXT", "#", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 11035282656876697300, "text": "ABSTRACT", "text_hash": 14650435066888584228, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, null, null, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, null, null, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", 14993488697470108654, "TEXT", "#", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14993488697470108654, "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text_hash": 164218115435155290, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, null, null, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, null, null, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, null, null, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, null, null, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, null, null, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14523797031010145779, "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text_hash": 18067349248114064711, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, null, null, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, null, null, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, null, null, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, null, null, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, null, null, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4183773491823524238, "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text_hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/terms.nlp.jsonl b/tests/data/texts/terms.nlp.jsonl index b0c73917..88bdba21 100644 --- a/tests/data/texts/terms.nlp.jsonl +++ b/tests/data/texts/terms.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, null, null, 0, 177, 0, 164, 0, 37, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, null, null, 7, 31, 7, 26, 1, 9, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, null, null, 16, 26, 16, 23, 4, 7, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206575305750373, 3269040892355287555, null, null, 16, 25, 16, 22, 4, 6, true, "[f\u0281\u0251\u0303s", "[f\u0281\u0251\u0303s"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, null, null, 27, 30, 24, 25, 7, 8, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, null, null, 48, 63, 43, 58, 12, 14, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, null, null, 64, 122, 59, 109, 14, 24, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, null, null, 73, 95, 68, 88, 17, 19, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, null, null, 96, 106, 89, 98, 19, 21, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, null, null, 107, 121, 99, 108, 21, 23, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, null, null, 123, 127, 110, 114, 25, 28, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, null, null, 124, 126, 111, 113, 26, 27, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, null, null, 128, 130, 115, 117, 28, 29, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, null, null, 133, 140, 120, 127, 30, 31, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, null, null, 141, 158, 128, 145, 31, 33, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, null, null, 159, 161, 146, 148, 33, 34, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, null, null, 162, 176, 149, 163, 34, 36, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, null, null, 170, 176, 157, 163, 35, 36, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, null, null, 178, 375, 165, 362, 37, 73, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, null, null, 186, 194, 173, 181, 39, 40, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, null, null, 195, 211, 182, 198, 40, 42, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, null, null, 204, 227, 191, 214, 41, 44, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, null, null, 216, 227, 203, 214, 43, 44, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, null, null, 228, 234, 215, 221, 44, 46, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, null, null, 235, 243, 222, 230, 46, 47, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 820203855428083856, 16279894764651307170, null, null, 252, 280, 239, 267, 49, 54, true, "Atlantic, Pacific and Indian", "Atlantic, Pacific and Indian"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, null, null, 252, 260, 239, 247, 49, 50, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, null, null, 262, 269, 249, 256, 51, 52, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 4553045173532721202, 17291436396596241777, null, null, 274, 287, 261, 274, 53, 55, true, "Indian oceans", "Indian oceans"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, null, null, 281, 293, 268, 280, 54, 59, true, "oceans,[XII]", "oceans,[XII]"], ["parenthesis", "square brackets", 9818235231875948258, "TEXT", "#", 1.0, 329104147687597164, 12284735790511259080, null, null, 288, 293, 275, 280, 56, 59, true, "[XII]", "[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895542235, 10796895691287030884, null, null, 289, 292, 276, 279, 57, 58, true, "XII", "XII"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 14650940714797320124, 6236592394333508229, null, null, 292, 300, 279, 287, 58, 60, true, "] giving", "] giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, null, null, 308, 314, 295, 301, 62, 64, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, null, null, 315, 361, 302, 348, 64, 69, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, null, null, 362, 368, 349, 355, 69, 71, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, null, null, 369, 374, 356, 361, 71, 72, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, null, null, 376, 637, 363, 624, 73, 124, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, null, null, 376, 410, 363, 397, 73, 77, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, null, null, 389, 395, 376, 382, 74, 75, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, null, null, 411, 415, 398, 402, 77, 78, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, null, null, 416, 438, 403, 425, 78, 81, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, null, null, 439, 445, 426, 432, 81, 83, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, null, null, 446, 451, 433, 438, 83, 84, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, null, null, 461, 467, 448, 454, 86, 88, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, null, null, 492, 498, 479, 485, 92, 94, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, null, null, 505, 521, 492, 508, 96, 99, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, null, null, 515, 521, 502, 508, 98, 99, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, null, null, 522, 528, 509, 515, 99, 101, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, null, null, 541, 558, 528, 545, 104, 107, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, null, null, 559, 565, 546, 552, 107, 109, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, null, null, 566, 571, 553, 558, 109, 110, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, null, null, 579, 594, 566, 581, 113, 115, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, null, null, 595, 603, 582, 590, 115, 117, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, null, null, 619, 625, 606, 612, 119, 121, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, null, null, 626, 636, 613, 623, 121, 123, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, null, null, 638, 961, 625, 948, 124, 182, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, null, null, 642, 659, 629, 646, 125, 127, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, null, null, 660, 667, 647, 654, 127, 128, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, null, null, 668, 676, 655, 663, 128, 130, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, null, null, 677, 682, 664, 669, 130, 131, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, null, null, 683, 689, 670, 676, 131, 133, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, null, null, 709, 717, 696, 704, 136, 138, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, null, null, 736, 742, 723, 729, 140, 142, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, null, null, 778, 798, 765, 785, 149, 151, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, null, null, 799, 806, 786, 793, 151, 152, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, null, null, 807, 820, 794, 807, 152, 154, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, null, null, 821, 823, 808, 810, 154, 155, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, null, null, 824, 864, 811, 851, 155, 162, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, null, null, 839, 851, 826, 838, 158, 160, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, null, null, 856, 864, 843, 851, 161, 162, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, null, null, 865, 871, 852, 858, 162, 164, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, null, null, 872, 886, 859, 873, 164, 166, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, null, null, 892, 910, 879, 897, 168, 171, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, null, null, 916, 928, 903, 915, 173, 175, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, null, null, 929, 931, 916, 918, 175, 176, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, null, null, 962, 1384, 949, 1371, 182, 276, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, null, null, 966, 991, 953, 978, 183, 186, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, null, null, 992, 1020, 979, 1007, 186, 193, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, null, null, 998, 1000, 985, 987, 188, 189, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, null, null, 1007, 1019, 994, 1006, 190, 192, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, null, null, 1021, 1025, 1008, 1012, 193, 194, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, null, null, 1028, 1036, 1015, 1023, 195, 196, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, null, null, 1037, 1041, 1024, 1028, 196, 197, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, null, null, 1042, 1044, 1029, 1031, 197, 198, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, null, null, 1045, 1052, 1032, 1039, 198, 201, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, null, null, 1053, 1056, 1040, 1043, 201, 203, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486414, 16516410147586311652, null, null, 1053, 1055, 1040, 1042, 201, 202, true, "km", "km"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235162, 2654033242220620585, null, null, 1055, 1056, 1042, 1043, 202, 203, true, "2", "2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, null, null, 1057, 1072, 1044, 1059, 203, 210, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, null, null, 1058, 1065, 1045, 1052, 204, 207, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, null, null, 1066, 1071, 1053, 1058, 207, 209, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, null, null, 1077, 1081, 1064, 1068, 211, 212, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, null, null, 1084, 1100, 1071, 1087, 213, 215, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, null, null, 1101, 1103, 1088, 1090, 215, 216, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, null, null, 1104, 1108, 1091, 1095, 216, 217, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, null, null, 1109, 1111, 1096, 1098, 217, 218, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, null, null, 1112, 1119, 1099, 1106, 218, 219, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, null, null, 1120, 1122, 1107, 1109, 219, 220, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, null, null, 1123, 1125, 1110, 1112, 220, 221, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, null, null, 1126, 1133, 1113, 1120, 221, 222, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, null, null, 1134, 1145, 1121, 1132, 222, 230, true, "2023.[5][8]", "2023.[5][8]"], ["numval", "year", 9818235231875948258, "TEXT", "#", 1.0, 389609625548777251, 4871157181485963100, null, null, 1134, 1138, 1121, 1125, 222, 223, true, "2023", "2023"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577901, 10796892691399633238, null, null, 1139, 1142, 1126, 1129, 224, 227, true, "[5]", "[5]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235157, 2654033131002543179, null, null, 1140, 1141, 1127, 1128, 225, 226, true, "5", "5"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577838, 10796892702691935623, null, null, 1142, 1145, 1129, 1132, 227, 230, true, "[8]", "[8]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235152, 2654033132467492508, null, null, 1143, 1144, 1130, 1131, 228, 229, true, "8", "8"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, null, null, 1146, 1152, 1133, 1139, 230, 231, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, null, null, 1153, 1155, 1140, 1142, 231, 232, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14782540711164886662, 14111360077134393327, null, null, 1158, 1170, 1145, 1157, 233, 235, true, "unitary semi", "unitary semi"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, null, null, 1166, 1183, 1153, 1170, 234, 237, true, "semi-presidential", "semi-presidential"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9493572096187311884, 17586523526652496832, null, null, 1171, 1192, 1158, 1179, 236, 238, true, "presidential republic", "presidential republic"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, null, null, 1193, 1197, 1180, 1184, 238, 239, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, null, null, 1202, 1209, 1189, 1196, 240, 241, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, null, null, 1210, 1212, 1197, 1199, 241, 242, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, null, null, 1213, 1218, 1200, 1205, 242, 243, true, "Paris", "Paris"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, null, null, 1224, 1233, 1211, 1220, 245, 248, true, "countrys", "country's"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263873511, null, null, 1224, 1231, 1211, 1218, 245, 246, true, "country", "country"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13491731564569135959, 5310634626438687925, null, null, 1232, 1246, 1219, 1233, 247, 250, true, "s largest city", "s largest city"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, null, null, 1251, 1286, 1238, 1273, 251, 256, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, null, null, 1269, 1286, 1256, 1273, 254, 256, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, null, null, 1288, 1311, 1275, 1298, 257, 261, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, null, null, 1312, 1319, 1299, 1306, 261, 262, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, null, null, 1320, 1383, 1307, 1370, 262, 275, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, null, null, 1320, 1329, 1307, 1316, 262, 263, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, null, null, 1331, 1335, 1318, 1322, 264, 265, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, null, null, 1337, 1345, 1324, 1332, 266, 267, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, null, null, 1347, 1352, 1334, 1339, 268, 269, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, null, null, 1354, 1362, 1341, 1349, 270, 271, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, null, null, 1364, 1374, 1351, 1361, 272, 273, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, null, null, 1379, 1383, 1366, 1370, 274, 275, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.82]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 9818235231875948258, "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} -{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, null, null, 0, 188, 0, 188, 0, 40, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, null, null, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, null, null, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, null, null, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301128091, null, null, 24, 33, 24, 33, 5, 6, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587921185, null, null, 34, 41, 34, 41, 6, 7, true, "pairing", "pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, null, null, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, null, null, 45, 53, 45, 53, 8, 11, true, "two-band", "two-band"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206570221100871, 2911818818181444888, null, null, 49, 55, 49, 55, 10, 12, true, "band s", "band s"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, null, null, 54, 60, 54, 60, 11, 14, true, "s-wave", "s-wave"], ["term", "enum-term-mark-2", 4522339299074192207, "TEXT", "#", 1.0, 8560127426779937860, 4026994879422986240, null, null, 56, 66, 56, 66, 13, 16, true, "wave and d", "wave and d"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625633602560, 14144633872330801396, null, null, 56, 60, 56, 60, 13, 14, true, "wave", "wave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, null, null, 65, 71, 65, 71, 15, 18, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5267005535915851615, 13852357345485708038, null, null, 67, 87, 67, 87, 17, 19, true, "wave superconductors", "wave superconductors"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, null, null, 88, 92, 88, 92, 19, 20, true, "with", "with"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, null, null, 93, 96, 93, 96, 20, 23, true, "D4h", "D4h"], ["numval", "ival", 4522339299074192207, "TEXT", "#", 1.0, 17767354399704235156, 8513040951015345484, null, null, 94, 95, 94, 95, 21, 22, true, "4", "4"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 2516792725790519961, 10765065347046652233, null, null, 95, 105, 95, 105, 22, 24, true, "h symmetry", "h symmetry"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, null, null, 106, 113, 106, 113, 24, 26, true, "in both", "in both"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, null, null, 114, 127, 114, 127, 26, 29, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183561878, null, null, 114, 118, 114, 118, 26, 27, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1366921581602115232, 15058186165846257397, null, null, 119, 137, 119, 137, 28, 30, true, "reversal invariant", "reversal invariant"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, null, null, 146, 148, 146, 148, 32, 33, true, "as", "as"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, null, null, 149, 162, 149, 162, 33, 36, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183567675, null, null, 149, 153, 149, 153, 33, 34, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16155708024079339904, 14846007814114510811, null, null, 154, 171, 154, 171, 35, 37, true, "reversal symmetry", "reversal symmetry"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, null, null, 172, 180, 172, 180, 37, 38, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, null, null, 181, 187, 181, 187, 38, 39, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, null, null, 189, 384, 189, 384, 40, 75, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, null, null, 193, 201, 193, 201, 41, 42, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, null, null, 202, 204, 202, 204, 42, 43, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, null, null, 205, 214, 205, 214, 43, 44, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, null, null, 215, 244, 215, 244, 44, 47, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, null, null, 249, 264, 249, 264, 48, 50, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, null, null, 265, 271, 265, 271, 50, 52, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, null, null, 272, 286, 272, 286, 52, 53, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, null, null, 288, 293, 288, 293, 54, 55, true, "nodes", "nodes"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, null, null, 298, 309, 298, 309, 56, 60, true, "(dis)appear", "(dis)appear"], ["parenthesis", "round brackets", 4522339299074192207, "TEXT", "#", 1.0, 329104053577713079, 7302082272979819201, null, null, 298, 303, 298, 303, 56, 59, true, "(dis)", "(dis)"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 12178341415895452094, 8713100074317547395, null, null, 299, 302, 299, 302, 57, 58, true, "dis", "dis"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 16381206574684919940, 8627590102959499799, null, null, 303, 309, 303, 309, 59, 60, true, "appear", "appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, null, null, 311, 316, 311, 316, 61, 62, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, null, null, 322, 327, 322, 327, 64, 65, true, "leave", "leave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, null, null, 328, 341, 328, 341, 65, 68, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4859188827321755536, 9887725278734779219, null, null, 333, 351, 333, 351, 67, 69, true, "symmetry locations", "symmetry locations"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, null, null, 357, 374, 357, 374, 70, 72, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, null, null, 375, 383, 375, 383, 72, 74, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, null, null, 385, 594, 385, 594, 75, 114, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, null, null, 398, 404, 398, 404, 77, 79, true, "in the", "in the"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, null, null, 405, 411, 405, 411, 79, 82, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 3545604367994270661, 11829255560935036292, null, null, 407, 416, 407, 416, 81, 83, true, "wave case", "wave case"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, null, null, 421, 425, 421, 425, 85, 86, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, null, null, 426, 430, 426, 430, 86, 87, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, null, null, 440, 454, 440, 454, 89, 91, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, null, null, 455, 475, 455, 475, 91, 93, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, null, null, 481, 490, 481, 490, 94, 95, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, null, null, 491, 498, 491, 498, 95, 96, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, null, null, 499, 508, 499, 508, 96, 97, true, "increases", "increases"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, null, null, 515, 526, 515, 526, 99, 102, true, "zero-energy", "zero-energy"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1885602650026083434, 12476719833465444023, null, null, 520, 534, 520, 534, 101, 103, true, "energy Andreev", "energy Andreev"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104159325585799, 66191664906118763, null, null, 535, 540, 535, 540, 103, 104, true, "bound", "bound"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433796974, null, null, 541, 547, 541, 547, 104, 105, true, "states", "states"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, null, null, 548, 555, 548, 555, 105, 107, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, null, null, 560, 570, 560, 570, 108, 109, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, null, null, 571, 573, 571, 573, 109, 110, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, null, null, 574, 593, 574, 593, 110, 113, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.87], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.97]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4522339299074192207, "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} +{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, null, null, 0, 177, 0, 164, 0, 37, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, null, null, 7, 31, 7, 26, 1, 9, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, null, null, 16, 26, 16, 23, 4, 7, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206575305750373, 3269040892355287555, null, null, 16, 25, 16, 22, 4, 6, true, "[f\u0281\u0251\u0303s", "[f\u0281\u0251\u0303s"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, null, null, 27, 30, 24, 25, 7, 8, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, null, null, 48, 63, 43, 58, 12, 14, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, null, null, 64, 122, 59, 109, 14, 24, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, null, null, 73, 95, 68, 88, 17, 19, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, null, null, 96, 106, 89, 98, 19, 21, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, null, null, 107, 121, 99, 108, 21, 23, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, null, null, 123, 127, 110, 114, 25, 28, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, null, null, 124, 126, 111, 113, 26, 27, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, null, null, 128, 130, 115, 117, 28, 29, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, null, null, 133, 140, 120, 127, 30, 31, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, null, null, 141, 158, 128, 145, 31, 33, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, null, null, 159, 161, 146, 148, 33, 34, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, null, null, 162, 176, 149, 163, 34, 36, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, null, null, 170, 176, 157, 163, 35, 36, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, null, null, 178, 375, 165, 362, 37, 73, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, null, null, 186, 194, 173, 181, 39, 40, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, null, null, 195, 211, 182, 198, 40, 42, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, null, null, 204, 227, 191, 214, 41, 44, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, null, null, 216, 227, 203, 214, 43, 44, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, null, null, 228, 234, 215, 221, 44, 46, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, null, null, 235, 243, 222, 230, 46, 47, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 820203855428083856, 16279894764651307170, null, null, 252, 280, 239, 267, 49, 54, true, "Atlantic, Pacific and Indian", "Atlantic, Pacific and Indian"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, null, null, 252, 260, 239, 247, 49, 50, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, null, null, 262, 269, 249, 256, 51, 52, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 4553045173532721202, 17291436396596241777, null, null, 274, 287, 261, 274, 53, 55, true, "Indian oceans", "Indian oceans"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, null, null, 281, 293, 268, 280, 54, 59, true, "oceans,[XII]", "oceans,[XII]"], ["parenthesis", "square brackets", 9818235231875948258, "TEXT", "#", 1.0, 329104147687597164, 12284735790511259080, null, null, 288, 293, 275, 280, 56, 59, true, "[XII]", "[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895542235, 10796895691287030884, null, null, 289, 292, 276, 279, 57, 58, true, "XII", "XII"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 14650940714797320124, 6236592394333508229, null, null, 292, 300, 279, 287, 58, 60, true, "] giving", "] giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, null, null, 308, 314, 295, 301, 62, 64, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, null, null, 315, 361, 302, 348, 64, 69, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, null, null, 362, 368, 349, 355, 69, 71, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, null, null, 369, 374, 356, 361, 71, 72, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, null, null, 376, 637, 363, 624, 73, 124, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, null, null, 376, 410, 363, 397, 73, 77, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, null, null, 389, 395, 376, 382, 74, 75, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, null, null, 411, 415, 398, 402, 77, 78, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, null, null, 416, 438, 403, 425, 78, 81, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, null, null, 439, 445, 426, 432, 81, 83, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, null, null, 446, 451, 433, 438, 83, 84, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, null, null, 461, 467, 448, 454, 86, 88, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, null, null, 492, 498, 479, 485, 92, 94, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, null, null, 505, 521, 492, 508, 96, 99, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, null, null, 515, 521, 502, 508, 98, 99, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, null, null, 522, 528, 509, 515, 99, 101, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, null, null, 541, 558, 528, 545, 104, 107, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, null, null, 559, 565, 546, 552, 107, 109, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, null, null, 566, 571, 553, 558, 109, 110, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, null, null, 579, 594, 566, 581, 113, 115, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, null, null, 595, 603, 582, 590, 115, 117, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, null, null, 619, 625, 606, 612, 119, 121, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, null, null, 626, 636, 613, 623, 121, 123, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, null, null, 638, 961, 625, 948, 124, 182, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, null, null, 642, 659, 629, 646, 125, 127, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, null, null, 660, 667, 647, 654, 127, 128, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, null, null, 668, 676, 655, 663, 128, 130, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, null, null, 677, 682, 664, 669, 130, 131, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, null, null, 683, 689, 670, 676, 131, 133, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, null, null, 709, 717, 696, 704, 136, 138, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, null, null, 736, 742, 723, 729, 140, 142, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, null, null, 778, 798, 765, 785, 149, 151, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, null, null, 799, 806, 786, 793, 151, 152, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, null, null, 807, 820, 794, 807, 152, 154, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, null, null, 821, 823, 808, 810, 154, 155, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, null, null, 824, 864, 811, 851, 155, 162, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, null, null, 839, 851, 826, 838, 158, 160, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, null, null, 856, 864, 843, 851, 161, 162, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, null, null, 865, 871, 852, 858, 162, 164, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, null, null, 872, 886, 859, 873, 164, 166, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, null, null, 892, 910, 879, 897, 168, 171, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, null, null, 916, 928, 903, 915, 173, 175, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, null, null, 929, 931, 916, 918, 175, 176, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, null, null, 962, 1384, 949, 1371, 182, 276, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, null, null, 966, 991, 953, 978, 183, 186, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, null, null, 992, 1020, 979, 1007, 186, 193, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, null, null, 998, 1000, 985, 987, 188, 189, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, null, null, 1007, 1019, 994, 1006, 190, 192, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, null, null, 1021, 1025, 1008, 1012, 193, 194, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, null, null, 1028, 1036, 1015, 1023, 195, 196, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, null, null, 1037, 1041, 1024, 1028, 196, 197, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, null, null, 1042, 1044, 1029, 1031, 197, 198, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, null, null, 1045, 1052, 1032, 1039, 198, 201, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, null, null, 1053, 1056, 1040, 1043, 201, 203, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486414, 16516410147586311652, null, null, 1053, 1055, 1040, 1042, 201, 202, true, "km", "km"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235162, 2654033242220620585, null, null, 1055, 1056, 1042, 1043, 202, 203, true, "2", "2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, null, null, 1057, 1072, 1044, 1059, 203, 210, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, null, null, 1058, 1065, 1045, 1052, 204, 207, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, null, null, 1066, 1071, 1053, 1058, 207, 209, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, null, null, 1077, 1081, 1064, 1068, 211, 212, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, null, null, 1084, 1100, 1071, 1087, 213, 215, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, null, null, 1101, 1103, 1088, 1090, 215, 216, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, null, null, 1104, 1108, 1091, 1095, 216, 217, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, null, null, 1109, 1111, 1096, 1098, 217, 218, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, null, null, 1112, 1119, 1099, 1106, 218, 219, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, null, null, 1120, 1122, 1107, 1109, 219, 220, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, null, null, 1123, 1125, 1110, 1112, 220, 221, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, null, null, 1126, 1133, 1113, 1120, 221, 222, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, null, null, 1134, 1145, 1121, 1132, 222, 230, true, "2023.[5][8]", "2023.[5][8]"], ["numval", "year", 9818235231875948258, "TEXT", "#", 1.0, 389609625548777251, 4871157181485963100, null, null, 1134, 1138, 1121, 1125, 222, 223, true, "2023", "2023"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577901, 10796892691399633238, null, null, 1139, 1142, 1126, 1129, 224, 227, true, "[5]", "[5]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235157, 2654033131002543179, null, null, 1140, 1141, 1127, 1128, 225, 226, true, "5", "5"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577838, 10796892702691935623, null, null, 1142, 1145, 1129, 1132, 227, 230, true, "[8]", "[8]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235152, 2654033132467492508, null, null, 1143, 1144, 1130, 1131, 228, 229, true, "8", "8"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, null, null, 1146, 1152, 1133, 1139, 230, 231, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, null, null, 1153, 1155, 1140, 1142, 231, 232, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14782540711164886662, 14111360077134393327, null, null, 1158, 1170, 1145, 1157, 233, 235, true, "unitary semi", "unitary semi"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, null, null, 1166, 1183, 1153, 1170, 234, 237, true, "semi-presidential", "semi-presidential"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9493572096187311884, 17586523526652496832, null, null, 1171, 1192, 1158, 1179, 236, 238, true, "presidential republic", "presidential republic"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, null, null, 1193, 1197, 1180, 1184, 238, 239, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, null, null, 1202, 1209, 1189, 1196, 240, 241, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, null, null, 1210, 1212, 1197, 1199, 241, 242, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, null, null, 1213, 1218, 1200, 1205, 242, 243, true, "Paris", "Paris"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, null, null, 1224, 1233, 1211, 1220, 245, 248, true, "countrys", "country's"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263873511, null, null, 1224, 1231, 1211, 1218, 245, 246, true, "country", "country"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13491731564569135959, 5310634626438687925, null, null, 1232, 1246, 1219, 1233, 247, 250, true, "s largest city", "s largest city"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, null, null, 1251, 1286, 1238, 1273, 251, 256, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, null, null, 1269, 1286, 1256, 1273, 254, 256, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, null, null, 1288, 1311, 1275, 1298, 257, 261, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, null, null, 1312, 1319, 1299, 1306, 261, 262, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, null, null, 1320, 1383, 1307, 1370, 262, 275, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, null, null, 1320, 1329, 1307, 1316, 262, 263, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, null, null, 1331, 1335, 1318, 1322, 264, 265, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, null, null, 1337, 1345, 1324, 1332, 266, 267, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, null, null, 1347, 1352, 1334, 1339, 268, 269, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, null, null, 1354, 1362, 1341, 1349, 270, 271, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, null, null, 1364, 1374, 1351, 1361, 272, 273, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, null, null, 1379, 1383, 1366, 1370, 274, 275, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.82]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 9818235231875948258, "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text_hash": 13399504000106611798, "type": "text"} +{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, null, null, 0, 188, 0, 188, 0, 40, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, null, null, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, null, null, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, null, null, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301128091, null, null, 24, 33, 24, 33, 5, 6, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587921185, null, null, 34, 41, 34, 41, 6, 7, true, "pairing", "pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, null, null, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, null, null, 45, 53, 45, 53, 8, 11, true, "two-band", "two-band"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206570221100871, 2911818818181444888, null, null, 49, 55, 49, 55, 10, 12, true, "band s", "band s"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, null, null, 54, 60, 54, 60, 11, 14, true, "s-wave", "s-wave"], ["term", "enum-term-mark-2", 4522339299074192207, "TEXT", "#", 1.0, 8560127426779937860, 4026994879422986240, null, null, 56, 66, 56, 66, 13, 16, true, "wave and d", "wave and d"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625633602560, 14144633872330801396, null, null, 56, 60, 56, 60, 13, 14, true, "wave", "wave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, null, null, 65, 71, 65, 71, 15, 18, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5267005535915851615, 13852357345485708038, null, null, 67, 87, 67, 87, 17, 19, true, "wave superconductors", "wave superconductors"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, null, null, 88, 92, 88, 92, 19, 20, true, "with", "with"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, null, null, 93, 96, 93, 96, 20, 23, true, "D4h", "D4h"], ["numval", "ival", 4522339299074192207, "TEXT", "#", 1.0, 17767354399704235156, 8513040951015345484, null, null, 94, 95, 94, 95, 21, 22, true, "4", "4"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 2516792725790519961, 10765065347046652233, null, null, 95, 105, 95, 105, 22, 24, true, "h symmetry", "h symmetry"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, null, null, 106, 113, 106, 113, 24, 26, true, "in both", "in both"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, null, null, 114, 127, 114, 127, 26, 29, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183561878, null, null, 114, 118, 114, 118, 26, 27, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1366921581602115232, 15058186165846257397, null, null, 119, 137, 119, 137, 28, 30, true, "reversal invariant", "reversal invariant"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, null, null, 146, 148, 146, 148, 32, 33, true, "as", "as"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, null, null, 149, 162, 149, 162, 33, 36, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183567675, null, null, 149, 153, 149, 153, 33, 34, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16155708024079339904, 14846007814114510811, null, null, 154, 171, 154, 171, 35, 37, true, "reversal symmetry", "reversal symmetry"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, null, null, 172, 180, 172, 180, 37, 38, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, null, null, 181, 187, 181, 187, 38, 39, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, null, null, 189, 384, 189, 384, 40, 75, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, null, null, 193, 201, 193, 201, 41, 42, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, null, null, 202, 204, 202, 204, 42, 43, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, null, null, 205, 214, 205, 214, 43, 44, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, null, null, 215, 244, 215, 244, 44, 47, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, null, null, 249, 264, 249, 264, 48, 50, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, null, null, 265, 271, 265, 271, 50, 52, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, null, null, 272, 286, 272, 286, 52, 53, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, null, null, 288, 293, 288, 293, 54, 55, true, "nodes", "nodes"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, null, null, 298, 309, 298, 309, 56, 60, true, "(dis)appear", "(dis)appear"], ["parenthesis", "round brackets", 4522339299074192207, "TEXT", "#", 1.0, 329104053577713079, 7302082272979819201, null, null, 298, 303, 298, 303, 56, 59, true, "(dis)", "(dis)"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 12178341415895452094, 8713100074317547395, null, null, 299, 302, 299, 302, 57, 58, true, "dis", "dis"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 16381206574684919940, 8627590102959499799, null, null, 303, 309, 303, 309, 59, 60, true, "appear", "appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, null, null, 311, 316, 311, 316, 61, 62, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, null, null, 322, 327, 322, 327, 64, 65, true, "leave", "leave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, null, null, 328, 341, 328, 341, 65, 68, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4859188827321755536, 9887725278734779219, null, null, 333, 351, 333, 351, 67, 69, true, "symmetry locations", "symmetry locations"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, null, null, 357, 374, 357, 374, 70, 72, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, null, null, 375, 383, 375, 383, 72, 74, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, null, null, 385, 594, 385, 594, 75, 114, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, null, null, 398, 404, 398, 404, 77, 79, true, "in the", "in the"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, null, null, 405, 411, 405, 411, 79, 82, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 3545604367994270661, 11829255560935036292, null, null, 407, 416, 407, 416, 81, 83, true, "wave case", "wave case"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, null, null, 421, 425, 421, 425, 85, 86, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, null, null, 426, 430, 426, 430, 86, 87, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, null, null, 440, 454, 440, 454, 89, 91, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, null, null, 455, 475, 455, 475, 91, 93, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, null, null, 481, 490, 481, 490, 94, 95, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, null, null, 491, 498, 491, 498, 95, 96, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, null, null, 499, 508, 499, 508, 96, 97, true, "increases", "increases"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, null, null, 515, 526, 515, 526, 99, 102, true, "zero-energy", "zero-energy"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1885602650026083434, 12476719833465444023, null, null, 520, 534, 520, 534, 101, 103, true, "energy Andreev", "energy Andreev"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104159325585799, 66191664906118763, null, null, 535, 540, 535, 540, 103, 104, true, "bound", "bound"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433796974, null, null, 541, 547, 541, 547, 104, 105, true, "states", "states"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, null, null, 548, 555, 548, 555, 105, 107, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, null, null, 560, 570, 560, 570, 108, 109, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, null, null, 571, 573, 571, 573, 109, 110, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, null, null, 574, 593, 574, 593, 110, 113, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.87], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.97]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4522339299074192207, "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text_hash": 7455828584320671675, "type": "text"} diff --git a/tests/data/texts/test_02A_text_01.jsonl b/tests/data/texts/test_02A_text_01.jsonl index 9f1f1038..9488eac5 100644 --- a/tests/data/texts/test_02A_text_01.jsonl +++ b/tests/data/texts/test_02A_text_01.jsonl @@ -1 +1 @@ -{"applied_models": ["cite", "expression", "language", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "#", "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, null, null, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, null, null, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, null, null, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 253473544312511038, "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"applied_models": ["cite", "expression", "language", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "#", "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, null, null, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, null, null, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, null, null, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 253473544312511038, "text": "FeSe is a material.", "text_hash": 3797235776056707210, "type": "text"} diff --git a/tests/data/texts/test_02B_text_01.jsonl b/tests/data/texts/test_02B_text_01.jsonl index 65eb53cd..f472c0eb 100644 --- a/tests/data/texts/test_02B_text_01.jsonl +++ b/tests/data/texts/test_02B_text_01.jsonl @@ -1 +1 @@ -{"dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 253473544312511038, "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 253473544312511038, "text": "FeSe is a material.", "text_hash": 3797235776056707210, "type": "text"} From 8e4e89972c1b4b5198bf4457073ff83ccb9ac038 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Fri, 1 Dec 2023 07:09:22 +0100 Subject: [PATCH 19/22] improved reference output Signed-off-by: Peter Staar --- deepsearch_glm/nlp_train_reference.py | 77 ++-- src/andromeda/nlp/ent/reference.h | 337 +++++++++++------- src/andromeda/nlp/ent/sentence.h | 80 ++++- src/andromeda/nlp/pos/lapos.h | 7 +- .../base_crf_model/algorithms/crf_train.h | 5 +- 5 files changed, 339 insertions(+), 167 deletions(-) diff --git a/deepsearch_glm/nlp_train_reference.py b/deepsearch_glm/nlp_train_reference.py index f114b78b..228ac94e 100644 --- a/deepsearch_glm/nlp_train_reference.py +++ b/deepsearch_glm/nlp_train_reference.py @@ -52,6 +52,10 @@ def parse_arguments(): parser.add_argument('--output-dir', required=False, type=str, default="./reference-models", help="output directory for trained models") + + parser.add_argument('--max-items', required=False, + type=int, default=-1, + help="number of references") args = parser.parse_args() @@ -67,7 +71,7 @@ def parse_arguments(): else: odir = args.output_dir - return args.mode, args.input_dir, odir + return args.mode, args.input_dir, odir, args.max_items def shorten_text(text): @@ -192,7 +196,7 @@ def parse_with_anystyle_api(refs): return [] -def update_references(refs): +def update_references(refs, label_map): results = parse_with_anystyle_api(refs) @@ -220,9 +224,9 @@ def update_references(refs): beg += charlen beg += 1 - refs[j]["word-tokens"]["headers"].append("true-label") + refs[j]["word_tokens"]["headers"].append("true-label") - for ri,row_i in enumerate(refs[j]["word-tokens"]["data"]): + for ri,row_i in enumerate(refs[j]["word_tokens"]["data"]): label="__undef__" for rj,row_j in enumerate(item): @@ -230,20 +234,48 @@ def update_references(refs): label = row_j[0] break - refs[j]["word-tokens"]["data"][ri].append(label) + if label in label_map: + label = label_map[label] + else: + ##print(label) + label = "null" + + refs[j]["word_tokens"]["data"][ri].append(label) """ - print("\n\n", tabulate(refs[j]["word-tokens"]["data"], - headers=refs[j]["word-tokens"]["headers"])) + print(text) + print("\n\n", tabulate(refs[j]["word_tokens"]["data"], + headers=refs[j]["word_tokens"]["headers"])) """ refs[j]["annotated"]=True -def annotate(rfile, ofile): - - nlp_model = init_nlp_model("semantic", filters=["properties", "word-tokens"]) +def annotate(rfile, ofile, max_items): + + label_map = { + "author": "authors", + "title": "title", + "container-title": "conference", + "journal": "journal", + "date": "date", + "volume": "volume", + "pages": "pages", + "citation-number": "reference-number", + "note": "note", + "url": "url", + "doi": "doi", + "isbn": "isbn", + "publisher": "publisher" + } + + nlp_model = init_nlp_model("semantic", filters=["properties", "word_tokens"]) num_lines = sum(1 for _ in open(rfile)) + + if max_items!=-1: + max_items = min(max_items, num_lines) + else: + max_items = num_lines refs=[] @@ -252,7 +284,7 @@ def annotate(rfile, ofile): cnt = 0 - while True: + for i in tqdm.tqdm(range(0,max_items)): line = fr.readline().strip() if line==None or len(line)==0: @@ -271,8 +303,8 @@ def annotate(rfile, ofile): if len(refs)>=16: - print(f"\rreferennce-annotation: {cnt}/{num_lines}", end="") - update_references(refs) + #print(f"\rreference-annotation: {cnt}/{num_lines}", end="") + update_references(refs, label_map) for ref in refs: if "annotated" in ref and ref["annotated"]: @@ -280,7 +312,9 @@ def annotate(rfile, ofile): refs=[] - + #if max_items!=-1 and cnt>max_items: + # break + print(" --> done") fr.close() @@ -309,7 +343,7 @@ def prepare_for_crf(afile): except: continue - wt = item["word-tokens"] + wt = item["word_tokens"] if item["annotated"]: @@ -477,7 +511,7 @@ def train_fst(train_file, model_file, metrics_file): model.train(config) -def create_reference_model(mode, idir, odir): +def create_reference_model(mode:str, idir:str, odir:str, max_items:int=-1): json_files = glob.glob(os.path.join(idir, "*.json")) print("#-docs: ", len(json_files)) @@ -489,19 +523,16 @@ def create_reference_model(mode, idir, odir): crf_metrics_file = crf_model_file+".metrics.txt" """ - rfile = os.path.join(tdir, "nlp-train-references-crf.jsonl") - - fst_model_file = os.path.join(tdir, "fst_sematic") """ if mode=="extract" or mode=="all": - extract_references(json_files, sfile) + extract_references(json_files, sfile, max_items) if mode=="annotate" or mode=="all": - annotate(sfile, afile) + annotate(sfile, afile, max_items) if mode=="train" or mode=="all": train_crf(afile, crf_model_file, crf_metrics_file) @@ -524,6 +555,6 @@ def create_reference_model(mode, idir, odir): if __name__ == '__main__': - mode, idir, odir = parse_arguments() + mode, idir, odir, max_items = parse_arguments() - create_reference_model(mode, idir, odir) + create_reference_model(mode, idir, odir, max_items) diff --git a/src/andromeda/nlp/ent/reference.h b/src/andromeda/nlp/ent/reference.h index 09296df4..bd068bb5 100644 --- a/src/andromeda/nlp/ent/reference.h +++ b/src/andromeda/nlp/ent/reference.h @@ -10,9 +10,18 @@ namespace andromeda class nlp_model: public base_crf_model { typedef typename word_token::range_type range_type; - + const static inline std::string TAG = "__"+to_string(REFERENCE)+"__"; + const static inline std::set LABELS = { "reference-number", + "authors", "title", + "publisher", + "journal", "conference", + "date", + "volume", "pages", + "url", "doi", "isbn", + "note"}; + public: nlp_model(); @@ -21,23 +30,26 @@ namespace andromeda ~nlp_model(); virtual std::set get_dependencies() { return dependencies; } - + virtual model_type get_type() { return ENT; } virtual model_name get_name() { return REFERENCE; } virtual bool apply(subject& subj); virtual bool apply(subject
& subj) { return false; } virtual bool apply(subject& subj); - + private: //void initialise(std::filesystem::path resources_dir); bool initialise(); void run_model(subject& subj); - + void post_process(subject& subj); - + + std::string normalise_name(std::string orig); + void normalise_subject(subject& subj); + private: const static std::set dependencies; @@ -60,226 +72,281 @@ namespace andromeda { if(not base_crf_model::load(model_file, false)) { - LOG_S(ERROR) << "could not load REFERENCE model from " << model_file; - return false; + LOG_S(ERROR) << "could not load REFERENCE model from " << model_file; + return false; } return true; } - + bool nlp_model::apply(subject& doc) { if(not satisfies_dependencies(doc)) { - return false; + return false; } //LOG_S(INFO) << "#-texts: " << doc.texts.size(); for(auto& paragraph:doc.texts) { - this->apply(*paragraph); + this->apply(*paragraph); } return true; } - + bool nlp_model::apply(subject& subj) { //LOG_S(INFO) << __FILE__ << ":" << __LINE__ << "\t" << subj.get_text(); - + if(not satisfies_dependencies(subj)) { - //LOG_S(WARNING) << "does not satisfy deps ... "; - return false; + //LOG_S(WARNING) << "does not satisfy deps ... "; + return false; } - + bool is_ref=false; for(auto& cls:subj.properties) - { - if((cls.get_type()==to_key(SEMANTIC)) and (cls.is_label("reference"))) - { - is_ref = true; - //LOG_S(WARNING) << " => " << cls.get_type() << "\t" << cls.get_label(); - } - else - { - //LOG_S(INFO) << " => " << cls.get_type() << "\t" << cls.get_label(); - } + { + if((cls.get_type()==to_key(SEMANTIC)) and (cls.is_label("reference"))) + { + is_ref = true; + //LOG_S(WARNING) << " => " << cls.get_type() << "\t" << cls.get_label(); + } + else + { + //LOG_S(INFO) << " => " << cls.get_type() << "\t" << cls.get_label(); + } } - + // text in subject is not a reference and we do not apply the reference parser - if(not is_ref) + if(not is_ref) { - return true; + return true; } - + run_model(subj); post_process(subj); - + return true; } void nlp_model::run_model(subject& subj) { //LOG_S(WARNING) << __FILE__ << ":" << __LINE__ << "\t" << __FUNCTION__; - + std::vector crf_tokens={}; std::map ptid_to_wtid={}; auto& wtokens = subj.word_tokens; //auto& entities = subj.entities; - + //pre_process(wtokens, ent.wtok_range, pos_tokens, ptid_to_wtid); for(std::size_t l=0; l texts={".",",","and"}; - for(std::size_t l=1; l texts={".",",","and"}; + for(std::size_t l=1; l::post_process(subject& subj) { auto& wtokens = subj.word_tokens; //std::map > > labels_to_crng={}; std::map > labels_to_crng={}; - + for(std::size_t l=0; lfirst << ": " << (itr->second).size(); + for(auto jtr=(itr->second).begin(); jtr!=(itr->second).end(); jtr++) { - LOG_S(INFO) << itr->first << ": " << (itr->second).size(); - for(auto jtr=(itr->second).begin(); jtr!=(itr->second).end(); jtr++) - { - LOG_S(INFO) << " -> " << (*jtr)[0] << ", " << (*jtr)[1]; - } + LOG_S(INFO) << " -> " << (*jtr)[0] << ", " << (*jtr)[1]; + } } */ - - std::set labels + + /* + std::set labels = { "citation-number", - "author", "title", - //"publisher", "editor", - "journal", "container-title", - "location", "date", - //"volume", "pages", - "url", "doi"}; - - for(const auto& label:labels) - { - if(labels_to_crng.count(label)==0) - { - continue; - } - - auto& ranges = labels_to_crng.at(label); - - std::size_t ind=0; - while(ind::normalise_name(std::string orig) + { + const static std::vector endings + = {")", "]", ".", ",", " "}; + const static std::vector startings + = {"(", "[", + "doi:", "DOI:", "isbn:", "ISBN:", + "arXiv preprint", + " "}; + + std::string name = orig; + + bool updating=true; + while(updating) + { + updating=false; + for(auto end:endings) + { + if(name.ends_with(end)) + { + name = name.substr(0, name.size()-end.size()); + updating=true; + } + } } - // delete all non-reference instances - { - auto itr=subj.instances.begin(); - while(itr!=subj.instances.end()) - { - if(not (itr->is_model(REFERENCE))) - { - itr = subj.instances.erase(itr); - } - else - { - itr++; - } - } - } + updating = true; + while(updating) + { + updating=false; + for(auto strt:startings) + { + if(name.starts_with(strt)) + { + name = name.substr(strt.size(), name.size()-strt.size()); + updating=true; + } + } + } + + return name; } - + + void nlp_model::normalise_subject(subject& subj) + { + auto itr=subj.instances.begin(); + while(itr!=subj.instances.end()) + { + if(not (itr->is_model(REFERENCE))) + { + itr = subj.instances.erase(itr); + } + else + { + itr++; + } + } + } + } #endif diff --git a/src/andromeda/nlp/ent/sentence.h b/src/andromeda/nlp/ent/sentence.h index 6c5f17aa..6480f039 100644 --- a/src/andromeda/nlp/ent/sentence.h +++ b/src/andromeda/nlp/ent/sentence.h @@ -22,9 +22,6 @@ namespace andromeda virtual bool apply(subject& subj); virtual bool apply(subject
& subj) { return false; } - - //virtual bool apply(subject& subj) { return false; } - //virtual bool apply(subject& subj); private: @@ -93,7 +90,8 @@ namespace andromeda } std::string orig = subj.text; - + + std::vector sent_ranges={}; for(auto& expr:exprs) { std::vector items; @@ -109,9 +107,81 @@ namespace andromeda std::string sent = orig.substr(char_range[0], char_range[1]-char_range[0]); subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), - SENTENCE, "", + SENTENCE, "proper", sent, sent, char_range, ctok_range, wtok_range); + + sent_ranges.push_back(char_range); + } + } + + std::vector ranges={}; + for(auto& rng:sent_ranges) + { + if(ranges.size()==0 and rng.at(0)==0) + { + ranges.push_back(rng); + } + else if(ranges.size()==0 and rng.at(0)>0) + { + ranges.push_back({0, rng.at(0)}); + ranges.push_back(rng); + } + else if(ranges.back().at(1)==rng.at(0)) + { + ranges.push_back(rng); + } + else if(ranges.back().at(1)0 and ranges.back().at(1)0) + { + ranges.push_back({0, subj.get_len()}); + } + + for(auto itr=ranges.begin(); itr!=ranges.end(); ) + { + bool updated=false; + for(auto sent_rng:sent_ranges) + { + if(*itr==sent_rng) + { + itr = ranges.erase(itr); + updated=true; + } + } + + if(not updated) + { + itr++; + } + } + + for(auto rng:ranges) + { + range_type char_range = rng; + + range_type ctok_range = subj.get_char_token_range(char_range); + range_type wtok_range = subj.get_word_token_range(char_range); + + std::string sent = orig.substr(char_range[0], char_range[1]-char_range[0]); + + std::string normalised_sent = utils::replace(sent, " ", ""); + + if(normalised_sent.size()>0) + { + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), + SENTENCE, "improper", + sent, sent, + char_range, ctok_range, wtok_range); } } diff --git a/src/andromeda/nlp/pos/lapos.h b/src/andromeda/nlp/pos/lapos.h index 67bf46e6..843286b8 100644 --- a/src/andromeda/nlp/pos/lapos.h +++ b/src/andromeda/nlp/pos/lapos.h @@ -206,12 +206,13 @@ namespace andromeda { sent_ranges.push_back(inst.get_wtok_range()); - //LOG_S(INFO) << "sentence: " + //LOG_S(INFO) << "sentence (" << inst.get_subtype() << ") : " //<< sent_ranges.back().at(0) << ", " //<< sent_ranges.back().at(1); } } + /* std::vector ranges={}; for(auto& rng:sent_ranges) { @@ -239,8 +240,10 @@ namespace andromeda { ranges.push_back({ranges.back().at(1), wtokens.size()}); } + */ - for(auto& rng:ranges) + //for(auto& rng:ranges) + for(auto& rng:sent_ranges) { //LOG_S(INFO) << "range: " << rng.at(0) << ", " << rng.at(1); diff --git a/src/andromeda/tooling/models/base_crf_model/algorithms/crf_train.h b/src/andromeda/tooling/models/base_crf_model/algorithms/crf_train.h index 1582a23a..9e827b5c 100644 --- a/src/andromeda/tooling/models/base_crf_model/algorithms/crf_train.h +++ b/src/andromeda/tooling/models/base_crf_model/algorithms/crf_train.h @@ -262,11 +262,12 @@ namespace andromeda_crf { nlohmann::json sample = nlohmann::json::parse(line); - assert(sample.count("word-tokens")>0); + //assert(sample.count("word-tokens")>0); + //assert(sample.count(text_element::word_tokens_lbl)==1); assert(sample.count("training-sample")>0); //LOG_S(INFO) << sample.dump(2); - auto& wtokens = sample["word-tokens"]; + auto& wtokens = sample.at(andromeda::text_element::word_tokens_lbl); std::vector headers = {}; headers = wtokens.value("headers", headers); From f689409c390ace9b68d55a92ad49c492aa88c52f Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Fri, 1 Dec 2023 07:10:53 +0100 Subject: [PATCH 20/22] updated test02A Signed-off-by: Peter Staar --- tests/data/texts/test_02A_text_01.jsonl | 2 +- tests/test_nlp.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/data/texts/test_02A_text_01.jsonl b/tests/data/texts/test_02A_text_01.jsonl index 9488eac5..c535d49e 100644 --- a/tests/data/texts/test_02A_text_01.jsonl +++ b/tests/data/texts/test_02A_text_01.jsonl @@ -1 +1 @@ -{"applied_models": ["cite", "expression", "language", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "#", "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, null, null, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, null, null, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, null, null, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 253473544312511038, "text": "FeSe is a material.", "text_hash": 3797235776056707210, "type": "text"} +{"applied_models": ["cite", "expression", "language", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "#", "instances": {"data": [["sentence", "proper", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, null, null, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, null, null, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, null, null, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 253473544312511038, "text": "FeSe is a material.", "text_hash": 3797235776056707210, "type": "text"} diff --git a/tests/test_nlp.py b/tests/test_nlp.py index d64837c3..a0a18873 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -GENERATE=False +GENERATE=True import os import json From 41b8b2f4e6617ea7fbba5add1f78a4032341ea34 Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Sun, 3 Dec 2023 06:07:39 +0100 Subject: [PATCH 21/22] updated the models Signed-off-by: Peter Staar --- .../glm/model_cli/create/model_creator.h | 4 +- src/andromeda/nlp/cls/language.h | 4 +- src/andromeda/nlp/cls/semantic.h | 7 +- src/andromeda/nlp/ent/cite.h | 2 +- src/andromeda/nlp/ent/expression.h | 30 +- src/andromeda/nlp/ent/geoloc.h | 2 +- src/andromeda/nlp/ent/link.h | 2 +- src/andromeda/nlp/ent/name.h | 2 +- src/andromeda/nlp/ent/numval.h | 4 +- src/andromeda/nlp/ent/parenthesis.h | 4 +- src/andromeda/nlp/ent/pos_pattern.h | 17 +- src/andromeda/nlp/ent/quote.h | 2 +- src/andromeda/nlp/ent/reference.h | 4 +- src/andromeda/nlp/ent/sentence.h | 57 +- src/andromeda/nlp/pos/lapos.h | 14 +- src/andromeda/nlp/rel/abbreviation.h | 2 +- .../fasttext_supervised_model.h | 12 +- .../tooling/structs/elements/text_element.h | 30 +- .../tooling/structs/elements/utils.h | 2 +- .../structs/subjects/document/doc_captions.h | 2 +- .../structs/subjects/document/doc_maintext.h | 4 +- .../tooling/structs/subjects/table.h | 8 +- src/andromeda/tooling/structs/subjects/text.h | 6 +- tests/data/docs/1806.02284.nlp.json | 5029 ++++++--- tests/data/docs/doc_01.nlp.json | 9106 +++++++++++++---- tests/data/glm/test_01A/glm_ref/topology.json | 416 +- tests/data/texts/references.nlp.jsonl | 4 +- tests/data/texts/terms.nlp.jsonl | 4 +- tests/test_nlp.py | 4 +- 29 files changed, 10708 insertions(+), 4076 deletions(-) diff --git a/src/andromeda/glm/model_cli/create/model_creator.h b/src/andromeda/glm/model_cli/create/model_creator.h index b233cef4..25c6c9c5 100644 --- a/src/andromeda/glm/model_cli/create/model_creator.h +++ b/src/andromeda/glm/model_cli/create/model_creator.h @@ -224,7 +224,7 @@ namespace andromeda LOG_S(INFO) << "inserted node: " << doc_path; } - std::vector& tokens = subj.word_tokens; + std::vector& tokens = subj.get_word_tokens(); std::vector& instances = subj.instances; std::vector& relations = subj.relations; @@ -376,7 +376,7 @@ namespace andromeda { continue; } - std::vector& tokens = subj(i,j).word_tokens; + std::vector& tokens = subj(i,j).get_word_tokens(); //LOG_S(INFO) << "(i, j): " << i << ", " << j; //LOG_S(INFO) << andromeda::tabulate(tokens, subj(i,j).text); diff --git a/src/andromeda/nlp/cls/language.h b/src/andromeda/nlp/cls/language.h index dd811b15..00e68c21 100644 --- a/src/andromeda/nlp/cls/language.h +++ b/src/andromeda/nlp/cls/language.h @@ -63,7 +63,7 @@ namespace andromeda bool nlp_model::preprocess(const subject& subj, std::string& text) { - text = subj.text; + text = subj.get_text(); return true; } @@ -75,7 +75,7 @@ namespace andromeda auto& row = subj.data.at(i); for(std::size_t j=0; j::preprocess(const subject& subj, std::string& text) { - auto& wtokens = subj.word_tokens; + //auto& wtokens = subj.get_word_tokens(); - if(wtokens.size()==0) + //if(wtokens.size()==0) + if(subj.get_num_wtokens()==0) { text.clear(); return false; @@ -255,7 +256,7 @@ namespace andromeda auto& row = subj.data.at(i); for(std::size_t j=0; j::apply_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); for(auto& expr:exprs) { diff --git a/src/andromeda/nlp/ent/expression.h b/src/andromeda/nlp/ent/expression.h index e0fcda87..a80b1540 100644 --- a/src/andromeda/nlp/ent/expression.h +++ b/src/andromeda/nlp/ent/expression.h @@ -363,11 +363,13 @@ namespace andromeda { if(ent.is_model(EXPRESSION) and ent.is_subtype("common") and ent.wtoken_len()==1) { - subj.word_tokens.at(ent.get_wtok_range(0)).set_word(ent.get_name()); + //subj.word_tokens.at(ent.get_wtok_range(0)).set_word(ent.get_name()); + subj.set_word(ent.get_wtok_range(0), ent.get_name()); } else if(ent.is_model(EXPRESSION) and ent.is_subtype("apostrophe") and ent.wtoken_len()==1) { - subj.word_tokens.at(ent.get_wtok_range(0)).set_word(ent.get_name()); + //subj.word_tokens.at(ent.get_wtok_range(0)).set_word(ent.get_name()); + subj.set_word(ent.get_wtok_range(0), ent.get_name()); } else {} @@ -378,8 +380,8 @@ namespace andromeda bool nlp_model::apply_common_regex(subject& subj) { - //std::string orig = subj.text; - std::string text = subj.text; + //std::string orig = subj.get_text(); + std::string text = subj.get_text(); //std::size_t max_id = subj.get_max_ent_hash(); @@ -429,7 +431,7 @@ namespace andromeda bool nlp_model::apply_apostrophe_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); //std::size_t max_id = subj.get_max_ent_hash(); @@ -472,7 +474,7 @@ namespace andromeda bool nlp_model::apply_abbr_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); for(std::size_t l=0; l::apply_concatenation_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); // find all concat expressions for(auto& expr:concat_exprs) @@ -592,12 +594,12 @@ namespace andromeda { for(std::size_t j=0; j::apply_latex_regex(subject& subj) { - //std::string orig = subj.text; - std::string text = subj.text; + //std::string orig = subj.get_text(); + std::string text = subj.get_text(); for(auto& ent:subj.instances) { @@ -725,7 +727,7 @@ namespace andromeda } } - auto& wtokens = subj.word_tokens; + auto& wtokens = subj.get_word_tokens(); std::list wtoken_inds={}; for(std::size_t l=0; l::add_concatenated_expression(subject& subj, std::list wtoken_inds) { - auto& wtokens = subj.word_tokens; + auto& wtokens = subj.get_word_tokens(); std::set special_begins = {"\"", "'", "''", "{", "}", ".", ",", ";", "/"}; std::set special_endings = {".",",","?","!",":", ";", "\"", "'", "''"}; @@ -892,7 +894,7 @@ namespace andromeda //LOG_S(WARNING) << ent.name << " ->" << words.size(); //for(auto word:words) //{ - //LOG_S(WARNING) << "\t ->" << word.text; + //LOG_S(WARNING) << "\t ->" << word.get_text(); //} if(orig.starts_with("(") and orig.ends_with(")")) diff --git a/src/andromeda/nlp/ent/geoloc.h b/src/andromeda/nlp/ent/geoloc.h index d099db58..49ff9e77 100644 --- a/src/andromeda/nlp/ent/geoloc.h +++ b/src/andromeda/nlp/ent/geoloc.h @@ -221,7 +221,7 @@ namespace andromeda { for(std::size_t j=0; j::apply_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); for(auto& expr:exprs) { diff --git a/src/andromeda/nlp/ent/name.h b/src/andromeda/nlp/ent/name.h index 4d1c3e98..26572c25 100644 --- a/src/andromeda/nlp/ent/name.h +++ b/src/andromeda/nlp/ent/name.h @@ -136,7 +136,7 @@ namespace andromeda bool nlp_model::apply_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); for(auto& expr:exprs) { std::vector items; diff --git a/src/andromeda/nlp/ent/numval.h b/src/andromeda/nlp/ent/numval.h index a0c54917..7230b072 100644 --- a/src/andromeda/nlp/ent/numval.h +++ b/src/andromeda/nlp/ent/numval.h @@ -151,7 +151,7 @@ namespace andromeda bool nlp_model::apply_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); for(auto& expr:exprs) { std::vector items; @@ -199,7 +199,7 @@ namespace andromeda { for(std::size_t j=0; j& ranges_02, std::vector& chunks) { + auto& word_tokens = subj.get_word_tokens(); + for(pcre2_item& chunk:chunks) { std::vector token_inds = get_indices(chunk.text); @@ -172,7 +176,7 @@ namespace andromeda for(std::size_t l=0; l& ranges_02, std::vector& chunks) { + + for(pcre2_item& chunk:chunks) { std::vector token_inds = get_indices(chunk.text); @@ -223,12 +229,13 @@ namespace andromeda std::size_t ci=0,cj=0; auto& elem = subj(coor); - + auto& word_tokens = elem.get_word_tokens(); + std::vector > words; for(std::size_t l=0; l crf_tokens={}; std::map ptid_to_wtid={}; - auto& wtokens = subj.word_tokens; + auto& wtokens = subj.get_word_tokens(); //auto& entities = subj.entities; //pre_process(wtokens, ent.wtok_range, pos_tokens, ptid_to_wtid); @@ -186,7 +186,7 @@ namespace andromeda void nlp_model::post_process(subject& subj) { - auto& wtokens = subj.word_tokens; + auto& wtokens = subj.get_word_tokens(); //std::map > > labels_to_crng={}; std::map > labels_to_crng={}; diff --git a/src/andromeda/nlp/ent/sentence.h b/src/andromeda/nlp/ent/sentence.h index 6480f039..9624e73b 100644 --- a/src/andromeda/nlp/ent/sentence.h +++ b/src/andromeda/nlp/ent/sentence.h @@ -60,7 +60,7 @@ namespace andromeda return false; } - std::string text = subj.text; + std::string text = subj.get_text(); for(auto& ent:subj.instances) { @@ -89,7 +89,7 @@ namespace andromeda } } - std::string orig = subj.text; + std::string orig = subj.get_text(); std::vector sent_ranges={}; for(auto& expr:exprs) @@ -138,13 +138,13 @@ namespace andromeda } } - if(ranges.size()>0 and ranges.back().at(1)0 and ranges.back().at(1)0) + else if(ranges.size()==0 and text.size()>0) { - ranges.push_back({0, subj.get_len()}); + ranges.push_back({0, text.size()}); } for(auto itr=ranges.begin(); itr!=ranges.end(); ) @@ -165,24 +165,51 @@ namespace andromeda } } + //LOG_S(WARNING) << "text: " << text; + //LOG_S(WARNING) << "text-size: " << text.size() << "; subj.len: " << subj.get_len(); + for(auto rng:ranges) { range_type char_range = rng; + + //LOG_S(INFO) << "char (1): " << char_range.at(0) << "-" << char_range.at(1); + + while(char_range.at(0) char: " << char_range.at(0) << "-" << char_range.at(1); + } + + //LOG_S(INFO) << "char (2): " << char_range.at(0) << "-" << char_range.at(1); + + while(char_range.at(0) char: " << char_range.at(0) << "-" << char_range.at(1); + } + + //LOG_S(INFO) << "char (3): " << char_range.at(0) << "-" << char_range.at(1); + + if(char_range.at(0)==char_range.at(1)) + { + continue; + } range_type ctok_range = subj.get_char_token_range(char_range); range_type wtok_range = subj.get_word_token_range(char_range); std::string sent = orig.substr(char_range[0], char_range[1]-char_range[0]); - std::string normalised_sent = utils::replace(sent, " ", ""); - - if(normalised_sent.size()>0) - { - subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), - SENTENCE, "improper", - sent, sent, - char_range, ctok_range, wtok_range); - } + //LOG_S(WARNING) << " => sent: " << sent; + + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), + SENTENCE, "improper", + sent, sent, + char_range, ctok_range, wtok_range); } return update_applied_models(subj); diff --git a/src/andromeda/nlp/pos/lapos.h b/src/andromeda/nlp/pos/lapos.h index 843286b8..ab0e93d5 100644 --- a/src/andromeda/nlp/pos/lapos.h +++ b/src/andromeda/nlp/pos/lapos.h @@ -149,11 +149,13 @@ namespace andromeda bool nlp_model::apply(subject& subj) { // initialise - for(auto& token:subj.word_tokens) - { - token.set_pos(word_token::UNDEF_POS); - } + //for(auto& token:subj.get_word_tokens()) + //{ + //token.set_pos(word_token::UNDEF_POS); + //} + subj.init_pos(); + std::string lang="null"; if(not check_dependency(text_dependencies, subj, lang)) { @@ -174,7 +176,7 @@ namespace andromeda std::vector pos_tokens={}; std::map ptid_to_wtid={}; - auto& wtokens = subj.word_tokens; + auto& wtokens = subj.get_word_tokens(); auto& instances = subj.instances; /* @@ -269,7 +271,7 @@ namespace andromeda { for(std::size_t j=0; j::find_abbreviation_instances(subject& subj) { - std::string& text = subj.text; + std::string text = subj.get_text(); //std::size_t max_id = subj.get_max_ent_hash(); diff --git a/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h b/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h index 297667c2..0aae4012 100644 --- a/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h +++ b/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h @@ -352,15 +352,17 @@ namespace andromeda bool fasttext_supervised_model::preprocess(const subject& subj, std::string& text) { - auto& wtokens = subj.word_tokens; + //auto& wtokens = subj.word_tokens; //LOG_S(INFO) << "tokens: \n\n" << tabulate(wtokens); std::stringstream ss; - - std::size_t MAXLEN = 256; - for(std::size_t l=0; l0) diff --git a/src/andromeda/tooling/structs/elements/text_element.h b/src/andromeda/tooling/structs/elements/text_element.h index 2adbd38c..488db898 100644 --- a/src/andromeda/tooling/structs/elements/text_element.h +++ b/src/andromeda/tooling/structs/elements/text_element.h @@ -32,10 +32,23 @@ namespace andromeda std::size_t get_len() const { return len; } // number-of-chars std::size_t get_dst() const { return dst; } // number-of-utf8-tokens + + bool is_text_valid() { return text_valid; } void clear(); hash_type get_text_hash() const { return text_hash; } + + std::size_t get_num_wtokens() const { return word_tokens.size(); } + const word_token& get_wtoken(std::size_t i) const { return word_tokens.at(i); } + + std::vector& get_word_tokens() { return word_tokens; } + + void init_pos() { for(auto& wtoken:word_tokens) { wtoken.set_pos(word_token::UNDEF_POS); } } + + void set_pos(std::size_t i, std::string pos) { word_tokens.at(i).set_pos(pos); } + void set_tag(std::size_t i, std::string tag) { word_tokens.at(i).set_tag(tag); } + void set_word(std::size_t i, std::string wrd) { word_tokens.at(i).set_word(wrd); } bool set_text(const std::string& ctext); @@ -77,18 +90,23 @@ namespace andromeda void contract_char_tokens(); void contract_word_tokens(); - public: + //public: + private: + bool text_valid; - uint64_t text_hash; // hash of normalised text - + std::size_t len; // number-of-chars std::size_t dst; // number-of-utf8-tokens + protected: + std::string orig; // original text std::string text; // normalised text (removing confusables) + protected: + std::vector char_tokens; std::vector word_tokens; }; @@ -567,12 +585,12 @@ namespace andromeda std::string text_element::from_char_range(range_type char_range) { - std::size_t beg = char_range[0]; - std::size_t len = char_range[1]-beg; + std::size_t beg_ = char_range[0]; + std::size_t len_ = char_range[1]-beg_; if(char_range[1]<=text.size()) { - return text.substr(beg, len); + return text.substr(beg_, len_); } LOG_S(ERROR) << "char-range is out of bounds: text-length: " << text.size() diff --git a/src/andromeda/tooling/structs/elements/utils.h b/src/andromeda/tooling/structs/elements/utils.h index 23af9fc7..11c3cf40 100644 --- a/src/andromeda/tooling/structs/elements/utils.h +++ b/src/andromeda/tooling/structs/elements/utils.h @@ -14,7 +14,7 @@ namespace andromeda grid.push_back({}); for(auto& item:row) { - grid.back().push_back(item.text); + grid.back().push_back(item.get_text()); } } diff --git a/src/andromeda/tooling/structs/subjects/document/doc_captions.h b/src/andromeda/tooling/structs/subjects/document/doc_captions.h index 3f0e6a45..05217b46 100644 --- a/src/andromeda/tooling/structs/subjects/document/doc_captions.h +++ b/src/andromeda/tooling/structs/subjects/document/doc_captions.h @@ -169,7 +169,7 @@ namespace andromeda ind_type prov_ind = prov_to_index.at(prov); ind_type page_num = prov->get_page(); - std::string text = elem->text; + std::string text = elem->get_text(); text = utils::to_lower(text); text = utils::strip(text); diff --git a/src/andromeda/tooling/structs/subjects/document/doc_maintext.h b/src/andromeda/tooling/structs/subjects/document/doc_maintext.h index 6623789f..fa7c951c 100644 --- a/src/andromeda/tooling/structs/subjects/document/doc_maintext.h +++ b/src/andromeda/tooling/structs/subjects/document/doc_maintext.h @@ -84,8 +84,8 @@ namespace andromeda auto& curr_prov = curr->provs.back(); auto& next_prov = next->provs.front(); - auto& curr_text = curr->text; - auto& next_text = next->text; + std::string curr_text = curr->get_text(); + std::string next_text = next->get_text(); if(curr_prov->get_type()!="paragraph" or next_prov->get_type()!="paragraph" or diff --git a/src/andromeda/tooling/structs/subjects/table.h b/src/andromeda/tooling/structs/subjects/table.h index eddc6c2c..b91d99d3 100644 --- a/src/andromeda/tooling/structs/subjects/table.h +++ b/src/andromeda/tooling/structs/subjects/table.h @@ -383,7 +383,7 @@ namespace andromeda { for(std::size_t j=0; j& filters); @@ -147,13 +147,13 @@ namespace andromeda //LOG_S(INFO) << " -> subject::dhash = '" << dhash << "'"; //LOG_S(INFO) << " -> subject::text_hash = '" << text_element::text_hash << "'"; - std::vector hashes={dhash, text_element::text_hash}; + std::vector hashes={dhash, text_element::get_text_hash()}; base_subject::hash = utils::to_hash(hashes); //LOG_S(INFO) << " -> base_subject::hash = " << base_subject::hash; //LOG_S(INFO) << " -> subject::hash = " << subject::hash; - return text_element::text_valid; + return text_element::is_text_valid(); } bool subject::set_data(const nlohmann::json& item) diff --git a/tests/data/docs/1806.02284.nlp.json b/tests/data/docs/1806.02284.nlp.json index c39f002e..38a5e15b 100644 --- a/tests/data/docs/1806.02284.nlp.json +++ b/tests/data/docs/1806.02284.nlp.json @@ -2122,108 +2122,150 @@ ], [ "reference", - "doi", + "note", 7377574370756688828, "TEXT", "#/texts/0", 1.0, - 15358376557624922247, - 16767804341034909078, + 605943372629925146, + 4150439050068157691, null, null, 0, - 10, + 38, 0, - 10, + 38, 0, - 3, + 15, true, - "arXiv:1806", - "arXiv:1806" + "arXiv:1806.02284v1 [cs.DL] 24 May 2018", + "arXiv:1806.02284v1 [cs.DL] 24 May 2018" ], [ "reference", - "date", - 7377574370756688828, + "title", + 10227328696767902037, "TEXT", - "#/texts/0", + "#/texts/1", 1.0, - 15441160910541486922, - 218889631309891128, + 2059592768319149889, + 12648419765540885679, null, null, - 20, - 22, - 20, - 22, - 8, - 9, + 0, + 84, + 0, + 84, + 0, + 14, true, - "cs", - "cs" + "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale", + "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale." ], [ - "reference", - "title", - 7377574370756688828, + "sentence", + "improper", + 8770494724746327817, "TEXT", - "#/texts/0", + "#/texts/2", 1.0, - 15441160910541480776, - 218888595256728797, + 17380979703907035493, + 11475303598866218042, null, null, - 23, - 25, - 23, - 25, - 10, - 11, + 0, + 60, + 0, + 60, + 0, + 13, true, - "DL", - "DL" + "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", + "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas" ], [ - "reference", - "title", - 7377574370756688828, + "term", + "single-term", + 8770494724746327817, "TEXT", - "#/texts/0", + "#/texts/2", 1.0, - 2633454640929888599, - 16744695038281265103, + 4686361850733567621, + 18004158316128292853, null, null, - 27, - 38, - 27, - 38, - 12, + 0, + 15, + 0, 15, + 0, + 4, true, - "24 May 2018", - "24 May 2018" + "Peter W J Staar", + "Peter W J Staar" ], [ - "reference", - "title", - 10227328696767902037, + "term", + "single-term", + 8770494724746327817, "TEXT", - "#/texts/1", + "#/texts/2", 1.0, - 2059592768319149889, - 12648419765540885676, + 1571808557594152175, + 16063511885696103932, null, null, - 0, - 83, - 0, - 83, - 0, + 17, + 30, + 17, + 30, + 5, + 7, + true, + "Michele Dolfi", + "Michele Dolfi" + ], + [ + "term", + "single-term", + 8770494724746327817, + "TEXT", + "#/texts/2", + 1.0, + 9737597816447750448, + 3321146819492882758, + null, + null, + 32, + 46, + 32, + 46, + 8, + 10, + true, + "Christoph Auer", + "Christoph Auer" + ], + [ + "term", + "single-term", + 8770494724746327817, + "TEXT", + "#/texts/2", + 1.0, + 10999349626623612055, + 13198018091169765004, + null, + null, + 48, + 60, + 48, + 60, + 11, 13, true, - "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale", - "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale" + "Costas Bekas", + "Costas Bekas" ], [ "link", @@ -2246,6 +2288,90 @@ "taa,dol,cau,bek@zurich.ibm.com", "taa,dol,cau,bek@zurich.ibm.com" ], + [ + "sentence", + "improper", + 5704354110496947297, + "TEXT", + "#/texts/4", + 1.0, + 16114797969310195405, + 18017429467769240876, + null, + null, + 0, + 12, + 0, + 12, + 0, + 2, + true, + "IBM Research", + "IBM Research" + ], + [ + "term", + "single-term", + 5704354110496947297, + "TEXT", + "#/texts/4", + 1.0, + 16114797969310195405, + 18017429467769240876, + null, + null, + 0, + 12, + 0, + 12, + 0, + 2, + true, + "IBM Research", + "IBM Research" + ], + [ + "sentence", + "improper", + 11056873211244709904, + "TEXT", + "#/texts/5", + 1.0, + 10483037511456664190, + 18224338740994498152, + null, + null, + 0, + 24, + 0, + 24, + 0, + 3, + true, + "Rueschlikon, Switzerland", + "Rueschlikon, Switzerland" + ], + [ + "term", + "single-term", + 11056873211244709904, + "TEXT", + "#/texts/5", + 1.0, + 13928399879966460166, + 15958992602230194452, + null, + null, + 0, + 11, + 0, + 11, + 0, + 1, + true, + "Rueschlikon", + "Rueschlikon" + ], [ "geoloc", "country", @@ -2267,6 +2393,69 @@ "Switzerland", "Switzerland" ], + [ + "term", + "single-term", + 11056873211244709904, + "TEXT", + "#/texts/5", + 1.0, + 2664439525053388608, + 16906723856094244091, + null, + null, + 13, + 24, + 13, + 24, + 2, + 3, + true, + "Switzerland", + "Switzerland" + ], + [ + "sentence", + "improper", + 11788868678004267702, + "TEXT", + "#/texts/6", + 1.0, + 14650435066888584228, + 10333408182378271367, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "ABSTRACT", + "ABSTRACT" + ], + [ + "term", + "single-term", + 11788868678004267702, + "TEXT", + "#/texts/6", + 1.0, + 14650435066888584228, + 10333408182378271367, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "ABSTRACT", + "ABSTRACT" + ], [ "numval", "ival", @@ -2290,7 +2479,49 @@ ], [ "sentence", - "", + "improper", + 3624246356859711021, + "TEXT", + "#/texts/7", + 1.0, + 8523954622022126279, + 2634600934606984140, + null, + null, + 2, + 14, + 2, + 14, + 1, + 2, + true, + "INTRODUCTION", + "INTRODUCTION" + ], + [ + "term", + "single-term", + 3624246356859711021, + "TEXT", + "#/texts/7", + 1.0, + 8523954622022126279, + 2634600934606984140, + null, + null, + 2, + 14, + 2, + 14, + 1, + 2, + true, + "INTRODUCTION", + "INTRODUCTION" + ], + [ + "sentence", + "proper", 17999848460847860039, "TEXT", "#/texts/8", @@ -2500,7 +2731,7 @@ ], [ "sentence", - "", + "proper", 17999848460847860039, "TEXT", "#/texts/8", @@ -2752,7 +2983,7 @@ ], [ "sentence", - "", + "proper", 17999848460847860039, "TEXT", "#/texts/8", @@ -3193,7 +3424,7 @@ ], [ "sentence", - "", + "proper", 17999848460847860039, "TEXT", "#/texts/8", @@ -3445,7 +3676,7 @@ ], [ "sentence", - "", + "proper", 17999848460847860039, "TEXT", "#/texts/8", @@ -4117,7 +4348,7 @@ ], [ "sentence", - "", + "proper", 17999848460847860039, "TEXT", "#/texts/8", @@ -4369,7 +4600,7 @@ ], [ "sentence", - "", + "proper", 17999848460847860039, "TEXT", "#/texts/8", @@ -4789,7 +5020,7 @@ ], [ "sentence", - "", + "proper", 17999848460847860039, "TEXT", "#/texts/8", @@ -5356,7 +5587,7 @@ ], [ "sentence", - "", + "proper", 17999848460847860039, "TEXT", "#/texts/8", @@ -5628,88 +5859,67 @@ "engineering project engagements" ], [ - "reference", - "author", - 11222145795862225841, + "sentence", + "improper", + 14387482728083328702, "TEXT", - "#/texts/10", + "#/texts/9", 1.0, - 4686361850733567621, - 14659076240775980364, + 7430992009485070364, + 3404236123378547578, null, null, 0, - 15, + 21, 0, - 15, + 21, 0, 4, true, - "Peter W J Staar", - "Peter W J Staar" + "ACM Reference Format:", + "ACM Reference Format:" ], [ - "reference", - "author", - 11222145795862225841, - "TEXT", - "#/texts/10", - 1.0, - 1571808557594152175, - 2521268111811279239, - null, - null, - 17, - 30, - 17, - 30, - 5, - 7, - true, - "Michele Dolfi", - "Michele Dolfi" - ], - [ - "reference", - "author", - 11222145795862225841, + "term", + "single-term", + 14387482728083328702, "TEXT", - "#/texts/10", + "#/texts/9", 1.0, - 9737597816447750448, - 18360796446007226291, + 2765358421439641504, + 12293676611681314503, null, null, - 32, - 46, - 32, - 46, - 8, - 10, + 0, + 20, + 0, + 20, + 0, + 3, true, - "Christoph Auer", - "Christoph Auer" + "ACM Reference Format", + "ACM Reference Format" ], [ "reference", - "author", + "authors", 11222145795862225841, "TEXT", "#/texts/10", 1.0, - 13732913329338511598, - 1087221346292312189, + 17380979703907035493, + 13515342076980283133, null, null, - 48, + 0, 61, - 48, + 0, 61, - 11, + 0, 14, true, - "Costas Bekas.", - "Costas Bekas." + "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", + "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas." ], [ "reference", @@ -5719,18 +5929,18 @@ "#/texts/10", 1.0, 389609625548777054, - 918164764798402581, + 918164764798402580, null, null, 62, - 66, + 67, 62, - 66, + 67, 14, - 15, + 16, true, "2018", - "2018" + "2018." ], [ "reference", @@ -5740,39 +5950,39 @@ "#/texts/10", 1.0, 2059592768319149889, - 10728790470880119375, + 10728790470880119369, null, null, 68, - 151, + 153, 68, - 151, + 153, 16, - 29, + 31, true, "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale", - "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale" + "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.." ], [ "reference", - "container-title", + "conference", 11222145795862225841, "TEXT", "#/texts/10", 1.0, - 18326306750753291457, - 8917954083851035786, + 1024339758608418763, + 4530307856411656701, null, null, 154, - 247, + 262, 154, - 247, + 262, 31, - 49, + 55, true, - "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining", - "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining" + "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23", + "In KDD '18: The 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 19-23," ], [ "reference", @@ -5781,107 +5991,86 @@ "TEXT", "#/texts/10", 1.0, - 17017808558592810577, - 1917644983122671206, + 389609625548777054, + 918164764798382452, null, null, - 249, - 267, - 249, - 267, - 50, - 56, + 263, + 268, + 263, + 268, + 55, + 57, true, - "August 19-23, 2018", - "August 19-23, 2018" + "2018", + "2018," ], [ "reference", - "location", + "title", 11222145795862225841, "TEXT", "#/texts/10", 1.0, 7719325285618625183, - 12559995503894274239, + 12559995503894274232, null, null, 269, - 291, + 292, 269, - 291, + 292, 57, - 61, + 62, true, "London, United Kingdom", - "London, United Kingdom" + "London, United Kingdom." ], [ "reference", - "location", - 11222145795862225841, - "TEXT", - "#/texts/10", - 1.0, - 16918962045161917454, - 17491630952016593380, - null, - null, - 298, - 315, - 298, - 315, - 64, - 70, - true, - "New York, NY, USA", - "New York, NY, USA" - ], - [ - "reference", - "url", + "pages", 11222145795862225841, "TEXT", "#/texts/10", 1.0, - 3534146179424153776, - 16664784081959773586, + 8106348854048173944, + 14835972200295983643, null, null, - 326, - 344, - 326, - 344, + 317, + 325, + 317, + 325, + 71, 74, - 83, true, - "https://doi.org/10", - "https://doi.org/10" + "9 pages", + "9 pages." ], [ "reference", - "url", + "doi", 11222145795862225841, "TEXT", "#/texts/10", 1.0, - 7680709109455866852, - 9531684221895358060, + 3547103316902677392, + 5420440194729707424, null, null, - 346, + 326, 366, - 346, + 326, 366, - 84, + 74, 89, true, - "1145/3219819.3219834", - "1145/3219819.3219834" + "https://doi.org/10.1145/3219819.3219834", + "https://doi.org/10. 1145/3219819.3219834" ], [ "sentence", - "", + "proper", 16923207262044929933, "TEXT", "#/texts/11", @@ -6091,7 +6280,7 @@ ], [ "sentence", - "", + "proper", 16923207262044929933, "TEXT", "#/texts/11", @@ -6406,7 +6595,7 @@ ], [ "sentence", - "", + "proper", 16923207262044929933, "TEXT", "#/texts/11", @@ -6616,7 +6805,7 @@ ], [ "sentence", - "", + "proper", 16923207262044929933, "TEXT", "#/texts/11", @@ -6952,7 +7141,7 @@ ], [ "sentence", - "", + "proper", 16923207262044929933, "TEXT", "#/texts/11", @@ -7288,7 +7477,7 @@ ], [ "sentence", - "", + "proper", 16923207262044929933, "TEXT", "#/texts/11", @@ -7624,7 +7813,7 @@ ], [ "sentence", - "", + "proper", 16923207262044929933, "TEXT", "#/texts/11", @@ -7813,7 +8002,7 @@ ], [ "sentence", - "", + "proper", 16923207262044929933, "TEXT", "#/texts/11", @@ -8191,7 +8380,7 @@ ], [ "sentence", - "", + "proper", 16923207262044929933, "TEXT", "#/texts/11", @@ -8674,7 +8863,7 @@ ], [ "sentence", - "", + "proper", 16923207262044929933, "TEXT", "#/texts/11", @@ -9115,7 +9304,7 @@ ], [ "sentence", - "", + "proper", 3749305213430885773, "TEXT", "#/texts/12", @@ -9367,7 +9556,7 @@ ], [ "sentence", - "", + "proper", 3749305213430885773, "TEXT", "#/texts/12", @@ -9619,7 +9808,7 @@ ], [ "sentence", - "", + "proper", 3749305213430885773, "TEXT", "#/texts/12", @@ -9934,7 +10123,7 @@ ], [ "sentence", - "", + "proper", 3749305213430885773, "TEXT", "#/texts/12", @@ -10060,7 +10249,7 @@ ], [ "sentence", - "", + "proper", 3749305213430885773, "TEXT", "#/texts/12", @@ -10375,7 +10564,7 @@ ], [ "sentence", - "", + "proper", 3749305213430885773, "TEXT", "#/texts/12", @@ -10814,6 +11003,300 @@ "single task failures", "single task failures" ], + [ + "sentence", + "improper", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 4583103017707584490, + 3773241053432528296, + null, + null, + 0, + 200, + 0, + 200, + 0, + 42, + true, + "To obtain a thorough understanding of what our platform can do and how well it performs, we have structured this paper as follows: In Section 2, we briefly review the current state-of-the-art document", + "To obtain a thorough understanding of what our platform can do and how well it performs, we have structured this paper as follows: In Section 2, we briefly review the current state-of-the-art document" + ], + [ + "conn", + "single-conn", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 15441160910541487889, + 1208301800202473771, + null, + null, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "To", + "To" + ], + [ + "verb", + "single-verb", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 16381206566454849358, + 9937675227149035356, + null, + null, + 3, + 9, + 3, + 9, + 1, + 2, + true, + "obtain", + "obtain" + ], + [ + "term", + "single-term", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 6413078374611367110, + 18289099855452041800, + null, + null, + 12, + 34, + 12, + 34, + 3, + 5, + true, + "thorough understanding", + "thorough understanding" + ], + [ + "conn", + "single-conn", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 15441160910541485670, + 1208305055808327195, + null, + null, + 35, + 37, + 35, + 37, + 5, + 6, + true, + "of", + "of" + ], + [ + "term", + "single-term", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 14814125365076808131, + 9132922851223009092, + null, + null, + 47, + 55, + 47, + 55, + 8, + 9, + true, + "platform", + "platform" + ], + [ + "verb", + "single-verb", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 16381206563385633981, + 9409981804416442331, + null, + null, + 56, + 62, + 56, + 62, + 9, + 11, + true, + "can do", + "can do" + ], + [ + "verb", + "single-verb", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 14814126800707419949, + 5381850489748989063, + null, + null, + 79, + 87, + 79, + 87, + 15, + 16, + true, + "performs", + "performs" + ], + [ + "verb", + "compound-verb", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 10735586210654851278, + 13301199589425272395, + null, + null, + 92, + 107, + 92, + 107, + 18, + 20, + true, + "have structured", + "have structured" + ], + [ + "term", + "single-term", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 329104161668023890, + 15568633541447308062, + null, + null, + 113, + 118, + 113, + 118, + 21, + 22, + true, + "paper", + "paper" + ], + [ + "conn", + "single-conn", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 15441160910541487053, + 1208301915017920886, + null, + null, + 119, + 121, + 119, + 121, + 22, + 23, + true, + "as", + "as" + ], + [ + "verb", + "single-verb", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 8106397733466170068, + 18084786961609693163, + null, + null, + 122, + 129, + 122, + 129, + 23, + 24, + true, + "follows", + "follows" + ], + [ + "conn", + "single-conn", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 15441160910541480354, + 1208313713507445572, + null, + null, + 131, + 133, + 131, + 133, + 25, + 26, + true, + "In", + "In" + ], + [ + "term", + "single-term", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 8106352240078799135, + 17901286493151078321, + null, + null, + 134, + 141, + 134, + 141, + 26, + 27, + true, + "Section", + "Section" + ], [ "numval", "ival", @@ -10835,6 +11318,48 @@ "2", "2" ], + [ + "verb", + "compound-verb", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 6392257094263078663, + 15694891913787292952, + null, + null, + 148, + 162, + 148, + 162, + 30, + 32, + true, + "briefly review", + "briefly review" + ], + [ + "term", + "single-term", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 1479982345692789744, + 7784170769143762047, + null, + null, + 167, + 180, + 167, + 180, + 33, + 35, + true, + "current state", + "current state" + ], [ "expression", "word-concatenation", @@ -10856,6 +11381,69 @@ "state-of-the-art", "state-of-the-art" ], + [ + "conn", + "single-conn", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 15441160910541485670, + 1208305055808201363, + null, + null, + 181, + 183, + 181, + 183, + 36, + 37, + true, + "of", + "of" + ], + [ + "term", + "single-term", + 3409470577915009676, + "TEXT", + "#/texts/13", + 1.0, + 7130308703079769972, + 2158379659179583432, + null, + null, + 188, + 200, + 188, + 200, + 40, + 42, + true, + "art document", + "art document" + ], + [ + "sentence", + "improper", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 14404576958363248129, + 1130583122238099365, + null, + null, + 0, + 21, + 0, + 21, + 0, + 3, + true, + "processing solutions.", + "processing solutions." + ], [ "term", "single-term", @@ -10879,7 +11467,7 @@ ], [ "sentence", - "", + "proper", 17187299362680072378, "TEXT", "#/texts/14", @@ -11068,7 +11656,7 @@ ], [ "sentence", - "", + "proper", 17187299362680072378, "TEXT", "#/texts/14", @@ -11425,7 +12013,7 @@ ], [ "sentence", - "", + "proper", 17187299362680072378, "TEXT", "#/texts/14", @@ -11570,6 +12158,27 @@ "w.r.t", "w.r.t" ], + [ + "sentence", + "improper", + 17187299362680072378, + "TEXT", + "#/texts/14", + 1.0, + 1872435420814889085, + 408468597017277294, + null, + null, + 341, + 409, + 341, + 409, + 71, + 83, + true, + "research and possible next steps in the development of the platform.", + "research and possible next steps in the development of the platform." + ], [ "term", "single-term", @@ -11719,7 +12328,91 @@ ], [ "sentence", - "", + "improper", + 697648145931166262, + "TEXT", + "#/texts/15", + 1.0, + 15117478873388212468, + 13316734887501489198, + null, + null, + 2, + 18, + 2, + 18, + 1, + 5, + true, + "STATE OF THE ART", + "STATE OF THE ART" + ], + [ + "term", + "single-term", + 697648145931166262, + "TEXT", + "#/texts/15", + 1.0, + 329104162354012156, + 1352683901097439890, + null, + null, + 2, + 7, + 2, + 7, + 1, + 2, + true, + "STATE", + "STATE" + ], + [ + "conn", + "single-conn", + 697648145931166262, + "TEXT", + "#/texts/15", + 1.0, + 16381206483305693085, + 6041793152598526942, + null, + null, + 8, + 14, + 8, + 14, + 2, + 4, + true, + "OF THE", + "OF THE" + ], + [ + "term", + "single-term", + 697648145931166262, + "TEXT", + "#/texts/15", + 1.0, + 12178341415896230525, + 4014694735334277655, + null, + null, + 15, + 18, + 15, + 18, + 4, + 5, + true, + "ART", + "ART" + ], + [ + "sentence", + "proper", 7935233310532930917, "TEXT", "#/texts/16", @@ -12013,7 +12706,7 @@ ], [ "sentence", - "", + "proper", 7935233310532930917, "TEXT", "#/texts/16", @@ -12181,7 +12874,7 @@ ], [ "sentence", - "", + "proper", 7935233310532930917, "TEXT", "#/texts/16", @@ -12496,7 +13189,7 @@ ], [ "sentence", - "", + "proper", 7935233310532930917, "TEXT", "#/texts/16", @@ -12748,7 +13441,7 @@ ], [ "sentence", - "", + "proper", 7935233310532930917, "TEXT", "#/texts/16", @@ -13168,7 +13861,7 @@ ], [ "sentence", - "", + "proper", 7935233310532930917, "TEXT", "#/texts/16", @@ -13336,7 +14029,7 @@ ], [ "sentence", - "", + "proper", 7935233310532930917, "TEXT", "#/texts/16", @@ -13672,7 +14365,7 @@ ], [ "sentence", - "", + "proper", 2762070725424637531, "TEXT", "#/texts/17", @@ -13819,7 +14512,7 @@ ], [ "sentence", - "", + "proper", 2762070725424637531, "TEXT", "#/texts/17", @@ -14029,7 +14722,7 @@ ], [ "sentence", - "", + "proper", 2762070725424637531, "TEXT", "#/texts/17", @@ -14279,6 +14972,27 @@ "7", "7" ], + [ + "sentence", + "improper", + 2762070725424637531, + "TEXT", + "#/texts/17", + 1.0, + 170191308215292988, + 7068948155811907345, + null, + null, + 251, + 337, + 251, + 337, + 62, + 77, + true, + "In contrast to the open-source solutions, all three proprietary solutions support also", + "In contrast to the open-source solutions, all three proprietary solutions support also" + ], [ "conn", "single-conn", @@ -14405,6 +15119,27 @@ "proprietary solutions support", "proprietary solutions support" ], + [ + "sentence", + "improper", + 7536915191196259776, + "TEXT", + "#/texts/18", + 1.0, + 4898340339282641363, + 11801988189552517849, + null, + null, + 0, + 34, + 0, + 34, + 0, + 5, + true, + "extraction from scanned documents.", + "extraction from scanned documents." + ], [ "term", "single-term", @@ -14491,7 +15226,7 @@ ], [ "sentence", - "", + "proper", 7536915191196259776, "TEXT", "#/texts/18", @@ -14680,7 +15415,7 @@ ], [ "sentence", - "", + "proper", 7536915191196259776, "TEXT", "#/texts/18", @@ -15037,7 +15772,7 @@ ], [ "sentence", - "", + "proper", 7536915191196259776, "TEXT", "#/texts/18", @@ -15163,7 +15898,49 @@ ], [ "sentence", - "", + "improper", + 11495493007651807568, + "TEXT", + "#/texts/19", + 1.0, + 15017609965790311447, + 4178226830783960310, + null, + null, + 2, + 17, + 2, + 17, + 1, + 3, + true, + "PLATFORM DESIGN", + "PLATFORM DESIGN" + ], + [ + "term", + "single-term", + 11495493007651807568, + "TEXT", + "#/texts/19", + 1.0, + 15017609965790311447, + 4178226830783960310, + null, + null, + 2, + 17, + 2, + 17, + 1, + 3, + true, + "PLATFORM DESIGN", + "PLATFORM DESIGN" + ], + [ + "sentence", + "proper", 7650015170039242996, "TEXT", "#/texts/20", @@ -15520,7 +16297,7 @@ ], [ "sentence", - "", + "proper", 14959508657858158650, "TEXT", "#/texts/21", @@ -16024,7 +16801,7 @@ ], [ "sentence", - "", + "proper", 14959508657858158650, "TEXT", "#/texts/21", @@ -16465,7 +17242,7 @@ ], [ "sentence", - "", + "proper", 14959508657858158650, "TEXT", "#/texts/21", @@ -16717,7 +17494,7 @@ ], [ "sentence", - "", + "proper", 10379300903412882972, "TEXT", "#/texts/22", @@ -17200,7 +17977,7 @@ ], [ "sentence", - "", + "proper", 10379300903412882972, "TEXT", "#/texts/22", @@ -17368,7 +18145,7 @@ ], [ "sentence", - "", + "proper", 10379300903412882972, "TEXT", "#/texts/22", @@ -17620,7 +18397,7 @@ ], [ "sentence", - "", + "proper", 10379300903412882972, "TEXT", "#/texts/22", @@ -18082,7 +18859,7 @@ ], [ "sentence", - "", + "proper", 10379300903412882972, "TEXT", "#/texts/22", @@ -18523,7 +19300,7 @@ ], [ "sentence", - "", + "proper", 10379300903412882972, "TEXT", "#/texts/22", @@ -18796,7 +19573,7 @@ ], [ "sentence", - "", + "proper", 10379300903412882972, "TEXT", "#/texts/22", @@ -18985,7 +19762,7 @@ ], [ "sentence", - "", + "proper", 4994395008195818594, "TEXT", "#/texts/23", @@ -19384,7 +20161,7 @@ ], [ "sentence", - "", + "proper", 4994395008195818594, "TEXT", "#/texts/23", @@ -19804,7 +20581,7 @@ ], [ "sentence", - "", + "proper", 4994395008195818594, "TEXT", "#/texts/23", @@ -20014,7 +20791,7 @@ ], [ "sentence", - "", + "proper", 4994395008195818594, "TEXT", "#/texts/23", @@ -20560,7 +21337,7 @@ ], [ "sentence", - "", + "proper", 4994395008195818594, "TEXT", "#/texts/23", @@ -20770,7 +21547,7 @@ ], [ "sentence", - "", + "proper", 4994395008195818594, "TEXT", "#/texts/23", @@ -21085,7 +21862,49 @@ ], [ "sentence", - "", + "improper", + 4203835122307823579, + "TEXT", + "#/texts/24", + 1.0, + 966280404629460283, + 3529002687165516120, + null, + null, + 4, + 14, + 4, + 14, + 3, + 4, + true, + "Components", + "Components" + ], + [ + "term", + "single-term", + 4203835122307823579, + "TEXT", + "#/texts/24", + 1.0, + 966280404629460283, + 3529002687165516120, + null, + null, + 4, + 14, + 4, + 14, + 3, + 4, + true, + "Components", + "Components" + ], + [ + "sentence", + "proper", 13520362244078084911, "TEXT", "#/texts/25", @@ -21736,7 +22555,7 @@ ], [ "sentence", - "", + "proper", 1749622367305947670, "TEXT", "#/texts/26", @@ -22765,7 +23584,7 @@ ], [ "sentence", - "", + "proper", 1749622367305947670, "TEXT", "#/texts/26", @@ -22996,7 +23815,7 @@ ], [ "sentence", - "", + "proper", 1749622367305947670, "TEXT", "#/texts/26", @@ -23437,7 +24256,7 @@ ], [ "sentence", - "", + "proper", 1749622367305947670, "TEXT", "#/texts/26", @@ -23794,7 +24613,7 @@ ], [ "sentence", - "", + "proper", 11083736481641202939, "TEXT", "#/texts/27", @@ -24046,7 +24865,91 @@ ], [ "sentence", - "", + "improper", + 15403141463083979171, + "TEXT", + "#/texts/28", + 1.0, + 11915167694096606959, + 5182733670794307620, + null, + null, + 4, + 24, + 4, + 24, + 3, + 6, + true, + "Parsing of Documents", + "Parsing of Documents" + ], + [ + "term", + "single-term", + 15403141463083979171, + "TEXT", + "#/texts/28", + 1.0, + 8106352039449712482, + 16094475553600658489, + null, + null, + 4, + 11, + 4, + 11, + 3, + 4, + true, + "Parsing", + "Parsing" + ], + [ + "conn", + "single-conn", + 15403141463083979171, + "TEXT", + "#/texts/28", + 1.0, + 15441160910541485670, + 6638874286081294738, + null, + null, + 12, + 14, + 12, + 14, + 4, + 5, + true, + "of", + "of" + ], + [ + "term", + "single-term", + 15403141463083979171, + "TEXT", + "#/texts/28", + 1.0, + 2908675737836410520, + 13985897290952550496, + null, + null, + 15, + 24, + 15, + 24, + 5, + 6, + true, + "Documents", + "Documents" + ], + [ + "sentence", + "proper", 12234429517419341922, "TEXT", "#/texts/29", @@ -24445,7 +25348,7 @@ ], [ "sentence", - "", + "proper", 12234429517419341922, "TEXT", "#/texts/29", @@ -24802,7 +25705,7 @@ ], [ "sentence", - "", + "proper", 12234429517419341922, "TEXT", "#/texts/29", @@ -24928,7 +25831,7 @@ ], [ "sentence", - "", + "proper", 12234429517419341922, "TEXT", "#/texts/29", @@ -25180,7 +26083,7 @@ ], [ "sentence", - "", + "proper", 12234429517419341922, "TEXT", "#/texts/29", @@ -25327,7 +26230,7 @@ ], [ "sentence", - "", + "proper", 12234429517419341922, "TEXT", "#/texts/29", @@ -25558,7 +26461,7 @@ ], [ "sentence", - "", + "proper", 16957857111665886816, "TEXT", "#/texts/30", @@ -25957,7 +26860,7 @@ ], [ "sentence", - "", + "proper", 16957857111665886816, "TEXT", "#/texts/30", @@ -26335,7 +27238,7 @@ ], [ "sentence", - "", + "proper", 16957857111665886816, "TEXT", "#/texts/30", @@ -26797,7 +27700,7 @@ ], [ "sentence", - "", + "proper", 16957857111665886816, "TEXT", "#/texts/30", @@ -27154,7 +28057,7 @@ ], [ "sentence", - "", + "proper", 16957857111665886816, "TEXT", "#/texts/30", @@ -27322,7 +28225,7 @@ ], [ "sentence", - "", + "proper", 16957857111665886816, "TEXT", "#/texts/30", @@ -27532,7 +28435,7 @@ ], [ "sentence", - "", + "proper", 10390915169360946497, "TEXT", "#/texts/31", @@ -27826,7 +28729,7 @@ ], [ "sentence", - "", + "proper", 10390915169360946497, "TEXT", "#/texts/31", @@ -28036,7 +28939,7 @@ ], [ "sentence", - "", + "proper", 15254383206256494278, "TEXT", "#/texts/32", @@ -28645,7 +29548,7 @@ ], [ "sentence", - "", + "proper", 15254383206256494278, "TEXT", "#/texts/32", @@ -28939,7 +29842,7 @@ ], [ "sentence", - "", + "proper", 15254383206256494278, "TEXT", "#/texts/32", @@ -29147,6 +30050,27 @@ "3.3", "3.3" ], + [ + "sentence", + "improper", + 17759618186065566858, + "TEXT", + "#/texts/33", + 1.0, + 5080538864315934615, + 16467571907173769428, + null, + null, + 4, + 51, + 4, + 51, + 3, + 11, + true, + "Ground-truth gathering through human-annotation", + "Ground-truth gathering through human-annotation" + ], [ "expression", "word-concatenation", @@ -29168,9 +30092,114 @@ "Ground-truth", "Ground-truth" ], + [ + "term", + "single-term", + 17759618186065566858, + "TEXT", + "#/texts/33", + 1.0, + 16380809986200090416, + 12128809016399933385, + null, + null, + 4, + 10, + 4, + 10, + 3, + 4, + true, + "Ground", + "Ground" + ], + [ + "term", + "single-term", + 17759618186065566858, + "TEXT", + "#/texts/33", + 1.0, + 329104159241711235, + 17110406672795769920, + null, + null, + 11, + 16, + 11, + 16, + 5, + 6, + true, + "truth", + "truth" + ], + [ + "verb", + "single-verb", + 17759618186065566858, + "TEXT", + "#/texts/33", + 1.0, + 6182618727395946480, + 17473306470358297284, + null, + null, + 17, + 26, + 17, + 26, + 6, + 7, + true, + "gathering", + "gathering" + ], + [ + "conn", + "single-conn", + 17759618186065566858, + "TEXT", + "#/texts/33", + 1.0, + 8106478041484051995, + 17870444747334150633, + null, + null, + 27, + 34, + 27, + 34, + 7, + 8, + true, + "through", + "through" + ], + [ + "term", + "single-term", + 17759618186065566858, + "TEXT", + "#/texts/33", + 1.0, + 15359807916847495711, + 15527976255731243951, + null, + null, + 41, + 51, + 41, + 51, + 10, + 11, + true, + "annotation", + "annotation" + ], [ "sentence", - "", + "proper", 11638821473906997927, "TEXT", "#/texts/34", @@ -29464,7 +30493,7 @@ ], [ "sentence", - "", + "proper", 11638821473906997927, "TEXT", "#/texts/34", @@ -29800,7 +30829,7 @@ ], [ "sentence", - "", + "proper", 11638821473906997927, "TEXT", "#/texts/34", @@ -30094,7 +31123,7 @@ ], [ "sentence", - "", + "proper", 11638821473906997927, "TEXT", "#/texts/34", @@ -30367,7 +31396,7 @@ ], [ "sentence", - "", + "proper", 11638821473906997927, "TEXT", "#/texts/34", @@ -30514,7 +31543,7 @@ ], [ "sentence", - "", + "proper", 13020065077657899116, "TEXT", "#/texts/35", @@ -30871,7 +31900,7 @@ ], [ "sentence", - "", + "proper", 13020065077657899116, "TEXT", "#/texts/35", @@ -31144,7 +32173,7 @@ ], [ "sentence", - "", + "proper", 13020065077657899116, "TEXT", "#/texts/35", @@ -31312,7 +32341,7 @@ ], [ "sentence", - "", + "proper", 13020065077657899116, "TEXT", "#/texts/35", @@ -31627,7 +32656,7 @@ ], [ "sentence", - "", + "proper", 13020065077657899116, "TEXT", "#/texts/35", @@ -31774,7 +32803,7 @@ ], [ "sentence", - "", + "proper", 13020065077657899116, "TEXT", "#/texts/35", @@ -32173,7 +33202,7 @@ ], [ "sentence", - "", + "proper", 13020065077657899116, "TEXT", "#/texts/35", @@ -32362,7 +33391,7 @@ ], [ "sentence", - "", + "proper", 13020065077657899116, "TEXT", "#/texts/35", @@ -32677,7 +33706,7 @@ ], [ "sentence", - "", + "proper", 10103841011442966464, "TEXT", "#/texts/36", @@ -32908,7 +33937,7 @@ ], [ "sentence", - "", + "proper", 10103841011442966464, "TEXT", "#/texts/36", @@ -33223,7 +34252,7 @@ ], [ "sentence", - "", + "proper", 10103841011442966464, "TEXT", "#/texts/36", @@ -33454,7 +34483,7 @@ ], [ "sentence", - "", + "proper", 10103841011442966464, "TEXT", "#/texts/36", @@ -33664,7 +34693,7 @@ ], [ "sentence", - "", + "proper", 10103841011442966464, "TEXT", "#/texts/36", @@ -34042,7 +35071,7 @@ ], [ "sentence", - "", + "proper", 10103841011442966464, "TEXT", "#/texts/36", @@ -34315,7 +35344,7 @@ ], [ "sentence", - "", + "proper", 10103841011442966464, "TEXT", "#/texts/36", @@ -34462,7 +35491,7 @@ ], [ "sentence", - "", + "proper", 10103841011442966464, "TEXT", "#/texts/36", @@ -34840,7 +35869,7 @@ ], [ "sentence", - "", + "proper", 10982401368140758581, "TEXT", "#/texts/37", @@ -35153,6 +36182,27 @@ "minute", "minute" ], + [ + "sentence", + "improper", + 10982401368140758581, + "TEXT", + "#/texts/37", + 1.0, + 15293849339677975545, + 6301648977913682265, + null, + null, + 81, + 195, + 81, + 195, + 19, + 41, + true, + "The vertical red lines indicate that a training was performed on the annotated pages, and a new, improved model is", + "The vertical red lines indicate that a training was performed on the annotated pages, and a new, improved model is" + ], [ "term", "single-term", @@ -35342,6 +36392,27 @@ "is", "is" ], + [ + "sentence", + "improper", + 887751753527930563, + "TEXT", + "#/texts/38", + 1.0, + 839366828356612939, + 2690671102840519559, + null, + null, + 0, + 43, + 0, + 43, + 0, + 9, + true, + "used from that point to predict the labels.", + "used from that point to predict the labels." + ], [ "verb", "single-verb", @@ -35470,7 +36541,7 @@ ], [ "sentence", - "", + "proper", 887751753527930563, "TEXT", "#/texts/38", @@ -35638,7 +36709,7 @@ ], [ "sentence", - "", + "proper", 887751753527930563, "TEXT", "#/texts/38", @@ -36436,7 +37507,7 @@ ], [ "sentence", - "", + "proper", 887751753527930563, "TEXT", "#/texts/38", @@ -36751,7 +37822,133 @@ ], [ "sentence", - "", + "improper", + 4695688617288377564, + "TEXT", + "#/texts/39", + 1.0, + 16358094743947264, + 6433816586640848429, + null, + null, + 4, + 55, + 4, + 55, + 3, + 11, + true, + "Machine Learning: Training models & Applying models", + "Machine Learning: Training models & Applying models" + ], + [ + "term", + "single-term", + 4695688617288377564, + "TEXT", + "#/texts/39", + 1.0, + 13278563109182224937, + 14007099678725931642, + null, + null, + 4, + 20, + 4, + 20, + 3, + 5, + true, + "Machine Learning", + "Machine Learning" + ], + [ + "verb", + "single-verb", + 4695688617288377564, + "TEXT", + "#/texts/39", + 1.0, + 14652192900990142746, + 1742703337955175223, + null, + null, + 22, + 30, + 22, + 30, + 6, + 7, + true, + "Training", + "Training" + ], + [ + "term", + "single-term", + 4695688617288377564, + "TEXT", + "#/texts/39", + 1.0, + 16381206567230470443, + 13378873505183543995, + null, + null, + 31, + 37, + 31, + 37, + 7, + 8, + true, + "models", + "models" + ], + [ + "verb", + "single-verb", + 4695688617288377564, + "TEXT", + "#/texts/39", + 1.0, + 14650294412518490893, + 9206892751076057131, + null, + null, + 40, + 48, + 40, + 48, + 9, + 10, + true, + "Applying", + "Applying" + ], + [ + "term", + "single-term", + 4695688617288377564, + "TEXT", + "#/texts/39", + 1.0, + 16381206567230470443, + 13378873505183517146, + null, + null, + 49, + 55, + 49, + 55, + 10, + 11, + true, + "models", + "models" + ], + [ + "sentence", + "proper", 3275001812318455279, "TEXT", "#/texts/40", @@ -36961,7 +38158,7 @@ ], [ "sentence", - "", + "proper", 3275001812318455279, "TEXT", "#/texts/40", @@ -37108,7 +38305,7 @@ ], [ "sentence", - "", + "proper", 3275001812318455279, "TEXT", "#/texts/40", @@ -37885,7 +39082,7 @@ ], [ "sentence", - "", + "proper", 3275001812318455279, "TEXT", "#/texts/40", @@ -38074,7 +39271,7 @@ ], [ "sentence", - "", + "proper", 15354930767839681193, "TEXT", "#/texts/41", @@ -38179,7 +39376,7 @@ ], [ "sentence", - "", + "proper", 15354930767839681193, "TEXT", "#/texts/41", @@ -38473,7 +39670,7 @@ ], [ "sentence", - "", + "proper", 15354930767839681193, "TEXT", "#/texts/41", @@ -38830,7 +40027,7 @@ ], [ "sentence", - "", + "proper", 15354930767839681193, "TEXT", "#/texts/41", @@ -39059,6 +40256,27 @@ "precision metrics", "precision metrics" ], + [ + "sentence", + "improper", + 15354930767839681193, + "TEXT", + "#/texts/41", + 1.0, + 12903638239872928158, + 8509485504691770152, + null, + null, + 421, + 466, + 421, + 466, + 86, + 95, + true, + "The second observation is that we deal with a", + "The second observation is that we deal with a" + ], [ "term", "single-term", @@ -39166,7 +40384,7 @@ ], [ "sentence", - "", + "proper", 6337233386759158728, "TEXT", "#/texts/42", @@ -39563,6 +40781,48 @@ "label", "label" ], + [ + "sentence", + "improper", + 2249972239307071508, + "TEXT", + "#/texts/43", + 1.0, + 1131271437908497026, + 6501537939232378270, + null, + null, + 0, + 105, + 0, + 100, + 0, + 25, + true, + "The recall (= \u211b) and precision (= \ud835\udcab) for a given label on a page is defined by the standard formulas", + "The recall (= \u211b) and precision (= \ud835\udcab) for a given label on a page is defined by the standard formulas" + ], + [ + "term", + "single-term", + 2249972239307071508, + "TEXT", + "#/texts/43", + 1.0, + 16381206521531485437, + 13336791031081332791, + null, + null, + 4, + 10, + 4, + 10, + 1, + 2, + true, + "recall", + "recall" + ], [ "parenthesis", "round brackets", @@ -39584,6 +40844,27 @@ "(= \u211b)", "(= \u211b)" ], + [ + "term", + "single-term", + 2249972239307071508, + "TEXT", + "#/texts/43", + 1.0, + 6184954595655792282, + 6057810244653513926, + null, + null, + 23, + 32, + 21, + 30, + 7, + 8, + true, + "precision", + "precision" + ], [ "parenthesis", "round brackets", @@ -39605,6 +40886,195 @@ "(= \ud835\udcab)", "(= \ud835\udcab)" ], + [ + "conn", + "single-conn", + 2249972239307071508, + "TEXT", + "#/texts/43", + 1.0, + 329104161711024499, + 16207807302671295931, + null, + null, + 42, + 47, + 37, + 42, + 12, + 14, + true, + "for a", + "for a" + ], + [ + "verb", + "single-verb", + 2249972239307071508, + "TEXT", + "#/texts/43", + 1.0, + 329104159209890620, + 17263756844809834790, + null, + null, + 48, + 53, + 43, + 48, + 14, + 15, + true, + "given", + "given" + ], + [ + "term", + "single-term", + 2249972239307071508, + "TEXT", + "#/texts/43", + 1.0, + 329104161624445793, + 16205718723307651300, + null, + null, + 54, + 59, + 49, + 54, + 15, + 16, + true, + "label", + "label" + ], + [ + "conn", + "single-conn", + 2249972239307071508, + "TEXT", + "#/texts/43", + 1.0, + 389609625618762887, + 3511303195864256626, + null, + null, + 60, + 64, + 55, + 59, + 16, + 18, + true, + "on a", + "on a" + ], + [ + "term", + "single-term", + 2249972239307071508, + "TEXT", + "#/texts/43", + 1.0, + 389609625632301461, + 3508907653604072128, + null, + null, + 65, + 69, + 60, + 64, + 18, + 19, + true, + "page", + "page" + ], + [ + "verb", + "compound-verb", + 2249972239307071508, + "TEXT", + "#/texts/43", + 1.0, + 15603910126920359788, + 4006199286213053110, + null, + null, + 70, + 80, + 65, + 75, + 19, + 21, + true, + "is defined", + "is defined" + ], + [ + "conn", + "single-conn", + 2249972239307071508, + "TEXT", + "#/texts/43", + 1.0, + 16381206574363061705, + 5323448344738461337, + null, + null, + 81, + 87, + 76, + 82, + 21, + 23, + true, + "by the", + "by the" + ], + [ + "term", + "single-term", + 2249972239307071508, + "TEXT", + "#/texts/43", + 1.0, + 157922170442399677, + 16233929801837067262, + null, + null, + 88, + 105, + 83, + 100, + 23, + 25, + true, + "standard formulas", + "standard formulas" + ], + [ + "sentence", + "improper", + 12383805870947794174, + "TEXT", + "#/texts/44", + 1.0, + 16621297164481399662, + 10286298684766241302, + null, + null, + 0, + 68, + 0, + 63, + 0, + 50, + true, + "\u211b = t$_{p}$ t$_{p}$ + f$_{p}$ , \ud835\udcab = t$_{p}$ t$_{p}$ + f$_{n}$ ,", + "\u211b = t$_{p}$ t$_{p}$ + f$_{p}$ , \ud835\udcab = t$_{p}$ t$_{p}$ + f$_{n}$ ," + ], [ "expression", "wtoken-concatenation", @@ -39647,6 +41117,27 @@ "t_{p}", "t$_{p}$" ], + [ + "term", + "single-term", + 12383805870947794174, + "TEXT", + "#/texts/44", + 1.0, + 12178341415896405178, + 16140376238637985375, + null, + null, + 22, + 25, + 20, + 23, + 16, + 18, + true, + "+ f", + "+ f" + ], [ "expression", "wtoken-concatenation", @@ -39710,6 +41201,27 @@ "t_{p}", "t$_{p}$" ], + [ + "term", + "single-term", + 12383805870947794174, + "TEXT", + "#/texts/44", + 1.0, + 12178341415896405178, + 16140376238637983318, + null, + null, + 57, + 60, + 52, + 55, + 41, + 43, + true, + "+ f", + "+ f" + ], [ "expression", "wtoken-concatenation", @@ -39775,7 +41287,28 @@ ], [ "sentence", - "", + "improper", + 7053654953998543393, + "TEXT", + "#/texts/45", + 1.0, + 329104161580313375, + 14014731568156467696, + null, + null, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "where", + "where" + ], + [ + "sentence", + "proper", 7053654953998543393, "TEXT", "#/texts/45", @@ -39922,7 +41455,7 @@ ], [ "sentence", - "", + "proper", 15921044595687116426, "TEXT", "#/texts/46", @@ -40027,7 +41560,7 @@ ], [ "sentence", - "", + "proper", 15921044595687116426, "TEXT", "#/texts/46", @@ -40216,7 +41749,7 @@ ], [ "sentence", - "", + "proper", 15921044595687116426, "TEXT", "#/texts/46", @@ -40699,7 +42232,7 @@ ], [ "sentence", - "", + "proper", 15921044595687116426, "TEXT", "#/texts/46", @@ -41266,7 +42799,7 @@ ], [ "sentence", - "", + "proper", 15921044595687116426, "TEXT", "#/texts/46", @@ -41707,7 +43240,7 @@ ], [ "sentence", - "", + "proper", 12234068400463628788, "TEXT", "#/texts/47", @@ -42064,7 +43597,7 @@ ], [ "sentence", - "", + "proper", 4628466594790006384, "TEXT", "#/texts/48", @@ -42251,6 +43784,27 @@ "11", "11" ], + [ + "sentence", + "improper", + 4628466594790006384, + "TEXT", + "#/texts/48", + 1.0, + 4991499868856994540, + 13217759163608639175, + null, + null, + 79, + 125, + 79, + 125, + 19, + 28, + true, + "We have annotated 30000 PDF pages and know the", + "We have annotated 30000 PDF pages and know the" + ], [ "verb", "compound-verb", @@ -42335,6 +43889,27 @@ "know", "know" ], + [ + "sentence", + "improper", + 9651706913678711778, + "TEXT", + "#/texts/49", + 1.0, + 7258462281647718782, + 8518881042476995585, + null, + null, + 0, + 44, + 0, + 44, + 0, + 10, + true, + "location of at least one table on each page.", + "location of at least one table on each page." + ], [ "term", "single-term", @@ -42463,7 +44038,7 @@ ], [ "sentence", - "", + "proper", 9651706913678711778, "TEXT", "#/texts/49", @@ -42757,7 +44332,7 @@ ], [ "sentence", - "", + "proper", 9651706913678711778, "TEXT", "#/texts/49", @@ -43135,7 +44710,7 @@ ], [ "sentence", - "", + "proper", 1363251178266051349, "TEXT", "#/texts/50", @@ -43429,7 +45004,7 @@ ], [ "sentence", - "", + "proper", 1363251178266051349, "TEXT", "#/texts/50", @@ -43702,7 +45277,7 @@ ], [ "sentence", - "", + "proper", 1363251178266051349, "TEXT", "#/texts/50", @@ -43870,7 +45445,7 @@ ], [ "sentence", - "", + "proper", 1363251178266051349, "TEXT", "#/texts/50", @@ -44248,7 +45823,7 @@ ], [ "sentence", - "", + "proper", 1363251178266051349, "TEXT", "#/texts/50", @@ -44542,7 +46117,7 @@ ], [ "sentence", - "", + "proper", 1363251178266051349, "TEXT", "#/texts/50", @@ -44815,7 +46390,7 @@ ], [ "sentence", - "", + "proper", 1363251178266051349, "TEXT", "#/texts/50", @@ -45088,7 +46663,7 @@ ], [ "sentence", - "", + "proper", 18259197018396996238, "TEXT", "#/texts/51", @@ -45214,7 +46789,7 @@ ], [ "sentence", - "", + "proper", 18259197018396996238, "TEXT", "#/texts/51", @@ -45634,7 +47209,7 @@ ], [ "sentence", - "", + "proper", 18259197018396996238, "TEXT", "#/texts/51", @@ -45991,7 +47566,7 @@ ], [ "sentence", - "", + "proper", 18259197018396996238, "TEXT", "#/texts/51", @@ -46558,7 +48133,7 @@ ], [ "sentence", - "", + "proper", 18259197018396996238, "TEXT", "#/texts/51", @@ -46852,7 +48427,7 @@ ], [ "sentence", - "", + "proper", 18259197018396996238, "TEXT", "#/texts/51", @@ -46936,7 +48511,7 @@ ], [ "sentence", - "", + "proper", 18259197018396996238, "TEXT", "#/texts/51", @@ -47398,7 +48973,7 @@ ], [ "sentence", - "", + "proper", 14663676516964431047, "TEXT", "#/texts/52", @@ -47671,7 +49246,7 @@ ], [ "sentence", - "", + "proper", 14663676516964431047, "TEXT", "#/texts/52", @@ -47963,6 +49538,27 @@ "1", "1" ], + [ + "sentence", + "improper", + 14663676516964431047, + "TEXT", + "#/texts/52", + 1.0, + 3134573113158163246, + 3504812983975446350, + null, + null, + 240, + 398, + 240, + 398, + 43, + 74, + true, + "We use these bounding boxes to associate with each cell a label, which is in this particular case either Table or Not-Table, depending on whether they overlap", + "We use these bounding boxes to associate with each cell a label, which is in this particular case either Table or Not-Table, depending on whether they overlap" + ], [ "verb", "single-verb", @@ -48301,7 +49897,7 @@ ], [ "sentence", - "", + "proper", 4577067829072175096, "TEXT", "#/texts/53", @@ -48448,7 +50044,7 @@ ], [ "sentence", - "", + "proper", 4577067829072175096, "TEXT", "#/texts/53", @@ -48658,7 +50254,7 @@ ], [ "sentence", - "", + "proper", 4577067829072175096, "TEXT", "#/texts/53", @@ -48887,6 +50483,27 @@ "label types", "label types" ], + [ + "sentence", + "improper", + 2569392033451362672, + "TEXT", + "#/texts/54", + 1.0, + 14779160475161540675, + 15519079009918015266, + null, + null, + 0, + 32, + 0, + 32, + 0, + 6, + true, + "with the predicted bounding box.", + "with the predicted bounding box." + ], [ "conn", "single-conn", @@ -48952,7 +50569,7 @@ ], [ "sentence", - "", + "proper", 2569392033451362672, "TEXT", "#/texts/54", @@ -49120,7 +50737,7 @@ ], [ "sentence", - "", + "proper", 2569392033451362672, "TEXT", "#/texts/54", @@ -49477,7 +51094,7 @@ ], [ "sentence", - "", + "proper", 2569392033451362672, "TEXT", "#/texts/54", @@ -50044,7 +51661,28 @@ ], [ "sentence", - "", + "improper", + 2569392033451362672, + "TEXT", + "#/texts/54", + 1.0, + 17767354399704235166, + 13994996428052197005, + null, + null, + 447, + 448, + 447, + 448, + 88, + 89, + true, + ".", + "." + ], + [ + "sentence", + "proper", 2569392033451362672, "TEXT", "#/texts/54", @@ -50233,7 +51871,7 @@ ], [ "sentence", - "", + "proper", 2569392033451362672, "TEXT", "#/texts/54", @@ -50443,7 +52081,7 @@ ], [ "sentence", - "", + "proper", 2569392033451362672, "TEXT", "#/texts/54", @@ -50653,7 +52291,7 @@ ], [ "sentence", - "", + "proper", 2569392033451362672, "TEXT", "#/texts/54", @@ -50884,7 +52522,7 @@ ], [ "sentence", - "", + "proper", 14539041145469267811, "TEXT", "#/texts/55", @@ -50989,7 +52627,7 @@ ], [ "sentence", - "", + "proper", 14539041145469267811, "TEXT", "#/texts/55", @@ -51241,7 +52879,7 @@ ], [ "sentence", - "", + "proper", 14539041145469267811, "TEXT", "#/texts/55", @@ -51472,7 +53110,7 @@ ], [ "sentence", - "", + "proper", 14539041145469267811, "TEXT", "#/texts/55", @@ -51829,7 +53467,7 @@ ], [ "sentence", - "", + "proper", 8607014065143641201, "TEXT", "#/texts/56", @@ -52018,7 +53656,7 @@ ], [ "sentence", - "", + "proper", 8607014065143641201, "TEXT", "#/texts/56", @@ -52228,7 +53866,7 @@ ], [ "sentence", - "", + "proper", 8607014065143641201, "TEXT", "#/texts/56", @@ -52648,7 +54286,7 @@ ], [ "sentence", - "", + "proper", 8607014065143641201, "TEXT", "#/texts/56", @@ -52816,7 +54454,7 @@ ], [ "sentence", - "", + "proper", 8607014065143641201, "TEXT", "#/texts/56", @@ -53089,7 +54727,7 @@ ], [ "sentence", - "", + "proper", 8607014065143641201, "TEXT", "#/texts/56", @@ -53320,7 +54958,7 @@ ], [ "sentence", - "", + "proper", 1994904537764312371, "TEXT", "#/texts/57", @@ -53593,7 +55231,7 @@ ], [ "sentence", - "", + "proper", 1994904537764312371, "TEXT", "#/texts/57", @@ -53759,6 +55397,27 @@ "structured data", "structured data" ], + [ + "sentence", + "improper", + 1994904537764312371, + "TEXT", + "#/texts/57", + 1.0, + 12038473850244884267, + 11637496100495629462, + null, + null, + 244, + 256, + 244, + 256, + 44, + 48, + true, + "In our case,", + "In our case," + ], [ "conn", "single-conn", @@ -53801,6 +55460,27 @@ "case", "case" ], + [ + "sentence", + "improper", + 7742256726079628058, + "TEXT", + "#/texts/58", + 1.0, + 7096189018160292824, + 5773193797521145767, + null, + null, + 0, + 54, + 0, + 54, + 0, + 9, + true, + "this structure originates of course from the template.", + "this structure originates of course from the template." + ], [ "term", "single-term", @@ -53908,7 +55588,7 @@ ], [ "sentence", - "", + "proper", 7742256726079628058, "TEXT", "#/texts/58", @@ -54160,7 +55840,7 @@ ], [ "sentence", - "", + "proper", 7742256726079628058, "TEXT", "#/texts/58", @@ -54412,7 +56092,7 @@ ], [ "sentence", - "", + "proper", 8810233123818174294, "TEXT", "#/texts/59", @@ -54685,7 +56365,7 @@ ], [ "sentence", - "", + "proper", 8810233123818174294, "TEXT", "#/texts/59", @@ -55042,7 +56722,7 @@ ], [ "sentence", - "", + "proper", 8810233123818174294, "TEXT", "#/texts/59", @@ -55294,7 +56974,7 @@ ], [ "sentence", - "", + "proper", 8810233123818174294, "TEXT", "#/texts/59", @@ -55693,7 +57373,7 @@ ], [ "sentence", - "", + "proper", 16446711449286912460, "TEXT", "#/texts/60", @@ -55861,7 +57541,7 @@ ], [ "sentence", - "", + "proper", 16446711449286912460, "TEXT", "#/texts/60", @@ -56071,7 +57751,7 @@ ], [ "sentence", - "", + "proper", 9558434107504657973, "TEXT", "#/texts/61", @@ -56323,7 +58003,7 @@ ], [ "sentence", - "", + "proper", 9558434107504657973, "TEXT", "#/texts/61", @@ -56575,7 +58255,7 @@ ], [ "sentence", - "", + "proper", 9558434107504657973, "TEXT", "#/texts/61", @@ -56869,7 +58549,7 @@ ], [ "sentence", - "", + "proper", 9558434107504657973, "TEXT", "#/texts/61", @@ -57121,7 +58801,7 @@ ], [ "sentence", - "", + "proper", 9558434107504657973, "TEXT", "#/texts/61", @@ -57289,7 +58969,7 @@ ], [ "sentence", - "", + "proper", 18349896906192842040, "TEXT", "#/texts/62", @@ -57688,7 +59368,7 @@ ], [ "sentence", - "", + "proper", 18349896906192842040, "TEXT", "#/texts/62", @@ -58192,7 +59872,7 @@ ], [ "sentence", - "", + "proper", 18349896906192842040, "TEXT", "#/texts/62", @@ -58507,7 +60187,7 @@ ], [ "sentence", - "", + "proper", 18349896906192842040, "TEXT", "#/texts/62", @@ -58612,7 +60292,28 @@ ], [ "sentence", - "", + "improper", + 10082834006373808153, + "TEXT", + "#/texts/63", + 1.0, + 14650296242544931649, + 11940302954647468228, + null, + null, + 4, + 12, + 4, + 12, + 3, + 4, + true, + "Assembly", + "Assembly" + ], + [ + "sentence", + "proper", 15253541252152665681, "TEXT", "#/texts/64", @@ -59009,6 +60710,27 @@ "layout semantics", "layout semantics" ], + [ + "sentence", + "improper", + 15253541252152665681, + "TEXT", + "#/texts/64", + 1.0, + 1747679379755568353, + 7184510188177392483, + null, + null, + 188, + 277, + 188, + 277, + 40, + 55, + true, + "This structured data file is constructed by assembling all the cells from the parsed file", + "This structured data file is constructed by assembling all the cells from the parsed file" + ], [ "term", "single-term", @@ -59158,7 +60880,7 @@ ], [ "sentence", - "", + "proper", 3904142170608486950, "TEXT", "#/texts/65", @@ -59389,7 +61111,28 @@ ], [ "sentence", - "", + "improper", + 6410818076508661508, + "TEXT", + "#/texts/66", + 1.0, + 17767354399704235219, + 18295152020937953197, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "{", + "{" + ], + [ + "sentence", + "proper", 6410818076508661508, "TEXT", "#/texts/66", @@ -59681,6 +61424,27 @@ "scale", "scale" ], + [ + "sentence", + "improper", + 6410818076508661508, + "TEXT", + "#/texts/66", + 1.0, + 8173783615378291854, + 10056493462698779949, + null, + null, + 116, + 723, + 116, + 717, + 25, + 259, + true, + "',, \u2192 'abstract ': 'Over the past few decades, the amount of scientific articles [...] ',, \u2192 'affiliations ': 'IBM Research Rueschlikon, Switzerland ', 'authors ': 'Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas ', \u2192 }, 'main-text ': [{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...], 'tables ': [{...},...], 'images ': [{...},...] }", + "',, \u2192 'abstract ': 'Over the past few decades, the amount of scientific articles [...] ',, \u2192 'affiliations ': 'IBM Research Rueschlikon, Switzerland ', 'authors ': 'Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas ', \u2192 }, 'main-text ': [{ 'prov ': [{ 'bbox ': [52.304, 509.750, 168.099, 523.980], 'page ': 1 }], 'type ': 'subtitle-level-1 ', 'text ': '1 INTRODUCTION ' }, { 'prov ': [{ 'bbox ': [52.304, 337.678, 286.067, 380.475], 'page ': 1 }], 'type ': 'paragraph ', 'text ': 'It is estimated that [...] put these into context. ' },...], 'tables ': [{...},...], 'images ': [{...},...] }" + ], [ "quote", "quote", @@ -61403,6 +63167,27 @@ "[{...},...]", "[{...},...]" ], + [ + "sentence", + "improper", + 12813875992986832439, + "TEXT", + "#/texts/67", + 1.0, + 10716470035282398678, + 3845438819509552305, + null, + null, + 0, + 91, + 0, + 91, + 0, + 16, + true, + "in combination with their associated predicted (or human-annotated) layout semantic labels.", + "in combination with their associated predicted (or human-annotated) layout semantic labels." + ], [ "conn", "single-conn", @@ -61552,7 +63337,7 @@ ], [ "sentence", - "", + "proper", 12813875992986832439, "TEXT", "#/texts/67", @@ -61699,7 +63484,7 @@ ], [ "sentence", - "", + "proper", 12813875992986832439, "TEXT", "#/texts/67", @@ -61783,7 +63568,7 @@ ], [ "sentence", - "", + "proper", 11030869010407626539, "TEXT", "#/texts/68", @@ -61867,7 +63652,7 @@ ], [ "sentence", - "", + "proper", 11030869010407626539, "TEXT", "#/texts/68", @@ -62077,7 +63862,7 @@ ], [ "sentence", - "", + "proper", 11030869010407626539, "TEXT", "#/texts/68", @@ -62266,7 +64051,7 @@ ], [ "sentence", - "", + "proper", 11030869010407626539, "TEXT", "#/texts/68", @@ -62518,7 +64303,7 @@ ], [ "sentence", - "", + "proper", 11030869010407626539, "TEXT", "#/texts/68", @@ -62686,7 +64471,7 @@ ], [ "sentence", - "", + "proper", 11030869010407626539, "TEXT", "#/texts/68", @@ -62875,7 +64660,133 @@ ], [ "sentence", - "", + "improper", + 2142320548375900929, + "TEXT", + "#/texts/69", + 1.0, + 649350565569305499, + 18054658577706005032, + null, + null, + 2, + 61, + 2, + 61, + 1, + 8, + true, + "ARCHITECTURE AND ORCHESTRATION OF CLOUD BASED MICROSERVICES", + "ARCHITECTURE AND ORCHESTRATION OF CLOUD BASED MICROSERVICES" + ], + [ + "term", + "enum-term-mark-4", + 2142320548375900929, + "TEXT", + "#/texts/69", + 1.0, + 10848792430657515756, + 3471796174410053708, + null, + null, + 2, + 32, + 2, + 32, + 1, + 4, + true, + "ARCHITECTURE AND ORCHESTRATION", + "ARCHITECTURE AND ORCHESTRATION" + ], + [ + "term", + "single-term", + 2142320548375900929, + "TEXT", + "#/texts/69", + 1.0, + 13396680420363072591, + 5322706739862517961, + null, + null, + 2, + 14, + 2, + 14, + 1, + 2, + true, + "ARCHITECTURE", + "ARCHITECTURE" + ], + [ + "term", + "single-term", + 2142320548375900929, + "TEXT", + "#/texts/69", + 1.0, + 12374441967935089365, + 12879880928883366015, + null, + null, + 19, + 32, + 19, + 32, + 3, + 4, + true, + "ORCHESTRATION", + "ORCHESTRATION" + ], + [ + "conn", + "single-conn", + 2142320548375900929, + "TEXT", + "#/texts/69", + 1.0, + 15441160910541487726, + 2682264733047639861, + null, + null, + 33, + 35, + 33, + 35, + 4, + 5, + true, + "OF", + "OF" + ], + [ + "term", + "single-term", + 2142320548375900929, + "TEXT", + "#/texts/69", + 1.0, + 7446456195434763041, + 10893490412896720588, + null, + null, + 36, + 61, + 36, + 61, + 5, + 8, + true, + "CLOUD BASED MICROSERVICES", + "CLOUD BASED MICROSERVICES" + ], + [ + "sentence", + "proper", 12747011194397783283, "TEXT", "#/texts/70", @@ -63127,7 +65038,7 @@ ], [ "sentence", - "", + "proper", 12747011194397783283, "TEXT", "#/texts/70", @@ -63358,7 +65269,7 @@ ], [ "sentence", - "", + "proper", 12747011194397783283, "TEXT", "#/texts/70", @@ -63463,7 +65374,7 @@ ], [ "sentence", - "", + "proper", 12747011194397783283, "TEXT", "#/texts/70", @@ -63820,7 +65731,7 @@ ], [ "sentence", - "", + "proper", 12747011194397783283, "TEXT", "#/texts/70", @@ -64345,7 +66256,7 @@ ], [ "sentence", - "", + "proper", 12747011194397783283, "TEXT", "#/texts/70", @@ -64555,7 +66466,49 @@ ], [ "sentence", - "", + "improper", + 174789262945188010, + "TEXT", + "#/texts/71", + 1.0, + 1327769741348408014, + 12966876912839977056, + null, + null, + 4, + 19, + 4, + 19, + 3, + 5, + true, + "Platform layers", + "Platform layers" + ], + [ + "term", + "single-term", + 174789262945188010, + "TEXT", + "#/texts/71", + 1.0, + 1327769741348408014, + 12966876912839977056, + null, + null, + 4, + 19, + 4, + 19, + 3, + 5, + true, + "Platform layers", + "Platform layers" + ], + [ + "sentence", + "proper", 7228893318503650455, "TEXT", "#/texts/72", @@ -64826,6 +66779,27 @@ "documents", "documents" ], + [ + "sentence", + "improper", + 7228893318503650455, + "TEXT", + "#/texts/72", + 1.0, + 16186551996817221792, + 3648029315127422911, + null, + null, + 91, + 127, + 91, + 127, + 19, + 29, + true, + "In Figure 6, we show a sketch of its", + "In Figure 6, we show a sketch of its" + ], [ "conn", "single-conn", @@ -64952,6 +66926,27 @@ "of", "of" ], + [ + "sentence", + "improper", + 9230667184712205690, + "TEXT", + "#/texts/73", + 1.0, + 6795737645642343893, + 9165468484284022713, + null, + null, + 0, + 13, + 0, + 13, + 0, + 2, + true, + "architecture.", + "architecture." + ], [ "term", "single-term", @@ -64975,7 +66970,7 @@ ], [ "sentence", - "", + "proper", 9230667184712205690, "TEXT", "#/texts/73", @@ -65120,6 +67115,27 @@ "layers", "layers" ], + [ + "sentence", + "improper", + 9230667184712205690, + "TEXT", + "#/texts/73", + 1.0, + 1435105462728696001, + 8118258606096822065, + null, + null, + 80, + 97, + 80, + 97, + 16, + 20, + true, + "These layers are:", + "These layers are:" + ], [ "term", "single-term", @@ -65206,7 +67222,7 @@ ], [ "sentence", - "", + "proper", 17419815751432442882, "TEXT", "#/texts/74", @@ -65689,7 +67705,7 @@ ], [ "sentence", - "", + "proper", 17419815751432442882, "TEXT", "#/texts/74", @@ -65962,7 +67978,7 @@ ], [ "sentence", - "", + "proper", 11194226403360998426, "TEXT", "#/texts/75", @@ -66151,7 +68167,7 @@ ], [ "sentence", - "", + "proper", 11194226403360998426, "TEXT", "#/texts/75", @@ -66298,7 +68314,7 @@ ], [ "sentence", - "", + "proper", 11194226403360998426, "TEXT", "#/texts/75", @@ -66529,7 +68545,7 @@ ], [ "sentence", - "", + "proper", 11194226403360998426, "TEXT", "#/texts/75", @@ -67222,7 +69238,7 @@ ], [ "sentence", - "", + "proper", 11194226403360998426, "TEXT", "#/texts/75", @@ -67432,7 +69448,7 @@ ], [ "sentence", - "", + "proper", 9005324696118733701, "TEXT", "#/texts/76", @@ -67852,7 +69868,7 @@ ], [ "sentence", - "", + "proper", 9005324696118733701, "TEXT", "#/texts/76", @@ -68230,7 +70246,7 @@ ], [ "sentence", - "", + "proper", 9005324696118733701, "TEXT", "#/texts/76", @@ -68503,7 +70519,7 @@ ], [ "sentence", - "", + "proper", 9005324696118733701, "TEXT", "#/texts/76", @@ -68669,6 +70685,27 @@ "new ones", "new ones" ], + [ + "sentence", + "improper", + 9005324696118733701, + "TEXT", + "#/texts/76", + 1.0, + 8483868933235717643, + 12187788331648570798, + null, + null, + 580, + 613, + 580, + 613, + 115, + 122, + true, + "This is the case for the requests", + "This is the case for the requests" + ], [ "verb", "single-verb", @@ -68753,6 +70790,27 @@ "requests", "requests" ], + [ + "sentence", + "improper", + 8082547756621048511, + "TEXT", + "#/texts/77", + 1.0, + 4327050488414438041, + 5242140265994150891, + null, + null, + 0, + 30, + 0, + 30, + 0, + 6, + true, + "operating on the whole corpus.", + "operating on the whole corpus." + ], [ "verb", "single-verb", @@ -68818,7 +70876,7 @@ ], [ "sentence", - "", + "proper", 8082547756621048511, "TEXT", "#/texts/77", @@ -69070,7 +71128,7 @@ ], [ "sentence", - "", + "proper", 7791113385466815951, "TEXT", "#/texts/78", @@ -69826,7 +71884,7 @@ ], [ "sentence", - "", + "proper", 7791113385466815951, "TEXT", "#/texts/78", @@ -70141,7 +72199,7 @@ ], [ "sentence", - "", + "proper", 7791113385466815951, "TEXT", "#/texts/78", @@ -70645,7 +72703,7 @@ ], [ "sentence", - "", + "proper", 2845012065511066307, "TEXT", "#/texts/79", @@ -70939,7 +72997,7 @@ ], [ "sentence", - "", + "proper", 2845012065511066307, "TEXT", "#/texts/79", @@ -71315,6 +73373,27 @@ "RabbitMQ", "RabbitMQ" ], + [ + "sentence", + "improper", + 2845012065511066307, + "TEXT", + "#/texts/79", + 1.0, + 2526568585986604533, + 4796033821977691033, + null, + null, + 288, + 565, + 288, + 565, + 66, + 117, + true, + "Being a cloud-based platform, our solution allows for these software assets to be detached from the main deployment and to be served by specialised vendors services which are certified to the latest industry requirements such as data-at-rest encryption, high availability, etc.", + "Being a cloud-based platform, our solution allows for these software assets to be detached from the main deployment and to be served by specialised vendors services which are certified to the latest industry requirements such as data-at-rest encryption, high availability, etc." + ], [ "verb", "single-verb", @@ -71884,7 +73963,7 @@ ], [ "sentence", - "", + "proper", 15072914837937068796, "TEXT", "#/texts/80", @@ -72115,7 +74194,7 @@ ], [ "sentence", - "", + "proper", 15072914837937068796, "TEXT", "#/texts/80", @@ -72430,7 +74509,7 @@ ], [ "sentence", - "", + "proper", 15072914837937068796, "TEXT", "#/texts/80", @@ -72722,6 +74801,27 @@ "scaling bottlenecks", "scaling bottlenecks" ], + [ + "sentence", + "improper", + 15072914837937068796, + "TEXT", + "#/texts/80", + 1.0, + 15939592681427075311, + 12724765425882393373, + null, + null, + 398, + 605, + 398, + 605, + 73, + 111, + true, + "For example other result-backends didn't offer the auto-cleaning functionality offered by Redis and, before opting for a custom solution mixing MongoDB with an object storage, we evaluated other solutions as", + "For example other result-backends didn't offer the auto-cleaning functionality offered by Redis and, before opting for a custom solution mixing MongoDB with an object storage, we evaluated other solutions as" + ], [ "conn", "single-conn", @@ -73228,7 +75328,28 @@ ], [ "sentence", - "", + "improper", + 15263283599394646155, + "TEXT", + "#/texts/81", + 1.0, + 12178341415895529772, + 10663586828193627466, + null, + null, + 0, + 3, + 0, + 3, + 0, + 1, + true, + "the", + "the" + ], + [ + "sentence", + "proper", 15263283599394646155, "TEXT", "#/texts/81", @@ -73438,7 +75559,49 @@ ], [ "sentence", - "", + "improper", + 11417717357379295278, + "TEXT", + "#/texts/82", + 1.0, + 7182846004778473180, + 11133207535091681780, + null, + null, + 4, + 14, + 4, + 14, + 3, + 4, + true, + "Deployment", + "Deployment" + ], + [ + "term", + "single-term", + 11417717357379295278, + "TEXT", + "#/texts/82", + 1.0, + 7182846004778473180, + 11133207535091681780, + null, + null, + 4, + 14, + 4, + 14, + 3, + 4, + true, + "Deployment", + "Deployment" + ], + [ + "sentence", + "proper", 9031137420247852045, "TEXT", "#/texts/83", @@ -73774,7 +75937,7 @@ ], [ "sentence", - "", + "proper", 9031137420247852045, "TEXT", "#/texts/83", @@ -74005,7 +76168,7 @@ ], [ "sentence", - "", + "proper", 18436578077535696718, "TEXT", "#/texts/84", @@ -74152,7 +76315,7 @@ ], [ "sentence", - "", + "proper", 18436578077535696718, "TEXT", "#/texts/84", @@ -74362,7 +76525,7 @@ ], [ "sentence", - "", + "proper", 18436578077535696718, "TEXT", "#/texts/84", @@ -74866,7 +77029,7 @@ ], [ "sentence", - "", + "proper", 11734907767490759865, "TEXT", "#/texts/85", @@ -75286,7 +77449,7 @@ ], [ "sentence", - "", + "proper", 11734907767490759865, "TEXT", "#/texts/85", @@ -75412,7 +77575,7 @@ ], [ "sentence", - "", + "proper", 7845460979782401889, "TEXT", "#/texts/86", @@ -75895,7 +78058,7 @@ ], [ "sentence", - "", + "proper", 7845460979782401889, "TEXT", "#/texts/86", @@ -76105,7 +78268,7 @@ ], [ "sentence", - "", + "proper", 7845460979782401889, "TEXT", "#/texts/86", @@ -76336,7 +78499,70 @@ ], [ "sentence", - "", + "improper", + 17769988780693768120, + "TEXT", + "#/texts/87", + 1.0, + 12674958461648271097, + 14919215603836704456, + null, + null, + 4, + 22, + 4, + 22, + 3, + 5, + true, + "Scaling benchmarks", + "Scaling benchmarks" + ], + [ + "verb", + "single-verb", + 17769988780693768120, + "TEXT", + "#/texts/87", + 1.0, + 8106352781048289419, + 7608329028597452617, + null, + null, + 4, + 11, + 4, + 11, + 3, + 4, + true, + "Scaling", + "Scaling" + ], + [ + "term", + "single-term", + 17769988780693768120, + "TEXT", + "#/texts/87", + 1.0, + 7098563342517489092, + 5499562772807639261, + null, + null, + 12, + 22, + 12, + 22, + 4, + 5, + true, + "benchmarks", + "benchmarks" + ], + [ + "sentence", + "proper", 12387489643011067991, "TEXT", "#/texts/88", @@ -76483,7 +78709,7 @@ ], [ "sentence", - "", + "proper", 12387489643011067991, "TEXT", "#/texts/88", @@ -76943,6 +79169,27 @@ "compute resources", "compute resources" ], + [ + "sentence", + "improper", + 12387489643011067991, + "TEXT", + "#/texts/88", + 1.0, + 16951325966787702217, + 14182089789789694478, + null, + null, + 248, + 320, + 248, + 320, + 46, + 62, + true, + "In Figure 7, we show the number of users and the number of processed PDF", + "In Figure 7, we show the number of users and the number of processed PDF" + ], [ "conn", "single-conn", @@ -77174,6 +79421,27 @@ "PDF", "PDF" ], + [ + "sentence", + "improper", + 10375772475809458895, + "TEXT", + "#/texts/89", + 1.0, + 3763642508235824098, + 10404671326662849558, + null, + null, + 0, + 31, + 0, + 31, + 0, + 8, + true, + "pages 20 as a function of time.", + "pages 20 as a function of time." + ], [ "term", "single-term", @@ -77302,7 +79570,7 @@ ], [ "sentence", - "", + "proper", 10375772475809458895, "TEXT", "#/texts/89", @@ -77680,7 +79948,7 @@ ], [ "sentence", - "", + "proper", 10375772475809458895, "TEXT", "#/texts/89", @@ -78100,7 +80368,7 @@ ], [ "sentence", - "", + "proper", 10375772475809458895, "TEXT", "#/texts/89", @@ -78415,7 +80683,7 @@ ], [ "sentence", - "", + "proper", 7054726458191881751, "TEXT", "#/texts/90", @@ -79024,7 +81292,7 @@ ], [ "sentence", - "", + "proper", 7054726458191881751, "TEXT", "#/texts/90", @@ -79234,7 +81502,7 @@ ], [ "sentence", - "", + "proper", 7054726458191881751, "TEXT", "#/texts/90", @@ -79507,7 +81775,7 @@ ], [ "sentence", - "", + "proper", 7054726458191881751, "TEXT", "#/texts/90", @@ -79801,7 +82069,7 @@ ], [ "sentence", - "", + "proper", 7054726458191881751, "TEXT", "#/texts/90", @@ -80053,7 +82321,7 @@ ], [ "sentence", - "", + "proper", 7054726458191881751, "TEXT", "#/texts/90", @@ -80347,7 +82615,7 @@ ], [ "sentence", - "", + "proper", 7054726458191881751, "TEXT", "#/texts/90", @@ -80641,7 +82909,7 @@ ], [ "sentence", - "", + "proper", 7054726458191881751, "TEXT", "#/texts/90", @@ -81019,7 +83287,49 @@ ], [ "sentence", - "", + "improper", + 7794115281016062068, + "TEXT", + "#/texts/91", + 1.0, + 188451247496835434, + 15420269579743129515, + null, + null, + 2, + 12, + 2, + 12, + 1, + 2, + true, + "CONCLUSION", + "CONCLUSION" + ], + [ + "term", + "single-term", + 7794115281016062068, + "TEXT", + "#/texts/91", + 1.0, + 188451247496835434, + 15420269579743129515, + null, + null, + 2, + 12, + 2, + 12, + 1, + 2, + true, + "CONCLUSION", + "CONCLUSION" + ], + [ + "sentence", + "proper", 7038163015905900647, "TEXT", "#/texts/92", @@ -81502,7 +83812,7 @@ ], [ "sentence", - "", + "proper", 1508626318915838319, "TEXT", "#/texts/93", @@ -81670,7 +83980,7 @@ ], [ "sentence", - "", + "proper", 1508626318915838319, "TEXT", "#/texts/93", @@ -81733,7 +84043,7 @@ ], [ "sentence", - "", + "proper", 1508626318915838319, "TEXT", "#/texts/93", @@ -81964,7 +84274,7 @@ ], [ "sentence", - "", + "proper", 1508626318915838319, "TEXT", "#/texts/93", @@ -82489,7 +84799,7 @@ ], [ "sentence", - "", + "proper", 17247086344435786796, "TEXT", "#/texts/94", @@ -82657,7 +84967,7 @@ ], [ "sentence", - "", + "proper", 17247086344435786796, "TEXT", "#/texts/94", @@ -82888,7 +85198,7 @@ ], [ "sentence", - "", + "proper", 17247086344435786796, "TEXT", "#/texts/94", @@ -83245,7 +85555,7 @@ ], [ "sentence", - "", + "proper", 17247086344435786796, "TEXT", "#/texts/94", @@ -83581,7 +85891,7 @@ ], [ "sentence", - "", + "proper", 17247086344435786796, "TEXT", "#/texts/94", @@ -83749,7 +86059,7 @@ ], [ "sentence", - "", + "proper", 17247086344435786796, "TEXT", "#/texts/94", @@ -84146,6 +86456,27 @@ "5", "5" ], + [ + "sentence", + "improper", + 17247086344435786796, + "TEXT", + "#/texts/94", + 1.0, + 6472132817642437228, + 9623284840783718592, + null, + null, + 724, + 808, + 724, + 808, + 144, + 158, + true, + "To leverage this growing use of deep learning models, we will additionally introduce", + "To leverage this growing use of deep learning models, we will additionally introduce" + ], [ "conn", "single-conn", @@ -84293,6 +86624,27 @@ "introduce", "introduce" ], + [ + "sentence", + "improper", + 10287541089279789496, + "TEXT", + "#/texts/95", + 1.0, + 541003494147177743, + 2376460771711104984, + null, + null, + 0, + 11, + 0, + 11, + 0, + 1, + true, + "specialised", + "specialised" + ], [ "verb", "single-verb", @@ -84316,7 +86668,7 @@ ], [ "sentence", - "", + "proper", 10287541089279789496, "TEXT", "#/texts/95", @@ -84589,7 +86941,49 @@ ], [ "sentence", - "", + "improper", + 7819882792760965882, + "TEXT", + "#/texts/96", + 1.0, + 18322720810464861272, + 18075421416195196021, + null, + null, + 0, + 15, + 0, + 15, + 0, + 1, + true, + "ACKNOWLEDGMENTS", + "ACKNOWLEDGMENTS" + ], + [ + "term", + "single-term", + 7819882792760965882, + "TEXT", + "#/texts/96", + 1.0, + 18322720810464861272, + 18075421416195196021, + null, + null, + 0, + 15, + 0, + 15, + 0, + 1, + true, + "ACKNOWLEDGMENTS", + "ACKNOWLEDGMENTS" + ], + [ + "sentence", + "proper", 15983582675278266440, "TEXT", "#/texts/97", @@ -84862,7 +87256,7 @@ ], [ "sentence", - "", + "proper", 12711351442546714716, "TEXT", "#/texts/98", @@ -85093,7 +87487,7 @@ ], [ "sentence", - "", + "proper", 12711351442546714716, "TEXT", "#/texts/98", @@ -85513,7 +87907,7 @@ ], [ "reference", - "author", + "authors", 1225384713519841338, "TEXT", "#/texts/99", @@ -85534,108 +87928,45 @@ ], [ "reference", - "citation-number", + "reference-number", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 17767354399704235161, - 10735882159598120618, + 10735882159598120682, null, null, - 1, - 2, - 1, - 2, - 1, - 2, + 0, + 3, + 0, + 3, + 0, + 3, true, "1", - "1" + "[1]" ], [ "reference", - "author", + "authors", 1712774266196702392, "TEXT", "#/texts/100", 1.0, - 10921193442290853772, - 7808176325166967948, + 6803215666489738843, + 14280338454439813011, null, null, 4, - 21, - 4, - 21, - 3, - 6, - true, - "A. Antonacopoulos", - "A. Antonacopoulos" - ], - [ - "reference", - "author", - 1712774266196702392, - "TEXT", - "#/texts/100", - 1.0, - 5181382481262336037, - 5307751930075227018, - null, - null, - 23, - 34, - 23, - 34, - 7, - 10, - true, - "C. Clausner", - "C. Clausner" - ], - [ - "reference", - "author", - 1712774266196702392, - "TEXT", - "#/texts/100", - 1.0, - 18410882341323932977, - 3950678732393374894, - null, - null, - 36, - 51, - 36, - 51, - 11, - 14, - true, - "C. Papadopoulos", - "C. Papadopoulos" - ], - [ - "reference", - "author", - 1712774266196702392, - "TEXT", - "#/texts/100", - 1.0, - 6326253284428776844, - 2242368337149903292, - null, - null, - 57, 73, - 57, + 4, 73, - 16, + 3, 20, true, - "S. Pletschacher.", - "S. Pletschacher." + "A. Antonacopoulos, C. Clausner, C. Papadopoulos, and S. Pletschacher", + "A. Antonacopoulos, C. Clausner, C. Papadopoulos, and S. Pletschacher." ], [ "reference", @@ -85644,19 +87975,19 @@ "TEXT", "#/texts/100", 1.0, - 389609625548777059, - 4138332198474599496, + 10797770575721560862, + 6247087653733616816, null, null, 74, - 78, + 89, 74, - 78, + 89, 20, - 21, + 24, true, - "2015", - "2015" + "2015. ICDAR2015", + "2015. ICDAR2015" ], [ "reference", @@ -85665,134 +87996,92 @@ "TEXT", "#/texts/100", 1.0, - 17804212744220731295, - 13329383501201933373, + 1396556335874361340, + 396648570188163248, null, null, - 80, - 159, - 80, - 159, - 22, - 35, + 90, + 160, + 90, + 160, + 24, + 36, true, - "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015", - "ICDAR2015 Competition on Recognition of Documents with Complex Layouts-RDCL2015" + "Competition on Recognition of Documents with Complex Layouts-RDCL2015", + "Competition on Recognition of Documents with Complex Layouts-RDCL2015." ], [ "reference", - "container-title", + "conference", 1712774266196702392, "TEXT", "#/texts/100", 1.0, - 2527079864200222812, - 474810476780653321, + 401381660143969539, + 5381086215452928630, null, null, 161, - 249, + 269, 161, - 249, + 269, 36, - 49, - true, - "In Proceedings of the 13th International Conference on Document Analysis and Recognition", - "In Proceedings of the 13th International Conference on Document Analysis and Recognition" - ], - [ - "reference", - "container-title", - 1712774266196702392, - "TEXT", - "#/texts/100", - 1.0, - 6558131902220562236, - 4761966619744782752, - null, - null, - 251, - 260, - 251, - 260, - 50, - 52, - true, - "ICDAR2015", - "ICDAR2015" - ], - [ - "reference", - "location", - 1712774266196702392, - "TEXT", - "#/texts/100", - 1.0, - 329104162200796337, - 14591806354842233425, - null, - null, - 263, - 268, - 263, - 268, - 54, - 55, + 56, true, - "Nancy", - "Nancy" + "In Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR2015). Nancy", + "In Proceedings of the 13th International Conference on Document Analysis and Recognition (ICDAR2015). Nancy," ], [ "reference", - "date", + "pages", 1712774266196702392, "TEXT", "#/texts/100", 1.0, 10303630957638511768, - 3815340683710445282, + 3815340683710445283, null, null, 270, - 279, + 280, 270, - 279, + 280, 56, - 59, + 60, true, "1151-1155", - "1151-1155" + "1151-1155." ], [ "reference", - "citation-number", + "reference-number", 14718288547983000340, "TEXT", "#/texts/101", 1.0, 17767354399704235162, - 1208869658565501501, + 1208869658565501567, null, null, - 1, - 2, - 1, - 2, - 1, - 2, + 0, + 3, + 0, + 3, + 0, + 3, true, "2", - "2" + "[2]" ], [ "reference", - "author", + "authors", 14718288547983000340, "TEXT", "#/texts/101", 1.0, - 2649929445531557889, - 7202581822078924410, + 4182884638369411954, + 3549752055104827894, null, null, 4, @@ -85802,7 +88091,7 @@ 3, 6, true, - "Leo Breiman.", + "Leo Breiman", "Leo Breiman." ], [ @@ -85813,18 +88102,18 @@ "#/texts/101", 1.0, 389609625548757414, - 14515784463162085628, + 14515784463162085631, null, null, 17, - 21, + 22, 17, - 21, + 22, 6, - 7, + 8, true, "2001", - "2001" + "2001." ], [ "reference", @@ -85834,18 +88123,18 @@ "#/texts/101", 1.0, 2109081024677782429, - 14560503901773287747, + 14560503901773287746, null, null, 23, - 37, + 38, 23, - 37, + 38, 8, - 10, + 11, true, "Random Forests", - "Random Forests" + "Random Forests." ], [ "reference", @@ -85870,150 +88159,129 @@ ], [ "reference", - "date", + "volume", 14718288547983000340, "TEXT", "#/texts/101", 1.0, - 10551073428908397011, - 16087676618282063646, + 329104066357379118, + 10684994590363784595, null, null, - 63, - 74, - 63, - 74, - 17, - 20, + 56, + 61, + 56, + 61, + 13, + 16, true, - "01 Oct 2001", - "01 Oct 2001" + "45, 1", + "45, 1" ], [ "reference", - "url", + "date", 14718288547983000340, "TEXT", "#/texts/101", 1.0, - 1225079762841478321, - 13531790532415888950, - null, - null, - 83, - 122, - 83, - 122, - 26, - 41, - true, - "https://doi.org/10.1023/A:1010933404324", - "https://doi.org/10.1023/A:1010933404324" - ], - [ - "reference", - "citation-number", - 16943780574244090186, - "TEXT", - "#/texts/102", - 1.0, - 17767354399704235163, - 17460648837280544429, + 10551073428908397011, + 16087676618282063709, null, null, - 1, - 2, - 1, - 2, - 1, - 2, + 62, + 76, + 62, + 76, + 16, + 22, true, - "3", - "3" + "01 Oct 2001", + "(01 Oct 2001)," ], [ "reference", - "author", - 16943780574244090186, + "pages", + 14718288547983000340, "TEXT", - "#/texts/102", + "#/texts/101", 1.0, - 1401374873664364883, - 11647727014815681179, + 389609625655395305, + 14454171207833729212, null, null, - 4, - 14, - 4, - 14, - 3, - 6, + 77, + 82, + 77, + 82, + 22, + 26, true, - "R. Cattoni", - "R. Cattoni" + "5-32", + "5-32." ], [ "reference", - "author", - 16943780574244090186, + "url", + 14718288547983000340, "TEXT", - "#/texts/102", + "#/texts/101", 1.0, - 8489759580118410179, - 13292301803598722609, + 1225079762841478321, + 13531790532415888950, null, null, - 16, - 26, - 16, + 83, + 122, + 83, + 122, 26, - 7, - 10, + 41, true, - "T. Coianiz", - "T. Coianiz" + "https://doi.org/10.1023/A:1010933404324", + "https://doi.org/10.1023/A:1010933404324" ], [ "reference", - "author", + "reference-number", 16943780574244090186, "TEXT", "#/texts/102", 1.0, - 6842824740074268202, - 13861579202330443089, + 17767354399704235163, + 17460648837280544495, null, null, - 28, - 40, - 28, - 40, - 11, - 14, + 0, + 3, + 0, + 3, + 0, + 3, true, - "S. Messelodi", - "S. Messelodi" + "3", + "[3]" ], [ "reference", - "author", + "authors", 16943780574244090186, "TEXT", "#/texts/102", 1.0, - 3186691256225071720, - 5893020180892593571, + 287758023622786845, + 10192956804230263297, null, null, - 46, + 4, 59, - 46, + 4, 59, - 16, + 3, 22, true, - "C. M. Modena.", - "C. M. Modena." + "R. Cattoni, T. Coianiz, S. Messelodi, and C. M. Modena", + "R. Cattoni, T. Coianiz, S. Messelodi, and C. M. Modena." ], [ "reference", @@ -86023,18 +88291,18 @@ "#/texts/102", 1.0, 389609625536085742, - 14383425253514843049, + 14383425253514843050, null, null, 60, - 64, + 65, 60, - 64, + 65, 22, - 23, + 24, true, "1998", - "1998" + "1998." ], [ "reference", @@ -86044,186 +88312,60 @@ "#/texts/102", 1.0, 10272469742902868819, - 13721964765306049914, + 13721964765306049915, null, null, 66, - 145, + 146, 66, - 145, + 146, 24, - 35, + 36, true, "Geometric layout analysis techniques for document image understanding: a review", - "Geometric layout analysis techniques for document image understanding: a review" + "Geometric layout analysis techniques for document image understanding: a review." ], [ "reference", - "citation-number", + "reference-number", 8004985786049140169, "TEXT", "#/texts/103", 1.0, 17767354399704235156, - 16958274266322700811, + 16958274266322701001, null, null, - 1, - 2, - 1, - 2, - 1, - 2, + 0, + 3, + 0, + 3, + 0, + 3, true, "4", - "4" + "[4]" ], [ "reference", - "author", + "authors", 8004985786049140169, "TEXT", "#/texts/103", 1.0, - 17855541178416775013, - 15770720280543811824, + 15389519391925881720, + 1390714659311403659, null, null, 4, - 22, - 4, - 22, - 3, - 7, - true, - "Jean-Pierre Chanod", - "Jean-Pierre Chanod" - ], - [ - "reference", - "author", - 8004985786049140169, - "TEXT", - "#/texts/103", - 1.0, - 7554933550167443736, - 13411551703313480687, - null, - null, - 24, - 41, - 24, - 41, - 8, - 10, - true, - "Boris Chidlovskii", - "Boris Chidlovskii" - ], - [ - "reference", - "author", - 8004985786049140169, - "TEXT", - "#/texts/103", - 1.0, - 16299981998052668228, - 10120159009512117499, - null, - null, - 43, - 56, - 43, - 55, - 11, - 13, - true, - "Herv\u00e9 Dejean", - "Herv\u00e9 Dejean" - ], - [ - "reference", - "author", - 8004985786049140169, - "TEXT", - "#/texts/103", - 1.0, - 12186041413076963653, - 1815357622671572381, - null, - null, - 58, - 72, - 57, - 71, - 14, - 16, - true, - "Olivier Fambon", - "Olivier Fambon" - ], - [ - "reference", - "author", - 8004985786049140169, - "TEXT", - "#/texts/103", - 1.0, - 10757542349073996342, - 681372576460736923, - null, - null, - 74, - 91, - 73, - 88, - 17, - 19, - true, - "J\u00e9r\u00f4me Fuselier", - "J\u00e9r\u00f4me Fuselier" - ], - [ - "reference", - "author", - 8004985786049140169, - "TEXT", - "#/texts/103", - 1.0, - 17756104824925179897, - 12319066590629211102, - null, - null, - 93, - 108, - 90, - 105, - 20, - 22, - true, - "Thierry Jacquin", - "Thierry Jacquin" - ], - [ - "reference", - "author", - 8004985786049140169, - "TEXT", - "#/texts/103", - 1.0, - 12029578715874344754, - 13070806463269187443, - null, - null, - 114, 131, - 111, + 4, 128, - 24, + 3, 29, true, - "Jean-Luc Meunier.", - "Jean-Luc Meunier." + "Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier", + "Jean-Pierre Chanod, Boris Chidlovskii, Herv\u00e9 Dejean, Olivier Fambon, J\u00e9r\u00f4me Fuselier, Thierry Jacquin, and Jean-Luc Meunier." ], [ "reference", @@ -86233,18 +88375,18 @@ "#/texts/103", 1.0, 389609625548757410, - 11746200903899729970, + 11746200903899729973, null, null, 132, - 136, + 137, 129, - 133, + 134, 29, - 30, + 31, true, "2005", - "2005" + "2005." ], [ "reference", @@ -86254,60 +88396,39 @@ "#/texts/103", 1.0, 8741239478611349123, - 10862343017243987125, + 10862343017243987126, null, null, 138, - 190, + 191, 135, - 187, + 188, 31, - 40, + 41, true, "From Legacy Documents to XML: A Conversion Framework", - "From Legacy Documents to XML: A Conversion Framework" - ], - [ - "reference", - "location", - 8004985786049140169, - "TEXT", - "#/texts/103", - 1.0, - 13464702443011780443, - 8119228962970051206, - null, - null, - 201, - 238, - 198, - 235, - 42, - 48, - true, - "Berlin Heidelberg, Berlin, Heidelberg", - "Berlin Heidelberg, Berlin, Heidelberg" + "From Legacy Documents to XML: A Conversion Framework." ], [ "reference", - "url", + "pages", 8004985786049140169, "TEXT", "#/texts/103", 1.0, - 3534146179424153776, - 4685575908489947590, + 16380810033755625172, + 7099395661617449599, null, null, - 248, - 266, - 245, - 263, + 240, + 247, + 237, + 244, + 49, 53, - 62, true, - "https://doi.org/10", - "https://doi.org/10" + "92-103", + "92-103." ], [ "reference", @@ -86316,50 +88437,50 @@ "TEXT", "#/texts/103", 1.0, - 17297012968265468209, - 10114419193417306093, + 9115058383761225167, + 648438667166468655, null, null, - 267, + 248, 282, - 264, + 245, 279, - 63, + 53, 68, true, - "1007/11551362_9", - "1007/11551362_9" + "https://doi.org/10.1007/11551362_9", + "https://doi.org/10.1007/11551362_9" ], [ "reference", - "citation-number", + "reference-number", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 17767354399704235157, - 4269524292241754489, + 4269524292241754674, null, null, - 1, - 2, - 1, - 2, - 1, - 2, + 0, + 3, + 0, + 3, + 0, + 3, true, "5", - "5" + "[5]" ], [ "reference", - "author", + "authors", 12744546813104546377, "TEXT", "#/texts/104", 1.0, - 4582708537308058782, - 10402294110981991066, + 13123599834782083842, + 8538907007420179435, null, null, 4, @@ -86369,7 +88490,7 @@ 3, 6, true, - "Ross Girshick.", + "Ross Girshick", "Ross Girshick." ], [ @@ -86380,18 +88501,18 @@ "#/texts/104", 1.0, 389609625548777059, - 1587769393776818040, + 1587769393776818039, null, null, 19, - 23, + 24, 19, - 23, + 24, 6, - 7, + 8, true, "2015", - "2015" + "2015." ], [ "reference", @@ -86401,270 +88522,123 @@ "#/texts/104", 1.0, 15491004285883184028, - 17483261521377705764, + 17483261521377705765, null, null, 25, - 35, + 36, 25, - 35, + 36, 8, - 12, + 13, true, "Fast R-CNN", - "Fast R-CNN" + "Fast R-CNN." ], [ "reference", - "container-title", + "conference", 12744546813104546377, "TEXT", "#/texts/104", 1.0, - 9927524698181440404, - 1963067726741096427, + 363489350613285544, + 17497514254577536629, null, null, 37, - 112, + 131, 37, - 112, + 131, 13, - 24, - true, - "In Proceedings of the 2015 IEEE International Conference on Computer Vision", - "In Proceedings of the 2015 IEEE International Conference on Computer Vision" - ], - [ - "reference", - "container-title", - 12744546813104546377, - "TEXT", - "#/texts/104", - 1.0, - 389609625537760670, - 1654267914364558446, - null, - null, - 114, - 118, - 114, - 118, - 25, - 26, - true, - "ICCV", - "ICCV" - ], - [ - "reference", - "container-title", - 12744546813104546377, - "TEXT", - "#/texts/104", - 1.0, - 14650472600731532908, - 6597684399889991790, - null, - null, - 121, - 129, - 121, - 129, - 28, - 31, - true, - "ICCV '15", - "ICCV '15" - ], - [ - "reference", - "location", - 12744546813104546377, - "TEXT", - "#/texts/104", - 1.0, - 12788924170991110125, - 5659206141059843753, - null, - null, - 155, - 174, - 155, - 174, - 37, - 42, + 33, true, - "Washington, DC, USA", - "Washington, DC, USA" + "In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15", + "In Proceedings of the 2015 IEEE International Conference on Computer Vision (ICCV) (ICCV '15)." ], [ "reference", - "date", + "pages", 12744546813104546377, "TEXT", "#/texts/104", 1.0, 10303975503395430788, - 13846363068497305469, + 13846363068497305466, null, null, 176, - 185, + 186, 176, - 185, + 186, 43, - 46, + 47, true, "1440-1448", - "1440-1448" + "1440-1448." ], [ "reference", - "url", + "doi", 12744546813104546377, "TEXT", "#/texts/104", 1.0, - 3301781339572596013, - 17531137372088121631, + 8704287819835955947, + 1152182854074722114, null, null, 187, - 215, + 224, 187, - 215, + 224, 47, - 60, - true, - "https://doi.org/10.1109/ICCV", - "https://doi.org/10.1109/ICCV" - ], - [ - "reference", - "date", - 12744546813104546377, - "TEXT", - "#/texts/104", - 1.0, - 389609625548777059, - 1587769393776757579, - null, - null, - 216, - 220, - 216, - 220, - 61, - 62, + 64, true, - "2015", - "2015" + "https://doi.org/10.1109/ICCV.2015.169", + "https://doi.org/10.1109/ICCV.2015.169" ], [ "reference", - "citation-number", + "reference-number", 16061746189176848219, "TEXT", "#/texts/105", 1.0, 17767354399704235158, - 6179291161904875846, + 6179291161904875782, null, null, - 1, - 2, - 1, - 2, - 1, - 2, + 0, + 3, + 0, + 3, + 0, + 3, true, "6", - "6" + "[6]" ], [ "reference", - "author", + "authors", 16061746189176848219, "TEXT", "#/texts/105", 1.0, - 141995704861070506, - 4358412458884164235, + 11234219573053078726, + 1377369452494354948, null, null, 4, - 20, - 4, - 20, - 3, - 7, - true, - "Ross B. Girshick", - "Ross B. Girshick" - ], - [ - "reference", - "author", - 16061746189176848219, - "TEXT", - "#/texts/105", - 1.0, - 16700235966000105766, - 16857612526578801697, - null, - null, - 22, - 34, - 22, - 34, - 8, - 10, - true, - "Jeff Donahue", - "Jeff Donahue" - ], - [ - "reference", - "author", - 16061746189176848219, - "TEXT", - "#/texts/105", - 1.0, - 3125822382074464058, - 13386372949081827875, - null, - null, - 36, - 50, - 36, - 50, - 11, - 13, - true, - "Trevor Darrell", - "Trevor Darrell" - ], - [ - "reference", - "author", - 16061746189176848219, - "TEXT", - "#/texts/105", - 1.0, - 10076860098015848351, - 1698280748488935181, - null, - null, - 56, 71, - 56, + 4, 71, - 15, + 3, 18, true, - "Jitendra Malik.", - "Jitendra Malik." + "Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik", + "Ross B. Girshick, Jeff Donahue, Trevor Darrell, and Jitendra Malik." ], [ "reference", @@ -86674,18 +88648,18 @@ "#/texts/105", 1.0, 389609625548777061, - 894814354396885943, + 894814354396885942, null, null, 72, - 76, + 77, 72, - 76, + 77, 18, - 19, + 20, true, "2013", - "2013" + "2013." ], [ "reference", @@ -86695,18 +88669,18 @@ "#/texts/105", 1.0, 4208693923929480551, - 3754197794849426338, + 3754197794849426341, null, null, 78, - 158, + 159, 78, - 158, + 159, 20, - 30, + 31, true, "Rich feature hierarchies for accurate object detection and semantic segmentation", - "Rich feature hierarchies for accurate object detection and semantic segmentation" + "Rich feature hierarchies for accurate object detection and semantic segmentation." ], [ "reference", @@ -86731,7 +88705,7 @@ ], [ "reference", - "date", + "volume", 16061746189176848219, "TEXT", "#/texts/105", @@ -86758,186 +88732,102 @@ "#/texts/105", 1.0, 389609625548777061, - 894814354396890826, + 894814354396890504, null, null, - 180, - 184, - 180, - 184, - 38, - 39, + 179, + 186, + 179, + 186, + 37, + 41, true, "2013", - "2013" + "(2013)." ], [ "reference", - "citation-number", - 11872392946390819176, + "note", + 16061746189176848219, "TEXT", - "#/texts/106", + "#/texts/105", 1.0, - 17767354399704235159, - 2815502747956890639, - null, - null, - 1, - 2, - 1, - 2, - 1, - 2, - true, - "7", - "7" - ], - [ - "reference", - "author", - 11872392946390819176, - "TEXT", - "#/texts/106", - 1.0, - 8106351942713029604, - 15468997146309510455, - null, - null, - 4, - 11, - 4, - 11, - 3, - 5, - true, - "Wei Liu", - "Wei Liu" - ], - [ - "reference", - "author", - 11872392946390819176, - "TEXT", - "#/texts/106", - 1.0, - 7132768279271695, - 1832821379686674159, - null, - null, - 13, - 30, - 13, - 30, - 6, - 8, - true, - "Dragomir Anguelov", - "Dragomir Anguelov" - ], - [ - "reference", - "author", - 11872392946390819176, - "TEXT", - "#/texts/106", - 1.0, - 12871845148221275510, - 11451573001119547147, - null, - null, - 32, - 45, - 32, - 45, - 9, - 11, - true, - "Dumitru Erhan", - "Dumitru Erhan" - ], - [ - "reference", - "author", - 11872392946390819176, - "TEXT", - "#/texts/106", - 1.0, - 6963214204149412896, - 11905902671968880924, + 17983367136859093708, + 10181841351396540004, null, null, - 47, - 64, - 47, - 64, - 12, - 14, + 187, + 202, + 187, + 202, + 41, + 46, true, - "Christian Szegedy", - "Christian Szegedy" + "arXiv:1311.2524", + "arXiv:1311.2524" ], [ "reference", - "author", - 11872392946390819176, + "url", + 16061746189176848219, "TEXT", - "#/texts/106", + "#/texts/105", 1.0, - 1399468129531522089, - 15637271748350955016, + 7173935269710786926, + 11621494925214516432, null, null, - 66, - 76, - 66, - 76, - 15, - 17, + 203, + 233, + 203, + 233, + 46, + 59, true, - "Scott Reed", - "Scott Reed" + "http://arxiv.org/abs/1311.2524", + "http://arxiv.org/abs/1311.2524" ], [ "reference", - "author", + "reference-number", 11872392946390819176, "TEXT", "#/texts/106", 1.0, - 12712965187511148158, - 5061563798042056469, + 17767354399704235159, + 2815502747956890701, null, null, - 78, - 91, - 78, - 91, - 18, - 22, + 0, + 3, + 0, + 3, + 0, + 3, true, - "Cheng-Yang Fu", - "Cheng-Yang Fu" + "7", + "[7]" ], [ "reference", - "author", + "authors", 11872392946390819176, "TEXT", "#/texts/106", 1.0, - 3733048493609069913, - 12058083979397468329, + 3193523784680267563, + 3174963989484511174, null, null, - 97, + 4, 115, - 97, + 4, 115, - 24, + 3, 29, true, - "Alexander C. Berg.", - "Alexander C. Berg." + "Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg", + "Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, and Alexander C. Berg." ], [ "reference", @@ -86947,18 +88837,18 @@ "#/texts/106", 1.0, 389609625548777056, - 12418382060406794776, + 12418382060406794777, null, null, 116, - 120, + 121, 116, - 120, + 121, 29, - 30, + 31, true, "2016", - "2016" + "2016." ], [ "reference", @@ -86968,165 +88858,102 @@ "#/texts/106", 1.0, 10201684882899222639, - 16463858842282873959, + 16463858842282873958, null, null, 122, - 156, + 157, 122, - 156, + 157, 31, - 37, + 38, true, "SSD: Single Shot MultiBox Detector", - "SSD: Single Shot MultiBox Detector" + "SSD: Single Shot MultiBox Detector." ], [ "reference", - "location", + "pages", 11872392946390819176, "TEXT", "#/texts/106", 1.0, - 389609625536506042, - 12420143175742824125, + 329104147696968014, + 12309257817181187525, null, null, - 193, - 197, - 193, - 197, - 42, - 43, + 199, + 205, + 199, + 205, + 44, + 48, true, - "Cham", - "Cham" + "21-37", + "21-37." ], [ "reference", - "url", + "doi", 11872392946390819176, "TEXT", "#/texts/106", 1.0, - 3534146179424153776, - 1525705277889903310, + 3406939305301157505, + 3207939843081913311, null, null, 206, - 224, + 250, 206, - 224, + 250, 48, - 57, + 71, true, - "https://doi.org/10", - "https://doi.org/10" + "https://doi.org/10.1007/978-3-319-46448-0_2", + "https://doi.org/10. 1007/978-3-319-46448-0_2" ], [ "reference", - "citation-number", + "reference-number", 2956849475535726296, "TEXT", "#/texts/107", 1.0, 17767354399704235152, - 1329071568736778037, + 1329071568736777847, null, null, - 1, - 2, - 1, - 2, - 1, - 2, + 0, + 3, + 0, + 3, + 0, + 3, true, "8", - "8" + "[8]" ], [ "reference", - "author", + "authors", 2956849475535726296, "TEXT", "#/texts/107", 1.0, - 5088659084289352829, - 5811844525036759114, + 5765494399172242245, + 17236276175505838249, null, null, 4, - 17, - 4, - 17, - 3, - 5, - true, - "Joseph Redmon", - "Joseph Redmon" - ], - [ - "reference", - "author", - 2956849475535726296, - "TEXT", - "#/texts/107", - 1.0, - 417695209021750783, - 13441950925666715191, - null, - null, - 19, - 40, - 19, - 40, - 6, - 9, - true, - "Santosh Kumar Divvala", - "Santosh Kumar Divvala" - ], - [ - "reference", - "author", - 2956849475535726296, - "TEXT", - "#/texts/107", - 1.0, - 141995704861070506, - 13286696794844996383, - null, - null, - 42, - 58, - 42, - 58, - 10, - 14, - true, - "Ross B. Girshick", - "Ross B. Girshick" - ], - [ - "reference", - "author", - 2956849475535726296, - "TEXT", - "#/texts/107", - 1.0, - 16947174234018208722, - 13965552924856577071, - null, - null, - 64, 76, - 64, + 4, 76, - 16, + 3, 19, true, - "Ali Farhadi.", - "Ali Farhadi." + "Joseph Redmon, Santosh Kumar Divvala, Ross B. Girshick, and Ali Farhadi", + "Joseph Redmon, Santosh Kumar Divvala, Ross B. Girshick, and Ali Farhadi." ], [ "reference", @@ -87136,18 +88963,18 @@ "#/texts/107", 1.0, 389609625548777056, - 17837801987031958568, + 17837801987031958571, null, null, 77, - 81, + 82, 77, - 81, + 82, 19, - 20, + 21, true, "2016", - "2016" + "2016." ], [ "reference", @@ -87157,144 +88984,123 @@ "#/texts/107", 1.0, 5895818558987270699, - 2974553673873283962, + 2974553673873283961, null, null, 83, - 138, + 139, 83, - 138, + 139, 21, - 33, + 34, true, "You Only Look Once: Unified, Real-Time Object Detection", - "You Only Look Once: Unified, Real-Time Object Detection" + "You Only Look Once: Unified, Real-Time Object Detection." ], [ "reference", - "container-title", + "conference", 2956849475535726296, "TEXT", "#/texts/107", 1.0, - 17631274803144515959, - 18105892991402137032, + 3250630386837467209, + 6654509023390656317, null, null, 140, - 203, + 210, 140, - 203, + 210, 34, - 43, + 46, true, - "2016 IEEE Conference on Computer Vision and Pattern Recognition", - "2016 IEEE Conference on Computer Vision and Pattern Recognition" + "2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR", + "2016 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)" ], [ "reference", - "container-title", + "date", 2956849475535726296, "TEXT", "#/texts/107", 1.0, - 389609625526699487, - 17849764824838617245, + 389609625548777056, + 17837801987031983302, null, null, - 205, - 209, - 205, - 209, - 44, - 45, + 211, + 218, + 211, + 218, + 46, + 50, true, - "CVPR", - "CVPR" + "2016", + "(2016)," ], [ "reference", - "date", + "pages", 2956849475535726296, "TEXT", "#/texts/107", 1.0, - 389609625548777056, - 17837801987031982734, + 8104408789271407267, + 9641140559480270339, null, null, - 212, - 216, - 212, - 216, - 47, - 48, + 219, + 227, + 219, + 227, + 50, + 54, true, - "2016", - "2016" + "779-788", + "779-788." ], [ "reference", - "citation-number", + "reference-number", 6623297047995432604, "TEXT", "#/texts/108", 1.0, 17767354399704235153, - 12707124795166019365, + 12707124795166019429, null, null, - 1, - 2, - 1, - 2, - 1, - 2, + 0, + 3, + 0, + 3, + 0, + 3, true, "9", - "9" + "[9]" ], [ "reference", - "author", + "authors", 6623297047995432604, "TEXT", "#/texts/108", 1.0, - 5088659084289352829, - 16235259739729085297, + 14673090853528677233, + 3576487261407722712, null, null, 4, - 17, - 4, - 17, - 3, - 5, - true, - "Joseph Redmon", - "Joseph Redmon" - ], - [ - "reference", - "author", - 6623297047995432604, - "TEXT", - "#/texts/108", - 1.0, - 16947174234018208722, - 7021580680610188634, - null, - null, - 22, 34, - 22, + 4, 34, - 6, + 3, 9, true, - "Ali Farhadi.", - "Ali Farhadi." + "Joseph Redmon and Ali Farhadi", + "Joseph Redmon and Ali Farhadi." ], [ "reference", @@ -87304,144 +89110,123 @@ "#/texts/108", 1.0, 389609625548777056, - 2625243571990787508, + 2625243571990787509, null, null, 35, - 39, + 40, 35, - 39, + 40, 9, - 10, + 11, true, "2016", - "2016" + "2016." ], [ "reference", - "date", + "title", 6623297047995432604, "TEXT", "#/texts/108", 1.0, - 389609625548777056, - 2625243571990783197, + 7902896717348718078, + 10069511086613482031, null, null, - 110, - 114, - 110, - 114, - 28, - 29, - true, - "2016", - "2016" - ], - [ - "reference", - "citation-number", - 2507285765516108280, - "TEXT", - "#/texts/109", - 1.0, - 15441160910541481982, - 10018948798042409332, - null, - null, - 1, - 3, - 1, - 3, - 1, - 2, + 41, + 76, + 41, + 76, + 11, + 20, true, - "10", - "10" + "YOLO9000: Better, Faster, Stronger", + "YOLO9000: Better, Faster, Stronger." ], [ "reference", - "author", - 2507285765516108280, + "note", + 6623297047995432604, "TEXT", - "#/texts/109", + "#/texts/108", 1.0, - 9337887504118347047, - 4966377796769374289, + 3451161195694319017, + 4884487754360265520, null, null, - 5, - 17, - 5, - 17, - 3, - 5, + 77, + 108, + 77, + 108, + 20, + 27, true, - "Shaoqing Ren", - "Shaoqing Ren" + "arXiv:1612.08242", + "arXiv preprint arXiv:1612.08242" ], [ "reference", - "author", - 2507285765516108280, + "date", + 6623297047995432604, "TEXT", - "#/texts/109", + "#/texts/108", 1.0, - 7339447509685488310, - 1490181006860316744, + 389609625548777056, + 2625243571990783774, null, null, - 19, - 29, - 19, - 29, - 6, - 8, + 109, + 116, + 109, + 116, + 27, + 31, true, - "Kaiming He", - "Kaiming He" + "2016", + "(2016)." ], [ "reference", - "author", + "reference-number", 2507285765516108280, "TEXT", "#/texts/109", 1.0, - 13123599834782083842, - 7292467665049010344, + 15441160910541481982, + 10018948798042409140, null, null, - 31, - 44, - 31, - 44, - 9, - 11, + 0, + 4, + 0, + 4, + 0, + 3, true, - "Ross Girshick", - "Ross Girshick" + "10", + "[10]" ], [ "reference", - "author", + "authors", 2507285765516108280, "TEXT", "#/texts/109", 1.0, - 2904781337729160811, - 16221483782846728585, + 15484848138159173238, + 2901960744210333260, null, null, - 50, + 5, 59, - 50, + 5, 59, - 13, + 3, 16, true, - "Jian Sun.", - "Jian Sun." + "Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun", + "Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun." ], [ "reference", @@ -87451,18 +89236,18 @@ "#/texts/109", 1.0, 389609625548777059, - 1924763351573441882, + 1924763351573441885, null, null, 60, - 64, + 65, 60, - 64, + 65, 16, - 17, + 18, true, "2015", - "2015" + "2015." ], [ "reference", @@ -87472,165 +89257,123 @@ "#/texts/109", 1.0, 695901516261617265, - 14331097264748910677, + 14331097264748910678, null, null, 66, - 144, + 145, 66, - 144, + 145, 18, - 33, + 34, true, "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks", - "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" + "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks." ], [ "reference", - "container-title", + "conference", 2507285765516108280, "TEXT", "#/texts/109", 1.0, 17791264228691503041, - 2574823334558986016, + 2574823334558986031, null, null, 146, - 201, + 202, 146, - 201, + 202, 34, - 42, + 43, true, "In Advances in Neural Information Processing Systems 28", - "In Advances in Neural Information Processing Systems 28" + "In Advances in Neural Information Processing Systems 28," ], [ "reference", - "url", + "pages", 2507285765516108280, "TEXT", "#/texts/109", 1.0, - 3374974501831695503, - 17450904193872703176, + 329104147624368040, + 1170990603960953396, null, null, - 309, - 420, - 309, - 420, + 302, + 308, + 302, + 308, + 78, 82, - 119, true, - "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks", - "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks" + "91-99", + "91-99." ], [ "reference", - "citation-number", - 14905276480471286920, + "doi", + 2507285765516108280, "TEXT", - "#/texts/110", + "#/texts/109", 1.0, - 15441160910541481983, - 9122823161539738610, + 10238575549097958729, + 17712172308012161849, null, null, - 1, - 3, - 1, - 3, - 1, - 2, + 309, + 425, + 309, + 425, + 82, + 121, true, - "11", - "11" + "http://papers.nips.cc/paper/5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks.pdf", + "http://papers.nips.cc/paper/ 5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks. pdf" ], [ "reference", - "author", + "reference-number", 14905276480471286920, "TEXT", "#/texts/110", 1.0, - 4686361850733567621, - 5253767773577297512, + 15441160910541481983, + 9122823161539738418, null, null, - 5, - 20, - 5, - 20, + 0, + 4, + 0, + 4, + 0, 3, - 7, - true, - "Peter W J Staar", - "Peter W J Staar" - ], - [ - "reference", - "author", - 14905276480471286920, - "TEXT", - "#/texts/110", - 1.0, - 1571808557594152175, - 1746337992895366641, - null, - null, - 22, - 35, - 22, - 35, - 8, - 10, - true, - "Michele Dolfi", - "Michele Dolfi" - ], - [ - "reference", - "author", - 14905276480471286920, - "TEXT", - "#/texts/110", - 1.0, - 9737597816447750448, - 2973540942666074124, - null, - null, - 37, - 51, - 37, - 51, - 11, - 13, true, - "Christoph Auer", - "Christoph Auer" + "11", + "[11]" ], [ "reference", - "author", + "authors", 14905276480471286920, "TEXT", "#/texts/110", 1.0, - 13732913329338511598, - 166477832047526898, + 8071600523918303141, + 17370433855038416232, null, null, - 57, + 5, 70, - 57, + 5, 70, - 15, + 3, 18, true, - "Costas Bekas.", - "Costas Bekas." + "Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas", + "Peter W J Staar, Michele Dolfi, Christoph Auer, and Costas Bekas." ], [ "reference", @@ -87640,18 +89383,18 @@ "#/texts/110", 1.0, 389609625548777054, - 16555452686088781228, + 16555452686088781227, null, null, 71, - 75, + 76, 71, - 75, + 76, 18, - 19, + 20, true, "2018", - "2018" + "2018." ], [ "reference", @@ -87661,18 +89404,18 @@ "#/texts/110", 1.0, 16083247419427271197, - 18033265608713009513, + 18033265608713009512, null, null, 77, - 133, + 134, 77, - 133, + 134, 20, - 28, + 29, true, "Corpus Conversion Service poster at the SysML conference", - "Corpus Conversion Service poster at the SysML conference" + "Corpus Conversion Service poster at the SysML conference." ], [ "reference", @@ -87681,8 +89424,8 @@ "TEXT", "#/texts/110", 1.0, - 18429963590603622561, - 12432928173216692023, + 2295735467267412578, + 3825294483095122475, null, null, 135, @@ -87692,7 +89435,7 @@ 29, 44, true, - "http://www.sysml.cc/doc/ 76.pdf", + "http://www.sysml.cc/doc/76.pdf", "http://www.sysml.cc/doc/ 76.pdf" ], [ diff --git a/tests/data/docs/doc_01.nlp.json b/tests/data/docs/doc_01.nlp.json index 1c6d5450..a1c8e412 100644 --- a/tests/data/docs/doc_01.nlp.json +++ b/tests/data/docs/doc_01.nlp.json @@ -1803,6 +1803,153 @@ "1", "1" ], + [ + "sentence", + "improper", + 2144509362215609527, + "TEXT", + "#/texts/0", + 1.0, + 16381206540184854990, + 14425920664139507693, + null, + null, + 0, + 6, + 0, + 6, + 0, + 1, + true, + "LETTER", + "LETTER" + ], + [ + "term", + "single-term", + 2144509362215609527, + "TEXT", + "#/texts/0", + 1.0, + 16381206540184854990, + 14425920664139507693, + null, + null, + 0, + 6, + 0, + 6, + 0, + 1, + true, + "LETTER", + "LETTER" + ], + [ + "sentence", + "improper", + 16672720454366774824, + "TEXT", + "#/texts/1", + 1.0, + 4375081646508065875, + 17937643764841871217, + null, + null, + 0, + 97, + 0, + 97, + 0, + 15, + true, + "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", + "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora" + ], + [ + "term", + "single-term", + 16672720454366774824, + "TEXT", + "#/texts/1", + 1.0, + 1821123588367592853, + 16426525811825614286, + null, + null, + 0, + 25, + 0, + 25, + 0, + 3, + true, + "Corpus processing service", + "Corpus processing service" + ], + [ + "term", + "single-term", + 16672720454366774824, + "TEXT", + "#/texts/1", + 1.0, + 12981440865159980116, + 11006111043094060810, + null, + null, + 29, + 53, + 29, + 53, + 5, + 8, + true, + "Knowledge Graph platform", + "Knowledge Graph platform" + ], + [ + "term", + "single-term", + 16672720454366774824, + "TEXT", + "#/texts/1", + 1.0, + 13671659409933113155, + 11123384186454545195, + null, + null, + 65, + 86, + 65, + 86, + 10, + 13, + true, + "deep data exploration", + "deep data exploration" + ], + [ + "term", + "single-term", + 16672720454366774824, + "TEXT", + "#/texts/1", + 1.0, + 8106398483106473371, + 17881878960560572679, + null, + null, + 90, + 97, + 90, + 97, + 14, + 15, + true, + "corpora", + "corpora" + ], [ "name", "person-name", @@ -1826,7 +1973,196 @@ ], [ "sentence", - "", + "improper", + 16781763356419781679, + "TEXT", + "#/texts/2", + 1.0, + 4049808513512976982, + 3005455696801338121, + null, + null, + 0, + 17, + 0, + 17, + 0, + 6, + true, + "Peter W. J. Staar", + "Peter W. J. Staar" + ], + [ + "sentence", + "improper", + 3352447812305581329, + "TEXT", + "#/texts/3", + 1.0, + 17767354399704232748, + 4745352077475809797, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "|", + "|" + ], + [ + "sentence", + "improper", + 14877831450145300436, + "TEXT", + "#/texts/4", + 1.0, + 1571808557594152175, + 17012149340953235049, + null, + null, + 0, + 13, + 0, + 13, + 0, + 2, + true, + "Michele Dolfi", + "Michele Dolfi" + ], + [ + "sentence", + "improper", + 3352447812305581329, + "TEXT", + "#/texts/5", + 1.0, + 17767354399704232748, + 4745352077475809797, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "|", + "|" + ], + [ + "sentence", + "improper", + 13336841394978214677, + "TEXT", + "#/texts/6", + 1.0, + 9737597816447750448, + 8444331438348317113, + null, + null, + 0, + 14, + 0, + 14, + 0, + 2, + true, + "Christoph Auer", + "Christoph Auer" + ], + [ + "sentence", + "improper", + 15325526562897377208, + "TEXT", + "#/texts/7", + 1.0, + 3204757815416943811, + 763233274214000765, + null, + null, + 0, + 38, + 0, + 38, + 0, + 6, + true, + "IBM Research, Rueschlikon, Switzerland", + "IBM Research, Rueschlikon, Switzerland" + ], + [ + "term", + "single-term", + 15325526562897377208, + "TEXT", + "#/texts/7", + 1.0, + 16114797969310195405, + 5003108984492695015, + null, + null, + 0, + 12, + 0, + 12, + 0, + 2, + true, + "IBM Research", + "IBM Research" + ], + [ + "term", + "single-term", + 15325526562897377208, + "TEXT", + "#/texts/7", + 1.0, + 13928399879966460166, + 6391346902522156792, + null, + null, + 14, + 25, + 14, + 25, + 3, + 4, + true, + "Rueschlikon", + "Rueschlikon" + ], + [ + "term", + "single-term", + 15325526562897377208, + "TEXT", + "#/texts/7", + 1.0, + 2664439525053388608, + 9463372035423896361, + null, + null, + 27, + 38, + 27, + 38, + 5, + 6, + true, + "Switzerland", + "Switzerland" + ], + [ + "sentence", + "proper", 4017434568255781081, "TEXT", "#/texts/8", @@ -1992,6 +2328,27 @@ "Switzerland", "Switzerland" ], + [ + "sentence", + "improper", + 4017434568255781081, + "TEXT", + "#/texts/8", + 1.0, + 16381206541204198479, + 1509528765771495324, + null, + null, + 96, + 102, + 96, + 102, + 19, + 21, + true, + "Email:", + "Email:" + ], [ "term", "single-term", @@ -2036,7 +2393,28 @@ ], [ "sentence", - "", + "improper", + 8487024695951375934, + "TEXT", + "#/texts/9", + 1.0, + 14650447666970618949, + 1661150218301863544, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "Abstract", + "Abstract" + ], + [ + "sentence", + "proper", 11695737263227886476, "TEXT", "#/texts/10", @@ -2141,7 +2519,7 @@ ], [ "sentence", - "", + "proper", 11695737263227886476, "TEXT", "#/texts/10", @@ -2288,7 +2666,7 @@ ], [ "sentence", - "", + "proper", 11695737263227886476, "TEXT", "#/texts/10", @@ -2540,7 +2918,7 @@ ], [ "sentence", - "", + "proper", 11695737263227886476, "TEXT", "#/texts/10", @@ -2687,7 +3065,7 @@ ], [ "sentence", - "", + "proper", 11695737263227886476, "TEXT", "#/texts/10", @@ -2813,7 +3191,7 @@ ], [ "sentence", - "", + "proper", 11695737263227886476, "TEXT", "#/texts/10", @@ -3023,7 +3401,7 @@ ], [ "sentence", - "", + "proper", 11695737263227886476, "TEXT", "#/texts/10", @@ -3128,7 +3506,7 @@ ], [ "sentence", - "", + "proper", 11695737263227886476, "TEXT", "#/texts/10", @@ -3191,7 +3569,7 @@ ], [ "sentence", - "", + "proper", 11695737263227886476, "TEXT", "#/texts/10", @@ -3296,7 +3674,7 @@ ], [ "sentence", - "", + "proper", 11695737263227886476, "TEXT", "#/texts/10", @@ -3380,7 +3758,7 @@ ], [ "sentence", - "", + "proper", 11695737263227886476, "TEXT", "#/texts/10", @@ -3609,6 +3987,48 @@ "gas industry", "gas industry" ], + [ + "sentence", + "improper", + 8500733160758672230, + "TEXT", + "#/texts/11", + 1.0, + 14650267244735310237, + 10335481804989336458, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "KEYWORDS", + "KEYWORDS" + ], + [ + "sentence", + "improper", + 4452030907228745864, + "TEXT", + "#/texts/12", + 1.0, + 243147861724212659, + 2926802342881658249, + null, + null, + 0, + 53, + 0, + 53, + 0, + 8, + true, + "document processing, knowledge graph, semantic search", + "document processing, knowledge graph, semantic search" + ], [ "reference", "title", @@ -3630,6 +4050,69 @@ "document processing, knowledge graph, semantic search", "document processing, knowledge graph, semantic search" ], + [ + "term", + "single-term", + 4452030907228745864, + "TEXT", + "#/texts/12", + 1.0, + 16912782321081403463, + 5557299811823150638, + null, + null, + 0, + 19, + 0, + 19, + 0, + 2, + true, + "document processing", + "document processing" + ], + [ + "term", + "single-term", + 4452030907228745864, + "TEXT", + "#/texts/12", + 1.0, + 11554018427322248517, + 11890723026510958271, + null, + null, + 21, + 36, + 21, + 36, + 3, + 5, + true, + "knowledge graph", + "knowledge graph" + ], + [ + "term", + "single-term", + 4452030907228745864, + "TEXT", + "#/texts/12", + 1.0, + 17144398259818337595, + 9555084629540376732, + null, + null, + 38, + 53, + 38, + 53, + 6, + 8, + true, + "semantic search", + "semantic search" + ], [ "numval", "ival", @@ -3653,7 +4136,49 @@ ], [ "sentence", - "", + "improper", + 11913688961435238004, + "TEXT", + "#/texts/13", + 1.0, + 533161484238803215, + 18030457052149769745, + null, + null, + 2, + 16, + 2, + 16, + 1, + 3, + true, + "| INTRODUCTION", + "| INTRODUCTION" + ], + [ + "term", + "single-term", + 11913688961435238004, + "TEXT", + "#/texts/13", + 1.0, + 8523954622022126279, + 16214061308619757979, + null, + null, + 4, + 16, + 4, + 16, + 2, + 3, + true, + "INTRODUCTION", + "INTRODUCTION" + ], + [ + "sentence", + "proper", 9977041563469582014, "TEXT", "#/texts/14", @@ -3779,7 +4304,7 @@ ], [ "sentence", - "", + "proper", 9977041563469582014, "TEXT", "#/texts/14", @@ -3842,7 +4367,7 @@ ], [ "sentence", - "", + "proper", 9977041563469582014, "TEXT", "#/texts/14", @@ -4010,7 +4535,7 @@ ], [ "sentence", - "", + "proper", 9977041563469582014, "TEXT", "#/texts/14", @@ -4115,7 +4640,7 @@ ], [ "sentence", - "", + "proper", 9977041563469582014, "TEXT", "#/texts/14", @@ -4365,6 +4890,27 @@ "2", "2" ], + [ + "sentence", + "improper", + 4361549266817300114, + "TEXT", + "#/texts/15", + 1.0, + 15441160910541485670, + 7890928514636693241, + null, + null, + 1, + 3, + 1, + 3, + 1, + 2, + true, + "of", + "of" + ], [ "numval", "ival", @@ -4388,7 +4934,7 @@ ], [ "sentence", - "", + "proper", 8425126282903547933, "TEXT", "#/texts/16", @@ -4514,7 +5060,7 @@ ], [ "sentence", - "", + "proper", 8425126282903547933, "TEXT", "#/texts/16", @@ -4829,7 +5375,7 @@ ], [ "sentence", - "", + "proper", 8425126282903547933, "TEXT", "#/texts/16", @@ -4976,7 +5522,7 @@ ], [ "sentence", - "", + "proper", 8425126282903547933, "TEXT", "#/texts/16", @@ -5186,7 +5732,28 @@ ], [ "sentence", - "", + "improper", + 8425126282903547933, + "TEXT", + "#/texts/16", + 1.0, + 17767354399704235143, + 14071188582173006670, + null, + null, + 581, + 582, + 581, + 582, + 114, + 115, + true, + "'", + "'" + ], + [ + "sentence", + "proper", 8425126282903547933, "TEXT", "#/texts/16", @@ -5291,7 +5858,7 @@ ], [ "sentence", - "", + "proper", 8425126282903547933, "TEXT", "#/texts/16", @@ -5417,7 +5984,7 @@ ], [ "sentence", - "", + "proper", 8425126282903547933, "TEXT", "#/texts/16", @@ -5606,7 +6173,7 @@ ], [ "sentence", - "", + "proper", 8425126282903547933, "TEXT", "#/texts/16", @@ -5774,7 +6341,7 @@ ], [ "sentence", - "", + "proper", 16507313240019459642, "TEXT", "#/texts/17", @@ -5879,7 +6446,7 @@ ], [ "sentence", - "", + "proper", 16507313240019459642, "TEXT", "#/texts/17", @@ -6173,7 +6740,7 @@ ], [ "sentence", - "", + "proper", 16507313240019459642, "TEXT", "#/texts/17", @@ -6362,7 +6929,7 @@ ], [ "sentence", - "", + "proper", 16507313240019459642, "TEXT", "#/texts/17", @@ -6488,7 +7055,28 @@ ], [ "sentence", - "", + "improper", + 16507313240019459642, + "TEXT", + "#/texts/17", + 1.0, + 17767354399704235138, + 4390211557450324078, + null, + null, + 596, + 597, + 596, + 597, + 109, + 110, + true, + "*", + "*" + ], + [ + "sentence", + "proper", 16507313240019459642, "TEXT", "#/texts/17", @@ -6654,6 +7242,27 @@ "actual deep data exploration", "actual deep data exploration" ], + [ + "sentence", + "improper", + 16507313240019459642, + "TEXT", + "#/texts/17", + 1.0, + 14823265707172761733, + 13886506062245764968, + null, + null, + 787, + 836, + 787, + 836, + 146, + 156, + true, + "Those datasets can then be used for further anal-", + "Those datasets can then be used for further anal-" + ], [ "term", "single-term", @@ -6698,7 +7307,196 @@ ], [ "sentence", - "", + "improper", + 7900229969942228522, + "TEXT", + "#/texts/18", + 1.0, + 12931323242585971793, + 5584903039670660830, + null, + null, + 0, + 69, + 0, + 69, + 0, + 13, + true, + "ysis, which might lead to new discoveries or support decision making.", + "ysis, which might lead to new discoveries or support decision making." + ], + [ + "term", + "single-term", + 7900229969942228522, + "TEXT", + "#/texts/18", + 1.0, + 389609625740596187, + 6401258763499079935, + null, + null, + 0, + 4, + 0, + 4, + 0, + 1, + true, + "ysis", + "ysis" + ], + [ + "term", + "single-term", + 7900229969942228522, + "TEXT", + "#/texts/18", + 1.0, + 13137373831138315414, + 5309524059360002790, + null, + null, + 26, + 41, + 26, + 41, + 6, + 8, + true, + "new discoveries", + "new discoveries" + ], + [ + "term", + "single-term", + 7900229969942228522, + "TEXT", + "#/texts/18", + 1.0, + 12230352936556214642, + 5916258249560511303, + null, + null, + 45, + 68, + 45, + 68, + 9, + 12, + true, + "support decision making", + "support decision making" + ], + [ + "sentence", + "improper", + 10081303962589804251, + "TEXT", + "#/texts/19", + 1.0, + 6426882630003520482, + 13470221577302260209, + null, + null, + 0, + 101, + 0, + 101, + 0, + 16, + true, + "To better distinguish this approach from conventional search, let us consider some example questions:", + "To better distinguish this approach from conventional search, let us consider some example questions:" + ], + [ + "term", + "single-term", + 10081303962589804251, + "TEXT", + "#/texts/19", + 1.0, + 14650448032998792781, + 13384454628370813432, + null, + null, + 27, + 35, + 27, + 35, + 4, + 5, + true, + "approach", + "approach" + ], + [ + "term", + "single-term", + 10081303962589804251, + "TEXT", + "#/texts/19", + 1.0, + 11445917366009547553, + 2007466505850842682, + null, + null, + 41, + 60, + 41, + 60, + 6, + 8, + true, + "conventional search", + "conventional search" + ], + [ + "term", + "single-term", + 10081303962589804251, + "TEXT", + "#/texts/19", + 1.0, + 8440910223051288106, + 3622565067267193267, + null, + null, + 83, + 100, + 83, + 100, + 13, + 15, + true, + "example questions", + "example questions" + ], + [ + "sentence", + "improper", + 12186698460099365002, + "TEXT", + "#/texts/20", + 1.0, + 15441160910541486976, + 4998800377492731468, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "a.", + "a." + ], + [ + "sentence", + "proper", 12186698460099365002, "TEXT", "#/texts/20", @@ -6759,30 +7557,51 @@ "high temperature superconductor", "high temperature superconductor" ], + [ + "sentence", + "improper", + 14190244699299580163, + "TEXT", + "#/texts/21", + 1.0, + 15441160910541486912, + 10781177312015720536, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "b.", + "b." + ], [ "reference", - "citation-number", + "authors", 14190244699299580163, "TEXT", "#/texts/21", 1.0, 17767354399704235210, - 16574868136523762017, + 16574868136523762016, null, null, 0, - 1, + 2, 0, - 1, + 2, 0, - 1, + 2, true, "b", - "b" + "b." ], [ "sentence", - "", + "proper", 14190244699299580163, "TEXT", "#/texts/21", @@ -6808,19 +7627,19 @@ "TEXT", "#/texts/21", 1.0, - 9702659979962978705, - 13607072099360924822, + 5938585443202988569, + 13176291235909531708, null, null, 3, - 44, + 39, 3, - 44, + 39, 2, - 10, + 9, true, - "Publications of before year 2010", - "Publications of before year 2010" + "Publications of before year", + "Publications of before year" ], [ "term", @@ -6908,7 +7727,28 @@ ], [ "reference", - "author", + "date", + 14190244699299580163, + "TEXT", + "#/texts/21", + 1.0, + 389609625548777062, + 16322066304153845813, + null, + null, + 40, + 45, + 40, + 45, + 9, + 11, + true, + "2010", + "2010." + ], + [ + "sentence", + "improper", 1376279050886549305, "TEXT", "#/texts/22", @@ -6927,9 +7767,30 @@ "c.", "c." ], + [ + "reference", + "authors", + 1376279050886549305, + "TEXT", + "#/texts/22", + 1.0, + 17767354399704235211, + 4876440209134886407, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "c", + "c." + ], [ "sentence", - "", + "proper", 1376279050886549305, "TEXT", "#/texts/22", @@ -6956,18 +7817,18 @@ "#/texts/22", 1.0, 9449659440238098202, - 18118593112648843890, + 18118593112648843891, null, null, 3, - 28, + 29, 3, - 28, + 29, 2, - 7, + 8, true, "Maps of the Permian basin", - "Maps of the Permian basin" + "Maps of the Permian basin." ], [ "term", @@ -7013,7 +7874,28 @@ ], [ "sentence", - "", + "improper", + 10155628801693924200, + "TEXT", + "#/texts/23", + 1.0, + 15441160910541487298, + 18395000339474183225, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "d.", + "d." + ], + [ + "sentence", + "proper", 10155628801693924200, "TEXT", "#/texts/23", @@ -7181,7 +8063,28 @@ ], [ "sentence", - "", + "improper", + 9107499507097280105, + "TEXT", + "#/texts/24", + 1.0, + 15441160910541487235, + 11864515451990234441, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "e.", + "e." + ], + [ + "sentence", + "proper", 9107499507097280105, "TEXT", "#/texts/24", @@ -7286,7 +8189,7 @@ ], [ "sentence", - "", + "proper", 7248467870339433322, "TEXT", "#/texts/25", @@ -7496,7 +8399,7 @@ ], [ "sentence", - "", + "proper", 7248467870339433322, "TEXT", "#/texts/25", @@ -7622,7 +8525,7 @@ ], [ "sentence", - "", + "proper", 7248467870339433322, "TEXT", "#/texts/25", @@ -7832,7 +8735,7 @@ ], [ "sentence", - "", + "proper", 7248467870339433322, "TEXT", "#/texts/25", @@ -7979,7 +8882,7 @@ ], [ "sentence", - "", + "proper", 7248467870339433322, "TEXT", "#/texts/25", @@ -8250,6 +9153,132 @@ "respective properties", "respective properties" ], + [ + "sentence", + "improper", + 13346892078888080449, + "TEXT", + "#/texts/26", + 1.0, + 9732050976592056956, + 7055672841020251338, + null, + null, + 0, + 140, + 0, + 140, + 0, + 23, + true, + "Concluding from the above examples, we define the following qualifying criteria for a system that supports deep data exploration on corpora:", + "Concluding from the above examples, we define the following qualifying criteria for a system that supports deep data exploration on corpora:" + ], + [ + "term", + "single-term", + 13346892078888080449, + "TEXT", + "#/texts/26", + 1.0, + 16652112846725585848, + 6419752225232606784, + null, + null, + 20, + 34, + 20, + 34, + 3, + 5, + true, + "above examples", + "above examples" + ], + [ + "term", + "single-term", + 13346892078888080449, + "TEXT", + "#/texts/26", + 1.0, + 14652282445985817695, + 8459337319831918928, + null, + null, + 71, + 79, + 71, + 79, + 11, + 12, + true, + "criteria", + "criteria" + ], + [ + "term", + "single-term", + 13346892078888080449, + "TEXT", + "#/texts/26", + 1.0, + 16381206550376895780, + 3357521497593263256, + null, + null, + 86, + 92, + 86, + 92, + 14, + 15, + true, + "system", + "system" + ], + [ + "term", + "single-term", + 13346892078888080449, + "TEXT", + "#/texts/26", + 1.0, + 13671659409933113155, + 17581719947633067695, + null, + null, + 107, + 128, + 107, + 128, + 17, + 20, + true, + "deep data exploration", + "deep data exploration" + ], + [ + "term", + "single-term", + 13346892078888080449, + "TEXT", + "#/texts/26", + 1.0, + 8106398483106473371, + 5210834614831011291, + null, + null, + 132, + 139, + 132, + 139, + 21, + 22, + true, + "corpora", + "corpora" + ], [ "numval", "ival", @@ -8273,7 +9302,28 @@ ], [ "sentence", - "", + "improper", + 1118972765223422660, + "TEXT", + "#/texts/27", + 1.0, + 17767354399704235166, + 16395526851487480286, + null, + null, + 1, + 2, + 1, + 2, + 1, + 2, + true, + ".", + "." + ], + [ + "sentence", + "proper", 1118972765223422660, "TEXT", "#/texts/27", @@ -8399,7 +9449,28 @@ ], [ "sentence", - "", + "improper", + 324023167304456371, + "TEXT", + "#/texts/28", + 1.0, + 17767354399704235166, + 964743056328104984, + null, + null, + 1, + 2, + 1, + 2, + 1, + 2, + true, + ".", + "." + ], + [ + "sentence", + "proper", 324023167304456371, "TEXT", "#/texts/28", @@ -8525,7 +9596,7 @@ ], [ "sentence", - "", + "proper", 324023167304456371, "TEXT", "#/texts/28", @@ -8588,7 +9659,28 @@ ], [ "sentence", - "", + "improper", + 4651508276868765576, + "TEXT", + "#/texts/29", + 1.0, + 17767354399704235166, + 12716136938048933799, + null, + null, + 1, + 2, + 1, + 2, + 1, + 2, + true, + ".", + "." + ], + [ + "sentence", + "proper", 4651508276868765576, "TEXT", "#/texts/29", @@ -8714,7 +9806,7 @@ ], [ "sentence", - "", + "proper", 3052020526349962744, "TEXT", "#/texts/30", @@ -8798,7 +9890,7 @@ ], [ "sentence", - "", + "proper", 3052020526349962744, "TEXT", "#/texts/30", @@ -9155,7 +10247,7 @@ ], [ "sentence", - "", + "proper", 3052020526349962744, "TEXT", "#/texts/30", @@ -9323,7 +10415,7 @@ ], [ "sentence", - "", + "proper", 3052020526349962744, "TEXT", "#/texts/30", @@ -9407,7 +10499,7 @@ ], [ "sentence", - "", + "proper", 6725501529910185390, "TEXT", "#/texts/31", @@ -9638,7 +10730,7 @@ ], [ "sentence", - "", + "proper", 14814111183601762276, "TEXT", "#/texts/32", @@ -9806,7 +10898,7 @@ ], [ "sentence", - "", + "proper", 14814111183601762276, "TEXT", "#/texts/32", @@ -9972,6 +11064,27 @@ "massive scale", "massive scale" ], + [ + "sentence", + "improper", + 14814111183601762276, + "TEXT", + "#/texts/32", + 1.0, + 7914280693915181635, + 14854893084684010129, + null, + null, + 240, + 346, + 240, + 346, + 46, + 68, + true, + "In section 3, we go into detail of designing deep queries and show how we compute them in a very efficient", + "In section 3, we go into detail of designing deep queries and show how we compute them in a very efficient" + ], [ "term", "single-term", @@ -10077,6 +11190,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/33", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -10121,7 +11255,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/33", @@ -10371,6 +11505,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/33", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -10581,6 +11736,27 @@ "3", "3" ], + [ + "sentence", + "improper", + 4361549266681704196, + "TEXT", + "#/texts/34", + 1.0, + 15441160910541485670, + 10132017202982233095, + null, + null, + 1, + 3, + 1, + 3, + 1, + 2, + true, + "of", + "of" + ], [ "numval", "ival", @@ -10602,6 +11778,27 @@ "15", "15" ], + [ + "sentence", + "improper", + 8043608144162608258, + "TEXT", + "#/texts/35", + 1.0, + 15874830917445991279, + 14751652686275270830, + null, + null, + 0, + 12, + 0, + 12, + 0, + 3, + true, + "way with our", + "way with our" + ], [ "term", "single-term", @@ -10625,7 +11822,7 @@ ], [ "sentence", - "", + "proper", 8043608144162608258, "TEXT", "#/texts/35", @@ -10688,7 +11885,7 @@ ], [ "sentence", - "", + "proper", 8043608144162608258, "TEXT", "#/texts/35", @@ -10814,7 +12011,7 @@ ], [ "sentence", - "", + "proper", 8043608144162608258, "TEXT", "#/texts/35", @@ -10961,7 +12158,49 @@ ], [ "sentence", - "", + "improper", + 7159467829896778939, + "TEXT", + "#/texts/36", + 1.0, + 7707525670076367550, + 550173016828544359, + null, + null, + 2, + 37, + 2, + 37, + 1, + 6, + true, + "| SCALABLE KNOWLEDGE GRAPH CREATION", + "| SCALABLE KNOWLEDGE GRAPH CREATION" + ], + [ + "term", + "single-term", + 7159467829896778939, + "TEXT", + "#/texts/36", + 1.0, + 8338193261817505168, + 7647354265926273138, + null, + null, + 4, + 37, + 4, + 37, + 2, + 6, + true, + "SCALABLE KNOWLEDGE GRAPH CREATION", + "SCALABLE KNOWLEDGE GRAPH CREATION" + ], + [ + "sentence", + "proper", 5617240156952377, "TEXT", "#/texts/37", @@ -11150,7 +12389,7 @@ ], [ "sentence", - "", + "proper", 5617240156952377, "TEXT", "#/texts/37", @@ -11234,7 +12473,7 @@ ], [ "sentence", - "", + "proper", 5617240156952377, "TEXT", "#/texts/37", @@ -11402,7 +12641,7 @@ ], [ "sentence", - "", + "proper", 5617240156952377, "TEXT", "#/texts/37", @@ -11759,7 +12998,7 @@ ], [ "sentence", - "", + "proper", 5617240156952377, "TEXT", "#/texts/37", @@ -11843,7 +13082,7 @@ ], [ "sentence", - "", + "proper", 5617240156952377, "TEXT", "#/texts/37", @@ -12032,7 +13271,7 @@ ], [ "sentence", - "", + "proper", 5617240156952377, "TEXT", "#/texts/37", @@ -12179,7 +13418,7 @@ ], [ "sentence", - "", + "proper", 3276490574487379366, "TEXT", "#/texts/38", @@ -12284,7 +13523,7 @@ ], [ "sentence", - "", + "proper", 3276490574487379366, "TEXT", "#/texts/38", @@ -12599,7 +13838,7 @@ ], [ "sentence", - "", + "proper", 3276490574487379366, "TEXT", "#/texts/38", @@ -12641,7 +13880,7 @@ ], [ "sentence", - "", + "proper", 3276490574487379366, "TEXT", "#/texts/38", @@ -12767,7 +14006,7 @@ ], [ "sentence", - "", + "proper", 3276490574487379366, "TEXT", "#/texts/38", @@ -12998,7 +14237,7 @@ ], [ "sentence", - "", + "proper", 3276490574487379366, "TEXT", "#/texts/38", @@ -13292,7 +14531,7 @@ ], [ "sentence", - "", + "proper", 3276490574487379366, "TEXT", "#/texts/38", @@ -13628,7 +14867,7 @@ ], [ "sentence", - "", + "proper", 3276490574487379366, "TEXT", "#/texts/38", @@ -13754,7 +14993,7 @@ ], [ "sentence", - "", + "proper", 3276490574487379366, "TEXT", "#/texts/38", @@ -13964,7 +15203,49 @@ ], [ "sentence", - "", + "improper", + 3367451956962330174, + "TEXT", + "#/texts/39", + 1.0, + 8611251901308287420, + 7694453792543019958, + null, + null, + 4, + 14, + 4, + 14, + 3, + 6, + true, + "| DF tasks", + "| DF tasks" + ], + [ + "term", + "single-term", + 3367451956962330174, + "TEXT", + "#/texts/39", + 1.0, + 14650437071608036927, + 6203129036654967457, + null, + null, + 6, + 14, + 6, + 14, + 4, + 6, + true, + "DF tasks", + "DF tasks" + ], + [ + "sentence", + "proper", 5509744459704235873, "TEXT", "#/texts/40", @@ -14174,7 +15455,7 @@ ], [ "sentence", - "", + "proper", 5509744459704235873, "TEXT", "#/texts/40", @@ -14298,6 +15579,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -14342,7 +15644,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/41", @@ -14592,6 +15894,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -14802,6 +16125,27 @@ "4", "4" ], + [ + "sentence", + "improper", + 4361549176688508574, + "TEXT", + "#/texts/42", + 1.0, + 15441160910541485670, + 7918927380167181789, + null, + null, + 1, + 3, + 1, + 3, + 1, + 2, + true, + "of", + "of" + ], [ "numval", "ival", @@ -14823,6 +16167,27 @@ "15", "15" ], + [ + "sentence", + "improper", + 12374482891052873875, + "TEXT", + "#/texts/43", + 1.0, + 8758905122433574314, + 10402008902852922243, + null, + null, + 0, + 18, + 0, + 18, + 0, + 7, + true, + "2.1.1 | Extraction", + "2.1.1 | Extraction" + ], [ "expression", "wtoken-concatenation", @@ -14886,9 +16251,30 @@ "1", "1" ], + [ + "term", + "single-term", + 12374482891052873875, + "TEXT", + "#/texts/43", + 1.0, + 6329470030377853550, + 13847055292419842236, + null, + null, + 6, + 18, + 6, + 18, + 5, + 7, + true, + "| Extraction", + "| Extraction" + ], [ "sentence", - "", + "proper", 2755397864153233778, "TEXT", "#/texts/44", @@ -15119,7 +16505,7 @@ ], [ "sentence", - "", + "proper", 2755397864153233778, "TEXT", "#/texts/44", @@ -15224,7 +16610,7 @@ ], [ "sentence", - "", + "proper", 2755397864153233778, "TEXT", "#/texts/44", @@ -15434,7 +16820,7 @@ ], [ "sentence", - "", + "proper", 4698316471746130896, "TEXT", "#/texts/45", @@ -15560,7 +16946,7 @@ ], [ "sentence", - "", + "proper", 4698316471746130896, "TEXT", "#/texts/45", @@ -15728,7 +17114,7 @@ ], [ "sentence", - "", + "proper", 4698316471746130896, "TEXT", "#/texts/45", @@ -15791,7 +17177,7 @@ ], [ "sentence", - "", + "proper", 11827267218358801841, "TEXT", "#/texts/46", @@ -15938,7 +17324,7 @@ ], [ "sentence", - "", + "proper", 11827267218358801841, "TEXT", "#/texts/46", @@ -16104,6 +17490,27 @@ "certain fact", "certain fact" ], + [ + "sentence", + "improper", + 6297710299044869343, + "TEXT", + "#/texts/47", + 1.0, + 12444247655523627494, + 11369889824975445759, + null, + null, + 0, + 18, + 0, + 18, + 0, + 7, + true, + "2.1.2 | Annotation", + "2.1.2 | Annotation" + ], [ "expression", "wtoken-concatenation", @@ -16169,7 +17576,7 @@ ], [ "sentence", - "", + "proper", 7158837349769150986, "TEXT", "#/texts/48", @@ -16295,7 +17702,7 @@ ], [ "sentence", - "", + "proper", 7158837349769150986, "TEXT", "#/texts/48", @@ -16526,7 +17933,7 @@ ], [ "sentence", - "", + "proper", 7158837349769150986, "TEXT", "#/texts/48", @@ -16694,7 +18101,7 @@ ], [ "sentence", - "", + "proper", 7158837349769150986, "TEXT", "#/texts/48", @@ -17051,7 +18458,7 @@ ], [ "sentence", - "", + "proper", 1150871476689677866, "TEXT", "#/texts/49", @@ -17198,7 +18605,7 @@ ], [ "sentence", - "", + "proper", 1150871476689677866, "TEXT", "#/texts/49", @@ -17387,7 +18794,7 @@ ], [ "sentence", - "", + "proper", 1150871476689677866, "TEXT", "#/texts/49", @@ -17492,7 +18899,7 @@ ], [ "sentence", - "", + "proper", 1150871476689677866, "TEXT", "#/texts/49", @@ -17702,7 +19109,7 @@ ], [ "sentence", - "", + "proper", 5163702913945903725, "TEXT", "#/texts/50", @@ -17891,7 +19298,7 @@ ], [ "sentence", - "", + "proper", 5163702913945903725, "TEXT", "#/texts/50", @@ -17975,7 +19382,28 @@ ], [ "sentence", - "", + "improper", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 17767354399704232711, + 8171001056559370747, + null, + null, + 234, + 236, + 234, + 235, + 44, + 45, + true, + "\u00a7", + "\u00a7" + ], + [ + "sentence", + "proper", 5163702913945903725, "TEXT", "#/texts/50", @@ -18122,7 +19550,7 @@ ], [ "sentence", - "", + "proper", 5163702913945903725, "TEXT", "#/texts/50", @@ -18374,7 +19802,7 @@ ], [ "sentence", - "", + "proper", 5462319091745771382, "TEXT", "#/texts/51", @@ -18540,6 +19968,27 @@ "8", "8" ], + [ + "sentence", + "improper", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 3748962472868307378, + 12273141133482550738, + null, + null, + 114, + 220, + 114, + 220, + 19, + 37, + true, + "The language entities here are all related to geological concepts in the domain of oil and gas exploration", + "The language entities here are all related to geological concepts in the domain of oil and gas exploration" + ], [ "term", "single-term", @@ -18687,6 +20136,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -18731,7 +20201,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/52", @@ -18981,6 +20451,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -19172,7 +20663,7 @@ ], [ "sentence", - "", + "proper", 958124839653591304, "TEXT", "#/texts/53", @@ -19382,7 +20873,7 @@ ], [ "sentence", - "", + "proper", 958124839653591304, "TEXT", "#/texts/53", @@ -19466,7 +20957,7 @@ ], [ "sentence", - "", + "proper", 958124839653591304, "TEXT", "#/texts/53", @@ -19569,6 +21060,27 @@ "extraction", "extraction" ], + [ + "sentence", + "improper", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 1784572687958230117, + 4252148808147073472, + null, + null, + 296, + 421, + 296, + 421, + 51, + 70, + true, + "Relationships are always defined on detected entities, and will therefore use references defining a link between two entities", + "Relationships are always defined on detected entities, and will therefore use references defining a link between two entities" + ], [ "term", "single-term", @@ -19676,7 +21188,7 @@ ], [ "sentence", - "", + "proper", 1448405324616602032, "TEXT", "#/texts/54", @@ -19739,7 +21251,7 @@ ], [ "sentence", - "", + "proper", 1448405324616602032, "TEXT", "#/texts/54", @@ -19865,7 +21377,7 @@ ], [ "sentence", - "", + "proper", 1448405324616602032, "TEXT", "#/texts/54", @@ -20117,7 +21629,7 @@ ], [ "sentence", - "", + "proper", 1448405324616602032, "TEXT", "#/texts/54", @@ -20180,7 +21692,7 @@ ], [ "sentence", - "", + "proper", 1448405324616602032, "TEXT", "#/texts/54", @@ -20327,7 +21839,7 @@ ], [ "sentence", - "", + "proper", 1448405324616602032, "TEXT", "#/texts/54", @@ -20430,6 +21942,27 @@ "million independent workers", "million independent workers" ], + [ + "sentence", + "improper", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 18150799209915986647, + 9689816716635830050, + null, + null, + 0, + 31, + 0, + 31, + 0, + 9, + true, + "2.1.3 | Aggregation of entities", + "2.1.3 | Aggregation of entities" + ], [ "expression", "wtoken-concatenation", @@ -20493,9 +22026,51 @@ "3", "3" ], + [ + "term", + "single-term", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 12405860233317252202, + 7842840693633921498, + null, + null, + 6, + 19, + 6, + 19, + 5, + 7, + true, + "| Aggregation", + "| Aggregation" + ], + [ + "term", + "single-term", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 14652256560445338257, + 9338367723294437133, + null, + null, + 23, + 31, + 23, + 31, + 8, + 9, + true, + "entities", + "entities" + ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -20642,7 +22217,7 @@ ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -20789,7 +22364,7 @@ ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -20915,7 +22490,7 @@ ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -21062,7 +22637,7 @@ ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -21209,7 +22784,7 @@ ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -21398,7 +22973,7 @@ ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -21503,7 +23078,7 @@ ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -21566,7 +23141,7 @@ ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -21671,7 +23246,7 @@ ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -21776,7 +23351,7 @@ ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -21881,7 +23456,7 @@ ], [ "sentence", - "", + "proper", 13974986056043304735, "TEXT", "#/texts/56", @@ -21984,6 +23559,27 @@ "others", "others" ], + [ + "sentence", + "improper", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 12765605759878485615, + 12015650457911020971, + null, + null, + 0, + 36, + 0, + 36, + 0, + 9, + true, + "2.1.4 | Aggregation of relationships", + "2.1.4 | Aggregation of relationships" + ], [ "expression", "wtoken-concatenation", @@ -22047,9 +23643,51 @@ "4", "4" ], + [ + "term", + "single-term", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 12405860233317252202, + 3087312697670277359, + null, + null, + 6, + 19, + 6, + 19, + 5, + 7, + true, + "| Aggregation", + "| Aggregation" + ], + [ + "term", + "single-term", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 8279380567349713241, + 13167358372837789646, + null, + null, + 23, + 36, + 23, + 36, + 8, + 9, + true, + "relationships", + "relationships" + ], [ "sentence", - "", + "proper", 11235296141350659290, "TEXT", "#/texts/58", @@ -22175,7 +23813,7 @@ ], [ "sentence", - "", + "proper", 11235296141350659290, "TEXT", "#/texts/58", @@ -22322,7 +23960,7 @@ ], [ "sentence", - "", + "proper", 11235296141350659290, "TEXT", "#/texts/58", @@ -22551,6 +24189,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -22595,7 +24254,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/59", @@ -22845,6 +24504,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -23055,6 +24735,27 @@ "6", "6" ], + [ + "sentence", + "improper", + 4361549266576336732, + "TEXT", + "#/texts/60", + 1.0, + 15441160910541485670, + 15406507517050657965, + null, + null, + 1, + 3, + 1, + 3, + 1, + 2, + true, + "of", + "of" + ], [ "numval", "ival", @@ -23078,7 +24779,7 @@ ], [ "sentence", - "", + "proper", 5771309285006424458, "TEXT", "#/texts/61", @@ -23204,7 +24905,7 @@ ], [ "sentence", - "", + "proper", 5771309285006424458, "TEXT", "#/texts/61", @@ -23288,7 +24989,7 @@ ], [ "sentence", - "", + "proper", 5771309285006424458, "TEXT", "#/texts/61", @@ -23456,7 +25157,7 @@ ], [ "sentence", - "", + "proper", 5771309285006424458, "TEXT", "#/texts/61", @@ -23603,7 +25304,49 @@ ], [ "sentence", - "", + "improper", + 5371685212527510397, + "TEXT", + "#/texts/62", + 1.0, + 6804373445441261094, + 2924256996521789407, + null, + null, + 4, + 16, + 4, + 16, + 3, + 6, + true, + "| Data flows", + "| Data flows" + ], + [ + "term", + "single-term", + 5371685212527510397, + "TEXT", + "#/texts/62", + 1.0, + 389609625537659398, + 16585943581147490691, + null, + null, + 6, + 10, + 6, + 10, + 4, + 5, + true, + "Data", + "Data" + ], + [ + "sentence", + "proper", 7817257645383866853, "TEXT", "#/texts/63", @@ -23748,6 +25491,27 @@ "specific KG", "specific KG" ], + [ + "sentence", + "improper", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 5885770492001511082, + 11316457715017340686, + null, + null, + 145, + 225, + 145, + 225, + 29, + 45, + true, + "When instantiating a DF, one has the possibility to define in a declarative way:", + "When instantiating a DF, one has the possibility to define in a declarative way:" + ], [ "term", "single-term", @@ -23834,7 +25598,28 @@ ], [ "sentence", - "", + "improper", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 17767354399704235166, + 9308892478371477431, + null, + null, + 1, + 2, + 1, + 2, + 1, + 2, + true, + ".", + "." + ], + [ + "sentence", + "proper", 2929626768872004841, "TEXT", "#/texts/64", @@ -24107,7 +25892,28 @@ ], [ "sentence", - "", + "improper", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 17767354399704235166, + 8832343915883415512, + null, + null, + 1, + 2, + 1, + 2, + 1, + 2, + true, + ".", + "." + ], + [ + "sentence", + "proper", 15879756297712818143, "TEXT", "#/texts/65", @@ -24380,7 +26186,28 @@ ], [ "sentence", - "", + "improper", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 17767354399704235166, + 4307298561299144891, + null, + null, + 1, + 2, + 1, + 2, + 1, + 2, + true, + ".", + "." + ], + [ + "sentence", + "proper", 16116531546352845311, "TEXT", "#/texts/66", @@ -24506,7 +26333,7 @@ ], [ "sentence", - "", + "proper", 9541434157786316356, "TEXT", "#/texts/67", @@ -24611,7 +26438,7 @@ ], [ "sentence", - "", + "proper", 9541434157786316356, "TEXT", "#/texts/67", @@ -24905,7 +26732,7 @@ ], [ "sentence", - "", + "proper", 9541434157786316356, "TEXT", "#/texts/67", @@ -25094,7 +26921,7 @@ ], [ "sentence", - "", + "proper", 997682002692959482, "TEXT", "#/texts/68", @@ -25325,7 +27152,7 @@ ], [ "sentence", - "", + "proper", 997682002692959482, "TEXT", "#/texts/68", @@ -25409,7 +27236,7 @@ ], [ "sentence", - "", + "proper", 997682002692959482, "TEXT", "#/texts/68", @@ -25619,7 +27446,7 @@ ], [ "sentence", - "", + "proper", 997682002692959482, "TEXT", "#/texts/68", @@ -25745,7 +27572,7 @@ ], [ "sentence", - "", + "proper", 997682002692959482, "TEXT", "#/texts/68", @@ -25829,7 +27656,49 @@ ], [ "sentence", - "", + "improper", + 11590138063543342276, + "TEXT", + "#/texts/69", + 1.0, + 4476259065516669581, + 8481893244467504716, + null, + null, + 2, + 48, + 2, + 48, + 1, + 8, + true, + "| DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS", + "| DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS" + ], + [ + "term", + "single-term", + 11590138063543342276, + "TEXT", + "#/texts/69", + 1.0, + 14675774684852204629, + 10752707062932991144, + null, + null, + 4, + 48, + 4, + 48, + 2, + 8, + true, + "DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS", + "DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS" + ], + [ + "sentence", + "proper", 16380310806374538602, "TEXT", "#/texts/70", @@ -25911,6 +27780,27 @@ "populated Knowledge Graph", "populated Knowledge Graph" ], + [ + "sentence", + "improper", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 2801533934034477883, + 18076787076612138377, + null, + null, + 104, + 166, + 104, + 166, + 18, + 27, + true, + "A deep data exploration requires two fundamental capabilities:", + "A deep data exploration requires two fundamental capabilities:" + ], [ "term", "single-term", @@ -25974,6 +27864,69 @@ "1", "1" ], + [ + "sentence", + "improper", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 4806888013365821011, + 14967712159992384847, + null, + null, + 1, + 92, + 1, + 92, + 1, + 20, + true, + ". perform deep queries on the graph, that is, queries that require multi-hop traversals and", + ". perform deep queries on the graph, that is, queries that require multi-hop traversals and" + ], + [ + "term", + "single-term", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 7076268937724050913, + 757920359996890916, + null, + null, + 11, + 23, + 11, + 23, + 3, + 5, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 329104159211247965, + 15479909791715347705, + null, + null, + 31, + 36, + 31, + 36, + 7, + 8, + true, + "graph", + "graph" + ], [ "expression", "word-concatenation", @@ -25995,6 +27948,48 @@ "multi-hop", "multi-hop" ], + [ + "term", + "single-term", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 329104161505536647, + 13265400285065104050, + null, + null, + 68, + 73, + 68, + 73, + 15, + 16, + true, + "multi", + "multi" + ], + [ + "term", + "single-term", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 10552270358215062354, + 15484484601665128769, + null, + null, + 74, + 88, + 74, + 88, + 17, + 19, + true, + "hop traversals", + "hop traversals" + ], [ "numval", "ival", @@ -26016,6 +28011,27 @@ "2", "2" ], + [ + "sentence", + "improper", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 11193184128140813540, + 1283446586766936557, + null, + null, + 1, + 61, + 1, + 61, + 1, + 13, + true, + ". perform graph analytics on the full graph or subsets of it", + ". perform graph analytics on the full graph or subsets of it" + ], [ "term", "single-term", @@ -26081,7 +28097,7 @@ ], [ "sentence", - "", + "proper", 1988335831916069382, "TEXT", "#/texts/72", @@ -26144,7 +28160,7 @@ ], [ "sentence", - "", + "proper", 5147764798816678886, "TEXT", "#/texts/73", @@ -26228,7 +28244,7 @@ ], [ "sentence", - "", + "proper", 5147764798816678886, "TEXT", "#/texts/73", @@ -26396,7 +28412,7 @@ ], [ "sentence", - "", + "proper", 5147764798816678886, "TEXT", "#/texts/73", @@ -26480,7 +28496,7 @@ ], [ "sentence", - "", + "proper", 5147764798816678886, "TEXT", "#/texts/73", @@ -26690,7 +28706,7 @@ ], [ "sentence", - "", + "proper", 285583876932865368, "TEXT", "#/texts/74", @@ -26921,7 +28937,7 @@ ], [ "sentence", - "", + "proper", 285583876932865368, "TEXT", "#/texts/74", @@ -27047,7 +29063,7 @@ ], [ "sentence", - "", + "proper", 285583876932865368, "TEXT", "#/texts/74", @@ -27173,7 +29189,7 @@ ], [ "sentence", - "", + "proper", 285583876932865368, "TEXT", "#/texts/74", @@ -27278,7 +29294,7 @@ ], [ "sentence", - "", + "proper", 285583876932865368, "TEXT", "#/texts/74", @@ -27591,6 +29607,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -27635,7 +29672,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/75", @@ -27885,6 +29922,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -28095,6 +30153,27 @@ "7", "7" ], + [ + "sentence", + "improper", + 4361549257370278754, + "TEXT", + "#/texts/76", + 1.0, + 15441160910541485670, + 2772137128821491569, + null, + null, + 1, + 3, + 1, + 3, + 1, + 2, + true, + "of", + "of" + ], [ "numval", "ival", @@ -28118,7 +30197,7 @@ ], [ "sentence", - "", + "proper", 13183039880198077038, "TEXT", "#/texts/77", @@ -28202,7 +30281,7 @@ ], [ "sentence", - "", + "proper", 13183039880198077038, "TEXT", "#/texts/77", @@ -28286,7 +30365,7 @@ ], [ "sentence", - "", + "proper", 13183039880198077038, "TEXT", "#/texts/77", @@ -28559,7 +30638,70 @@ ], [ "sentence", - "", + "improper", + 13428900458866068249, + "TEXT", + "#/texts/78", + 1.0, + 14601374641425905440, + 4335396186787100417, + null, + null, + 4, + 32, + 4, + 32, + 3, + 9, + true, + "| Design of the graph engine", + "| Design of the graph engine" + ], + [ + "term", + "single-term", + 13428900458866068249, + "TEXT", + "#/texts/78", + 1.0, + 16381206533755764332, + 8796133857281518442, + null, + null, + 6, + 12, + 6, + 12, + 4, + 5, + true, + "Design", + "Design" + ], + [ + "term", + "single-term", + 13428900458866068249, + "TEXT", + "#/texts/78", + 1.0, + 2924972194163802578, + 1091696909158573450, + null, + null, + 20, + 32, + 20, + 32, + 7, + 9, + true, + "graph engine", + "graph engine" + ], + [ + "sentence", + "proper", 1430911655724119030, "TEXT", "#/texts/79", @@ -28706,7 +30848,7 @@ ], [ "sentence", - "", + "proper", 1430911655724119030, "TEXT", "#/texts/79", @@ -28853,7 +30995,28 @@ ], [ "sentence", - "", + "improper", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 15441160910540903299, + 501565812163902881, + null, + null, + 280, + 286, + 280, + 282, + 49, + 50, + true, + "\u2020\u2020", + "\u2020\u2020" + ], + [ + "sentence", + "proper", 1430911655724119030, "TEXT", "#/texts/79", @@ -28937,7 +31100,7 @@ ], [ "sentence", - "", + "proper", 1430911655724119030, "TEXT", "#/texts/79", @@ -29126,7 +31289,7 @@ ], [ "sentence", - "", + "proper", 13770706479324480755, "TEXT", "#/texts/80", @@ -29273,7 +31436,7 @@ ], [ "sentence", - "", + "proper", 13770706479324480755, "TEXT", "#/texts/80", @@ -29378,7 +31541,7 @@ ], [ "sentence", - "", + "proper", 13770706479324480755, "TEXT", "#/texts/80", @@ -29523,6 +31686,27 @@ "nodes W", "nodes W" ], + [ + "sentence", + "improper", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 17852491679655048183, + 3567971614464094913, + null, + null, + 270, + 324, + 270, + 324, + 59, + 68, + true, + "This can be directly translated into linear algebra as", + "This can be directly translated into linear algebra as" + ], [ "term", "single-term", @@ -29546,7 +31730,28 @@ ], [ "sentence", - "", + "improper", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 17767354399704235223, + 16151623699919035273, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "w", + "w" + ], + [ + "sentence", + "proper", 11165481757050847950, "TEXT", "#/texts/81", @@ -29607,6 +31812,27 @@ "Av", "Av" ], + [ + "sentence", + "improper", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 15705096823684294974, + 17789840971827485592, + null, + null, + 15, + 91, + 15, + 89, + 10, + 47, + true, + "with v $^{!}$$_{i}$= 1 if node i \\b V 0 if node i = 2 V , GLYPH \u00f0 1 \u00de", + "with v $^{!}$$_{i}$= 1 if node i \\b V 0 if node i = 2 V , GLYPH \u00f0 1 \u00de" + ], [ "expression", "wtoken-concatenation", @@ -29798,7 +32024,28 @@ ], [ "sentence", - "", + "improper", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 14650452800874403184, + 10076746128805246269, + null, + null, + 0, + 8, + 0, + 8, + 0, + 2, + true, + "and with", + "and with" + ], + [ + "sentence", + "proper", 9572077971492738329, "TEXT", "#/texts/82", @@ -29861,7 +32108,7 @@ ], [ "sentence", - "", + "proper", 9572077971492738329, "TEXT", "#/texts/82", @@ -30006,6 +32253,27 @@ "graph traversals", "graph traversals" ], + [ + "sentence", + "improper", + 9572077971492738329, + "TEXT", + "#/texts/82", + 1.0, + 2878315017791093703, + 17572418673081669094, + null, + null, + 196, + 327, + 196, + 327, + 31, + 61, + true, + "For example, to obtain the k-order neighborhood of node set V, one simply needs to evaluate Equation (1) k times recursively, as in", + "For example, to obtain the k-order neighborhood of node set V, one simply needs to evaluate Equation (1) k times recursively, as in" + ], [ "term", "single-term", @@ -30176,7 +32444,28 @@ ], [ "sentence", - "", + "improper", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 17767354399704235223, + 10344599291656220805, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "w", + "w" + ], + [ + "sentence", + "proper", 14951391138799557075, "TEXT", "#/texts/83", @@ -30258,6 +32547,27 @@ "^{!}=", "$^{!}$=" ], + [ + "sentence", + "improper", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 8785325916061184795, + 2077844018639797129, + null, + null, + 39, + 113, + 37, + 109, + 27, + 61, + true, + "GLYPHGLYPH GLYPH GLYPH GLYPH GLYPH : \u00f0 2 \u00de", + "GLYPHGLYPH GLYPH GLYPH GLYPH GLYPH : \u00f0 2 \u00de" + ], [ "expression", "wtoken-concatenation", @@ -30512,7 +32822,7 @@ ], [ "sentence", - "", + "proper", 16602156009514813718, "TEXT", "#/texts/84", @@ -30617,7 +32927,7 @@ ], [ "sentence", - "", + "proper", 16602156009514813718, "TEXT", "#/texts/84", @@ -30890,7 +33200,7 @@ ], [ "sentence", - "", + "proper", 16602156009514813718, "TEXT", "#/texts/84", @@ -31142,7 +33452,7 @@ ], [ "sentence", - "", + "proper", 16602156009514813718, "TEXT", "#/texts/84", @@ -31247,7 +33557,7 @@ ], [ "sentence", - "", + "proper", 16602156009514813718, "TEXT", "#/texts/84", @@ -31457,7 +33767,28 @@ ], [ "sentence", - "", + "improper", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12264509556052062994, + 5123928862892494153, + null, + null, + 732, + 745, + 732, + 745, + 137, + 141, + true, + "is equal to v", + "is equal to v" + ], + [ + "sentence", + "proper", 16602156009514813718, "TEXT", "#/texts/84", @@ -31499,7 +33830,7 @@ ], [ "sentence", - "", + "proper", 16602156009514813718, "TEXT", "#/texts/84", @@ -31730,7 +34061,7 @@ ], [ "sentence", - "", + "proper", 7162849562576593449, "TEXT", "#/texts/85", @@ -31898,7 +34229,91 @@ ], [ "sentence", - "", + "improper", + 15385417954505503552, + "TEXT", + "#/texts/86", + 1.0, + 18417459381709426233, + 10020576354403167023, + null, + null, + 4, + 54, + 4, + 54, + 3, + 9, + true, + "| Memory architecture and performance optimization", + "| Memory architecture and performance optimization" + ], + [ + "term", + "single-term", + 15385417954505503552, + "TEXT", + "#/texts/86", + 1.0, + 870113469708492800, + 18315576896615919675, + null, + null, + 6, + 25, + 6, + 25, + 4, + 6, + true, + "Memory architecture", + "Memory architecture" + ], + [ + "term", + "enum-term-mark-2", + 15385417954505503552, + "TEXT", + "#/texts/86", + 1.0, + 718073221538665455, + 25931046609376007, + null, + null, + 13, + 54, + 13, + 54, + 5, + 9, + true, + "architecture and performance optimization", + "architecture and performance optimization" + ], + [ + "term", + "single-term", + 15385417954505503552, + "TEXT", + "#/texts/86", + 1.0, + 6000441818249848958, + 4119448448060994558, + null, + null, + 30, + 54, + 30, + 54, + 7, + 9, + true, + "performance optimization", + "performance optimization" + ], + [ + "sentence", + "proper", 10815650641518265876, "TEXT", "#/texts/87", @@ -32066,7 +34481,7 @@ ], [ "sentence", - "", + "proper", 10815650641518265876, "TEXT", "#/texts/87", @@ -32276,7 +34691,7 @@ ], [ "sentence", - "", + "proper", 10815650641518265876, "TEXT", "#/texts/87", @@ -32360,7 +34775,7 @@ ], [ "sentence", - "", + "proper", 10815650641518265876, "TEXT", "#/texts/87", @@ -32528,7 +34943,7 @@ ], [ "sentence", - "", + "proper", 10815650641518265876, "TEXT", "#/texts/87", @@ -32633,7 +35048,7 @@ ], [ "sentence", - "", + "proper", 10815650641518265876, "TEXT", "#/texts/87", @@ -32801,7 +35216,7 @@ ], [ "sentence", - "", + "proper", 10815650641518265876, "TEXT", "#/texts/87", @@ -32864,7 +35279,7 @@ ], [ "sentence", - "", + "proper", 10815650641518265876, "TEXT", "#/texts/87", @@ -33116,7 +35531,7 @@ ], [ "sentence", - "", + "proper", 10815650641518265876, "TEXT", "#/texts/87", @@ -33324,6 +35739,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -33368,7 +35804,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/88", @@ -33618,6 +36054,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -33809,7 +36266,7 @@ ], [ "sentence", - "", + "proper", 12004249365408683930, "TEXT", "#/texts/89", @@ -34208,7 +36665,7 @@ ], [ "sentence", - "", + "proper", 12004249365408683930, "TEXT", "#/texts/89", @@ -34523,7 +36980,7 @@ ], [ "sentence", - "", + "proper", 12004249365408683930, "TEXT", "#/texts/89", @@ -34565,7 +37022,7 @@ ], [ "sentence", - "", + "proper", 12004249365408683930, "TEXT", "#/texts/89", @@ -34649,7 +37106,7 @@ ], [ "sentence", - "", + "proper", 12004249365408683930, "TEXT", "#/texts/89", @@ -34817,7 +37274,7 @@ ], [ "sentence", - "", + "proper", 12004249365408683930, "TEXT", "#/texts/89", @@ -35006,7 +37463,7 @@ ], [ "sentence", - "", + "proper", 12004249365408683930, "TEXT", "#/texts/89", @@ -35195,7 +37652,7 @@ ], [ "sentence", - "", + "proper", 7223381657047466215, "TEXT", "#/texts/90", @@ -35342,7 +37799,7 @@ ], [ "sentence", - "", + "proper", 7223381657047466215, "TEXT", "#/texts/90", @@ -35468,7 +37925,7 @@ ], [ "sentence", - "", + "proper", 7223381657047466215, "TEXT", "#/texts/90", @@ -35615,7 +38072,7 @@ ], [ "sentence", - "", + "proper", 7223381657047466215, "TEXT", "#/texts/90", @@ -35867,7 +38324,7 @@ ], [ "sentence", - "", + "proper", 7223381657047466215, "TEXT", "#/texts/90", @@ -36140,7 +38597,28 @@ ], [ "sentence", - "", + "improper", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 12178341415896407674, + 14422417746503790758, + null, + null, + 685, + 688, + 685, + 688, + 140, + 141, + true, + "***", + "***" + ], + [ + "sentence", + "proper", 7223381657047466215, "TEXT", "#/texts/90", @@ -36392,7 +38870,7 @@ ], [ "sentence", - "", + "proper", 7223381657047466215, "TEXT", "#/texts/90", @@ -36791,7 +39269,112 @@ ], [ "sentence", - "", + "improper", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 14192878977779458197, + 10144190012485496573, + null, + null, + 4, + 48, + 4, + 48, + 3, + 10, + true, + "| Formulation and evaluation of deep queries", + "| Formulation and evaluation of deep queries" + ], + [ + "term", + "enum-term-mark-2", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 12865302163893152094, + 15940595769957357319, + null, + null, + 6, + 32, + 6, + 32, + 4, + 7, + true, + "Formulation and evaluation", + "Formulation and evaluation" + ], + [ + "term", + "single-term", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 2044684058342850165, + 14277022715477019349, + null, + null, + 6, + 17, + 6, + 17, + 4, + 5, + true, + "Formulation", + "Formulation" + ], + [ + "term", + "single-term", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 5456363662501675139, + 12901679329998763956, + null, + null, + 22, + 32, + 22, + 32, + 6, + 7, + true, + "evaluation", + "evaluation" + ], + [ + "term", + "single-term", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 7076268937724050913, + 7970870454235277029, + null, + null, + 36, + 48, + 36, + 48, + 8, + 10, + true, + "deep queries", + "deep queries" + ], + [ + "sentence", + "proper", 17129434987283608290, "TEXT", "#/texts/92", @@ -36875,7 +39458,7 @@ ], [ "sentence", - "", + "proper", 17129434987283608290, "TEXT", "#/texts/92", @@ -36980,7 +39563,7 @@ ], [ "sentence", - "", + "proper", 10350406469077463155, "TEXT", "#/texts/93", @@ -37148,7 +39731,7 @@ ], [ "sentence", - "", + "proper", 10350406469077463155, "TEXT", "#/texts/93", @@ -37253,7 +39836,7 @@ ], [ "sentence", - "", + "proper", 10350406469077463155, "TEXT", "#/texts/93", @@ -37505,7 +40088,7 @@ ], [ "sentence", - "", + "proper", 10350406469077463155, "TEXT", "#/texts/93", @@ -37547,7 +40130,7 @@ ], [ "sentence", - "", + "proper", 10350406469077463155, "TEXT", "#/texts/93", @@ -37757,7 +40340,7 @@ ], [ "sentence", - "", + "proper", 16949854269270315165, "TEXT", "#/texts/94", @@ -37946,7 +40529,7 @@ ], [ "sentence", - "", + "proper", 16949854269270315165, "TEXT", "#/texts/94", @@ -38091,6 +40674,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -38135,7 +40739,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/95", @@ -38385,6 +40989,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -38595,6 +41220,27 @@ "9", "9" ], + [ + "sentence", + "improper", + 4361549266593946746, + "TEXT", + "#/texts/96", + 1.0, + 15441160910541485670, + 7911155824351265465, + null, + null, + 1, + 3, + 1, + 3, + 1, + 2, + true, + "of", + "of" + ], [ "numval", "ival", @@ -38616,6 +41262,27 @@ "15", "15" ], + [ + "sentence", + "improper", + 9802652237802670052, + "TEXT", + "#/texts/97", + 1.0, + 6349660887815587103, + 9627223604255737762, + null, + null, + 0, + 22, + 0, + 22, + 0, + 8, + true, + "3.3.1 | Node retrieval", + "3.3.1 | Node retrieval" + ], [ "expression", "wtoken-concatenation", @@ -38681,7 +41348,7 @@ ], [ "sentence", - "", + "proper", 5524728206729419689, "TEXT", "#/texts/98", @@ -38786,7 +41453,7 @@ ], [ "sentence", - "", + "proper", 5524728206729419689, "TEXT", "#/texts/98", @@ -38931,6 +41598,27 @@ "particular property", "particular property" ], + [ + "sentence", + "improper", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 11273853394820260322, + 5543894756229177106, + null, + null, + 217, + 270, + 217, + 270, + 41, + 57, + true, + "The task constructs a node vector v $^{!}$, such that", + "The task constructs a node vector v $^{!}$, such that" + ], [ "term", "single-term", @@ -38994,6 +41682,27 @@ "^{!}", "$^{!}$" ], + [ + "sentence", + "improper", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 588808569772103507, + 3158630085314057550, + null, + null, + 0, + 71, + 0, + 69, + 0, + 36, + true, + "v $^{!}$$_{i}$= 1 if node i \\b S 0 if node i = 2 S , GLYPH \u00f0 3 \u00de", + "v $^{!}$$_{i}$= 1 if node i \\b S 0 if node i = 2 S , GLYPH \u00f0 3 \u00de" + ], [ "expression", "wtoken-concatenation", @@ -39143,7 +41852,28 @@ ], [ "sentence", - "", + "improper", + 11778884428660217326, + "TEXT", + "#/texts/100", + 1.0, + 329104161580313375, + 9731581819344976201, + null, + null, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "where", + "where" + ], + [ + "sentence", + "proper", 11778884428660217326, "TEXT", "#/texts/100", @@ -39225,6 +41955,27 @@ "search criteria", "search criteria" ], + [ + "sentence", + "improper", + 12875050310340408203, + "TEXT", + "#/texts/101", + 1.0, + 10555101842315227314, + 3578570888443863693, + null, + null, + 0, + 23, + 0, + 23, + 0, + 8, + true, + "3.3.2 | Graph traversal", + "3.3.2 | Graph traversal" + ], [ "expression", "wtoken-concatenation", @@ -39288,9 +42039,30 @@ "2", "2" ], + [ + "term", + "single-term", + 12875050310340408203, + "TEXT", + "#/texts/101", + 1.0, + 14871935126973563211, + 10403115224383595903, + null, + null, + 6, + 23, + 6, + 23, + 5, + 8, + true, + "| Graph traversal", + "| Graph traversal" + ], [ "sentence", - "", + "proper", 3785875504044487339, "TEXT", "#/texts/102", @@ -39458,7 +42230,7 @@ ], [ "sentence", - "", + "proper", 3785875504044487339, "TEXT", "#/texts/102", @@ -39626,7 +42398,7 @@ ], [ "sentence", - "", + "proper", 3785875504044487339, "TEXT", "#/texts/102", @@ -39773,7 +42545,7 @@ ], [ "sentence", - "", + "proper", 3785875504044487339, "TEXT", "#/texts/102", @@ -40002,6 +42774,27 @@ "length", "length" ], + [ + "sentence", + "improper", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 7579237777413581592, + 7101335723397854438, + null, + null, + 487, + 580, + 487, + 580, + 102, + 121, + true, + "For example, consider the case in which we want to explore deeper, indirect paths as follows,", + "For example, consider the case in which we want to explore deeper, indirect paths as follows," + ], [ "term", "single-term", @@ -40088,7 +42881,28 @@ ], [ "sentence", - "", + "improper", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235223, + 9989301221673871682, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "w", + "w" + ], + [ + "sentence", + "proper", 12105626155924658285, "TEXT", "#/texts/103", @@ -40172,7 +42986,28 @@ ], [ "sentence", - "", + "improper", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235139, + 9989301225025680993, + null, + null, + 22, + 23, + 22, + 23, + 14, + 15, + true, + "+", + "+" + ], + [ + "sentence", + "proper", 12105626155924658285, "TEXT", "#/texts/103", @@ -40233,6 +43068,27 @@ "3", "3" ], + [ + "sentence", + "improper", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 3035011012940480021, + 1832428019455168426, + null, + null, + 32, + 144, + 32, + 142, + 19, + 83, + true, + "+ GLYPH GLYPH GLYPH GLYPH GLYPH v $^{!}$= e$^{A}$- 1 GLYPH GLYPH v $^{!}$: \u00f0 4 \u00de", + "+ GLYPH GLYPH GLYPH GLYPH GLYPH v $^{!}$= e$^{A}$- 1 GLYPH GLYPH v $^{!}$: \u00f0 4 \u00de" + ], [ "expression", "wtoken-concatenation", @@ -40634,7 +43490,7 @@ ], [ "sentence", - "", + "proper", 16265612055607243129, "TEXT", "#/texts/104", @@ -40886,7 +43742,7 @@ ], [ "sentence", - "", + "proper", 16265612055607243129, "TEXT", "#/texts/104", @@ -41052,6 +43908,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -41096,7 +43973,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/105", @@ -41346,6 +44223,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -41535,6 +44433,27 @@ "applicable Creative Commons License", "applicable Creative Commons License" ], + [ + "sentence", + "improper", + 10252446451495472512, + "TEXT", + "#/texts/106", + 1.0, + 6188098459342469819, + 1229703042810128321, + null, + null, + 0, + 26, + 0, + 26, + 0, + 8, + true, + "3.3.3 | Logical operations", + "3.3.3 | Logical operations" + ], [ "expression", "wtoken-concatenation", @@ -41598,9 +44517,30 @@ "3", "3" ], + [ + "term", + "single-term", + 10252446451495472512, + "TEXT", + "#/texts/106", + 1.0, + 17545402118559791717, + 17555948970743190738, + null, + null, + 6, + 26, + 6, + 26, + 5, + 8, + true, + "| Logical operations", + "| Logical operations" + ], [ "sentence", - "", + "proper", 17011944206067158637, "TEXT", "#/texts/107", @@ -41726,7 +44666,7 @@ ], [ "sentence", - "", + "proper", 17011944206067158637, "TEXT", "#/texts/107", @@ -41831,7 +44771,7 @@ ], [ "sentence", - "", + "proper", 17011944206067158637, "TEXT", "#/texts/107", @@ -41978,7 +44918,7 @@ ], [ "sentence", - "", + "proper", 17011944206067158637, "TEXT", "#/texts/107", @@ -42081,6 +45021,27 @@ "input vector", "input vector" ], + [ + "sentence", + "improper", + 16289627123982758705, + "TEXT", + "#/texts/108", + 1.0, + 4767177430745297228, + 228154443239687699, + null, + null, + 0, + 27, + 0, + 27, + 0, + 8, + true, + "3.3.4 | Transform functions", + "3.3.4 | Transform functions" + ], [ "expression", "wtoken-concatenation", @@ -42144,9 +45105,30 @@ "4", "4" ], + [ + "term", + "single-term", + 16289627123982758705, + "TEXT", + "#/texts/108", + 1.0, + 13342194518649961055, + 15246182238421227996, + null, + null, + 6, + 27, + 6, + 27, + 5, + 8, + true, + "| Transform functions", + "| Transform functions" + ], [ "sentence", - "", + "proper", 13969801897340997317, "TEXT", "#/texts/109", @@ -42230,7 +45212,7 @@ ], [ "sentence", - "", + "proper", 13969801897340997317, "TEXT", "#/texts/109", @@ -42314,7 +45296,7 @@ ], [ "sentence", - "", + "proper", 105697770555684555, "TEXT", "#/texts/110", @@ -42482,7 +45464,7 @@ ], [ "sentence", - "", + "proper", 105697770555684555, "TEXT", "#/texts/110", @@ -42545,7 +45527,7 @@ ], [ "sentence", - "", + "proper", 105697770555684555, "TEXT", "#/texts/110", @@ -42629,7 +45611,7 @@ ], [ "sentence", - "", + "proper", 105697770555684555, "TEXT", "#/texts/110", @@ -42734,7 +45716,7 @@ ], [ "sentence", - "", + "proper", 105697770555684555, "TEXT", "#/texts/110", @@ -42881,7 +45863,7 @@ ], [ "sentence", - "", + "proper", 105697770555684555, "TEXT", "#/texts/110", @@ -43091,7 +46073,7 @@ ], [ "sentence", - "", + "proper", 105697770555684555, "TEXT", "#/texts/110", @@ -43154,7 +46136,7 @@ ], [ "sentence", - "", + "proper", 15938840672015995359, "TEXT", "#/texts/111", @@ -43301,7 +46283,7 @@ ], [ "sentence", - "", + "proper", 15938840672015995359, "TEXT", "#/texts/111", @@ -43427,7 +46409,7 @@ ], [ "sentence", - "", + "proper", 15938840672015995359, "TEXT", "#/texts/111", @@ -43511,7 +46493,7 @@ ], [ "sentence", - "", + "proper", 15938840672015995359, "TEXT", "#/texts/111", @@ -43616,7 +46598,91 @@ ], [ "sentence", - "", + "improper", + 16505790528099785698, + "TEXT", + "#/texts/112", + 1.0, + 3669348819955245594, + 11254032301165265326, + null, + null, + 2, + 31, + 2, + 31, + 1, + 6, + true, + "| CLOUD DESIGN AND DEPLOYMENT", + "| CLOUD DESIGN AND DEPLOYMENT" + ], + [ + "term", + "enum-term-mark-4", + 16505790528099785698, + "TEXT", + "#/texts/112", + 1.0, + 5437625579903233791, + 18074126645980437463, + null, + null, + 4, + 31, + 4, + 31, + 2, + 6, + true, + "CLOUD DESIGN AND DEPLOYMENT", + "CLOUD DESIGN AND DEPLOYMENT" + ], + [ + "term", + "single-term", + 16505790528099785698, + "TEXT", + "#/texts/112", + 1.0, + 11753857894419936394, + 17603578644174785442, + null, + null, + 4, + 16, + 4, + 16, + 2, + 4, + true, + "CLOUD DESIGN", + "CLOUD DESIGN" + ], + [ + "term", + "single-term", + 16505790528099785698, + "TEXT", + "#/texts/112", + 1.0, + 7198623583390732929, + 15068365172116912497, + null, + null, + 21, + 31, + 21, + 31, + 5, + 6, + true, + "DEPLOYMENT", + "DEPLOYMENT" + ], + [ + "sentence", + "proper", 14738723905055920039, "TEXT", "#/texts/113", @@ -43721,7 +46787,7 @@ ], [ "sentence", - "", + "proper", 14738723905055920039, "TEXT", "#/texts/113", @@ -43826,7 +46892,7 @@ ], [ "sentence", - "", + "proper", 14738723905055920039, "TEXT", "#/texts/113", @@ -44078,7 +47144,7 @@ ], [ "sentence", - "", + "proper", 5699550326698755904, "TEXT", "#/texts/114", @@ -44204,7 +47270,7 @@ ], [ "sentence", - "", + "proper", 5699550326698755904, "TEXT", "#/texts/114", @@ -44330,7 +47396,7 @@ ], [ "sentence", - "", + "proper", 5699550326698755904, "TEXT", "#/texts/114", @@ -44582,7 +47648,7 @@ ], [ "sentence", - "", + "proper", 5699550326698755904, "TEXT", "#/texts/114", @@ -44708,7 +47774,7 @@ ], [ "sentence", - "", + "proper", 5699550326698755904, "TEXT", "#/texts/114", @@ -44813,7 +47879,7 @@ ], [ "sentence", - "", + "proper", 5699550326698755904, "TEXT", "#/texts/114", @@ -44981,7 +48047,7 @@ ], [ "sentence", - "", + "proper", 5699550326698755904, "TEXT", "#/texts/114", @@ -45086,7 +48152,7 @@ ], [ "sentence", - "", + "proper", 11609131422778723150, "TEXT", "#/texts/115", @@ -45191,7 +48257,7 @@ ], [ "sentence", - "", + "proper", 11609131422778723150, "TEXT", "#/texts/115", @@ -45338,7 +48404,7 @@ ], [ "sentence", - "", + "proper", 11609131422778723150, "TEXT", "#/texts/115", @@ -45464,7 +48530,7 @@ ], [ "sentence", - "", + "proper", 11609131422778723150, "TEXT", "#/texts/115", @@ -45569,7 +48635,7 @@ ], [ "sentence", - "", + "proper", 788128893109726279, "TEXT", "#/texts/116", @@ -45695,7 +48761,7 @@ ], [ "sentence", - "", + "proper", 788128893109726279, "TEXT", "#/texts/116", @@ -45905,7 +48971,7 @@ ], [ "sentence", - "", + "proper", 788128893109726279, "TEXT", "#/texts/116", @@ -46010,7 +49076,7 @@ ], [ "sentence", - "", + "proper", 7029344862946908483, "TEXT", "#/texts/117", @@ -46178,7 +49244,7 @@ ], [ "sentence", - "", + "proper", 7029344862946908483, "TEXT", "#/texts/117", @@ -46367,7 +49433,7 @@ ], [ "sentence", - "", + "proper", 7029344862946908483, "TEXT", "#/texts/117", @@ -46514,7 +49580,7 @@ ], [ "sentence", - "", + "proper", 7029344862946908483, "TEXT", "#/texts/117", @@ -46701,6 +49767,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -46745,7 +49832,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/118", @@ -46995,6 +50082,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -47205,6 +50313,27 @@ "11", "11" ], + [ + "sentence", + "improper", + 2144926686518491811, + "TEXT", + "#/texts/119", + 1.0, + 15441160910541485670, + 7629680776796918969, + null, + null, + 2, + 4, + 2, + 4, + 1, + 2, + true, + "of", + "of" + ], [ "numval", "ival", @@ -47228,7 +50357,7 @@ ], [ "sentence", - "", + "proper", 18333396269095847693, "TEXT", "#/texts/120", @@ -47354,7 +50483,7 @@ ], [ "sentence", - "", + "proper", 18333396269095847693, "TEXT", "#/texts/120", @@ -47459,7 +50588,7 @@ ], [ "sentence", - "", + "proper", 18333396269095847693, "TEXT", "#/texts/120", @@ -47522,7 +50651,7 @@ ], [ "sentence", - "", + "proper", 18333396269095847693, "TEXT", "#/texts/120", @@ -47627,7 +50756,7 @@ ], [ "sentence", - "", + "proper", 18333396269095847693, "TEXT", "#/texts/120", @@ -47774,7 +50903,112 @@ ], [ "sentence", - "", + "improper", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 2621907744440686475, + 9056515438346593466, + null, + null, + 2, + 39, + 2, + 39, + 1, + 9, + true, + "| CASE STUDY: OIL AND GAS EXPLORATION", + "| CASE STUDY: OIL AND GAS EXPLORATION" + ], + [ + "term", + "single-term", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 250883940722560618, + 5731782570955531308, + null, + null, + 4, + 14, + 4, + 14, + 2, + 4, + true, + "CASE STUDY", + "CASE STUDY" + ], + [ + "term", + "enum-term-mark-4", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 18038659283920252343, + 8396203383843088821, + null, + null, + 16, + 39, + 16, + 39, + 5, + 9, + true, + "OIL AND GAS EXPLORATION", + "OIL AND GAS EXPLORATION" + ], + [ + "term", + "single-term", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 12178341415896270517, + 129409062461846188, + null, + null, + 16, + 19, + 16, + 19, + 5, + 6, + true, + "OIL", + "OIL" + ], + [ + "term", + "single-term", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 7606713323162423099, + 12398485310842551463, + null, + null, + 24, + 39, + 24, + 39, + 7, + 9, + true, + "GAS EXPLORATION", + "GAS EXPLORATION" + ], + [ + "sentence", + "proper", 10295608624766759271, "TEXT", "#/texts/122", @@ -47900,7 +51134,7 @@ ], [ "sentence", - "", + "proper", 10295608624766759271, "TEXT", "#/texts/122", @@ -48173,7 +51407,7 @@ ], [ "sentence", - "", + "proper", 10295608624766759271, "TEXT", "#/texts/122", @@ -48299,7 +51533,7 @@ ], [ "sentence", - "", + "proper", 10295608624766759271, "TEXT", "#/texts/122", @@ -48719,7 +51953,7 @@ ], [ "sentence", - "", + "proper", 10295608624766759271, "TEXT", "#/texts/122", @@ -48950,7 +52184,7 @@ ], [ "sentence", - "", + "proper", 10633780781731536747, "TEXT", "#/texts/123", @@ -49097,7 +52331,7 @@ ], [ "sentence", - "", + "proper", 10633780781731536747, "TEXT", "#/texts/123", @@ -49202,7 +52436,7 @@ ], [ "sentence", - "", + "proper", 10633780781731536747, "TEXT", "#/texts/123", @@ -49412,7 +52646,7 @@ ], [ "sentence", - "", + "proper", 10633780781731536747, "TEXT", "#/texts/123", @@ -49580,7 +52814,7 @@ ], [ "sentence", - "", + "proper", 10633780781731536747, "TEXT", "#/texts/123", @@ -49706,7 +52940,7 @@ ], [ "sentence", - "", + "proper", 10633780781731536747, "TEXT", "#/texts/123", @@ -49874,7 +53108,7 @@ ], [ "sentence", - "", + "proper", 10633780781731536747, "TEXT", "#/texts/123", @@ -50042,7 +53276,7 @@ ], [ "sentence", - "", + "proper", 10633780781731536747, "TEXT", "#/texts/123", @@ -50168,7 +53402,7 @@ ], [ "sentence", - "", + "proper", 10633780781731536747, "TEXT", "#/texts/123", @@ -50481,6 +53715,27 @@ "constraints", "constraints" ], + [ + "sentence", + "improper", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 7956387888440268806, + 9171487369932444199, + null, + null, + 1163, + 1189, + 1163, + 1189, + 222, + 227, + true, + "For example, the reservoir", + "For example, the reservoir" + ], [ "term", "single-term", @@ -50544,6 +53799,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -50588,7 +53864,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/124", @@ -50838,6 +54114,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -51048,6 +54345,27 @@ "12", "12" ], + [ + "sentence", + "improper", + 4361549257087816853, + "TEXT", + "#/texts/126", + 1.0, + 15441160910541485670, + 9983842722140753537, + null, + null, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "of", + "of" + ], [ "numval", "ival", @@ -51069,6 +54387,27 @@ "15", "15" ], + [ + "sentence", + "improper", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 12390004558367100719, + 10045326222207556847, + null, + null, + 0, + 60, + 0, + 60, + 0, + 12, + true, + "formation has to have a lower depth than the seal formation.", + "formation has to have a lower depth than the seal formation." + ], [ "term", "single-term", @@ -51134,7 +54473,7 @@ ], [ "sentence", - "", + "proper", 10195664788154887804, "TEXT", "#/texts/127", @@ -51302,7 +54641,7 @@ ], [ "sentence", - "", + "proper", 7538054744015619336, "TEXT", "#/texts/128", @@ -51512,7 +54851,7 @@ ], [ "sentence", - "", + "proper", 7538054744015619336, "TEXT", "#/texts/128", @@ -51596,7 +54935,7 @@ ], [ "sentence", - "", + "proper", 7538054744015619336, "TEXT", "#/texts/128", @@ -51827,7 +55166,7 @@ ], [ "sentence", - "", + "proper", 7538054744015619336, "TEXT", "#/texts/128", @@ -51995,7 +55334,7 @@ ], [ "sentence", - "", + "proper", 12426662601736619109, "TEXT", "#/texts/129", @@ -52331,7 +55670,7 @@ ], [ "sentence", - "", + "proper", 12426662601736619109, "TEXT", "#/texts/129", @@ -52497,6 +55836,27 @@ "queryable KG", "queryable KG" ], + [ + "sentence", + "improper", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 15728969365616328105, + 17085098925010968686, + null, + null, + 411, + 460, + 411, + 460, + 78, + 87, + true, + "The key components of this specific pipeline are,", + "The key components of this specific pipeline are," + ], [ "term", "single-term", @@ -52560,6 +55920,111 @@ "1", "1" ], + [ + "sentence", + "improper", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 11889799938246613874, + 1416225359394283175, + null, + null, + 1, + 57, + 1, + 57, + 1, + 12, + true, + ". the conversion of PDF documents into JSON through CCS,", + ". the conversion of PDF documents into JSON through CCS," + ], + [ + "term", + "single-term", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 2703018679320364082, + 10926776708418742663, + null, + null, + 7, + 17, + 7, + 17, + 3, + 4, + true, + "conversion", + "conversion" + ], + [ + "term", + "single-term", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 12653831733608918357, + 7130150499537747604, + null, + null, + 21, + 34, + 21, + 34, + 5, + 7, + true, + "PDF documents", + "PDF documents" + ], + [ + "term", + "single-term", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 389609625541450799, + 1148415792138977757, + null, + null, + 40, + 44, + 40, + 44, + 8, + 9, + true, + "JSON", + "JSON" + ], + [ + "term", + "single-term", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 12178341415896221596, + 1383368125015642049, + null, + null, + 53, + 56, + 53, + 56, + 10, + 11, + true, + "CCS", + "CCS" + ], [ "numval", "ival", @@ -52581,6 +56046,111 @@ "2", "2" ], + [ + "sentence", + "improper", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 10360766580882649633, + 9418275448049882729, + null, + null, + 1, + 65, + 1, + 65, + 1, + 16, + true, + ". the creation of the KG in the CPS from the JSON documents, and", + ". the creation of the KG in the CPS from the JSON documents, and" + ], + [ + "term", + "single-term", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 14652282930648707075, + 1296612546179459976, + null, + null, + 7, + 15, + 7, + 15, + 3, + 4, + true, + "creation", + "creation" + ], + [ + "term", + "single-term", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 15441160910541480204, + 13433689011768330761, + null, + null, + 23, + 25, + 23, + 25, + 6, + 7, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 12178341415896222428, + 8639440310989100808, + null, + null, + 33, + 36, + 33, + 36, + 9, + 10, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 2351632970423386126, + 8632517790462141146, + null, + null, + 46, + 60, + 46, + 60, + 12, + 14, + true, + "JSON documents", + "JSON documents" + ], [ "numval", "ival", @@ -52602,6 +56172,27 @@ "3", "3" ], + [ + "sentence", + "improper", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 7192632164357775195, + 2976029623734261195, + null, + null, + 1, + 22, + 1, + 22, + 1, + 6, + true, + ". the querying of the", + ". the querying of the" + ], [ "term", "single-term", @@ -52625,7 +56216,7 @@ ], [ "sentence", - "", + "proper", 16998817296948099535, "TEXT", "#/texts/132", @@ -52730,7 +56321,7 @@ ], [ "sentence", - "", + "proper", 1205649569241141618, "TEXT", "#/texts/133", @@ -52919,7 +56510,7 @@ ], [ "sentence", - "", + "proper", 1205649569241141618, "TEXT", "#/texts/133", @@ -53045,7 +56636,7 @@ ], [ "sentence", - "", + "proper", 1205649569241141618, "TEXT", "#/texts/133", @@ -53213,7 +56804,7 @@ ], [ "sentence", - "", + "proper", 1205649569241141618, "TEXT", "#/texts/133", @@ -53339,7 +56930,7 @@ ], [ "sentence", - "", + "proper", 1205649569241141618, "TEXT", "#/texts/133", @@ -53528,7 +57119,7 @@ ], [ "sentence", - "", + "proper", 1205649569241141618, "TEXT", "#/texts/133", @@ -53633,7 +57224,7 @@ ], [ "sentence", - "", + "proper", 1205649569241141618, "TEXT", "#/texts/133", @@ -53885,7 +57476,7 @@ ], [ "sentence", - "", + "proper", 12257840490666828590, "TEXT", "#/texts/134", @@ -54179,7 +57770,7 @@ ], [ "sentence", - "", + "proper", 12257840490666828590, "TEXT", "#/texts/134", @@ -54389,7 +57980,7 @@ ], [ "sentence", - "", + "proper", 12257840490666828590, "TEXT", "#/texts/134", @@ -54494,7 +58085,7 @@ ], [ "sentence", - "", + "proper", 12257840490666828590, "TEXT", "#/texts/134", @@ -54641,7 +58232,7 @@ ], [ "sentence", - "", + "proper", 7040847965650746591, "TEXT", "#/texts/135", @@ -54851,7 +58442,7 @@ ], [ "sentence", - "", + "proper", 7040847965650746591, "TEXT", "#/texts/135", @@ -55271,7 +58862,7 @@ ], [ "sentence", - "", + "proper", 7040847965650746591, "TEXT", "#/texts/135", @@ -55523,7 +59114,7 @@ ], [ "sentence", - "", + "proper", 7040847965650746591, "TEXT", "#/texts/135", @@ -55733,7 +59324,7 @@ ], [ "sentence", - "", + "proper", 7040847965650746591, "TEXT", "#/texts/135", @@ -55901,7 +59492,7 @@ ], [ "sentence", - "", + "proper", 7927601225025519287, "TEXT", "#/texts/136", @@ -56025,6 +59616,27 @@ "evaluation workflow", "evaluation workflow" ], + [ + "sentence", + "improper", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 460808421414733701, + 12585670206319025150, + null, + null, + 80, + 218, + 80, + 218, + 17, + 42, + true, + "This workflow allows us to identify PSEs and their connected properties in the Knowledge Graph, for example, their age, formation and rock", + "This workflow allows us to identify PSEs and their connected properties in the Knowledge Graph, for example, their age, formation and rock" + ], [ "term", "single-term", @@ -56235,6 +59847,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -56279,7 +59912,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/137", @@ -56529,6 +60162,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -56739,6 +60393,27 @@ "13", "13" ], + [ + "sentence", + "improper", + 4361549257087816853, + "TEXT", + "#/texts/139", + 1.0, + 15441160910541485670, + 9983842722140753537, + null, + null, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "of", + "of" + ], [ "numval", "ival", @@ -56760,6 +60435,27 @@ "15", "15" ], + [ + "sentence", + "improper", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 4575797946527946612, + 5297035185336180529, + null, + null, + 0, + 12, + 0, + 12, + 0, + 2, + true, + "composition.", + "composition." + ], [ "term", "single-term", @@ -56783,7 +60479,7 @@ ], [ "sentence", - "", + "proper", 8207961846673301043, "TEXT", "#/texts/140", @@ -56888,7 +60584,7 @@ ], [ "sentence", - "", + "proper", 8207961846673301043, "TEXT", "#/texts/140", @@ -57014,7 +60710,7 @@ ], [ "sentence", - "", + "proper", 11998199584890640594, "TEXT", "#/texts/141", @@ -57161,7 +60857,7 @@ ], [ "sentence", - "", + "proper", 11998199584890640594, "TEXT", "#/texts/141", @@ -57266,7 +60962,7 @@ ], [ "sentence", - "", + "proper", 11998199584890640594, "TEXT", "#/texts/141", @@ -57539,7 +61235,7 @@ ], [ "sentence", - "", + "proper", 11998199584890640594, "TEXT", "#/texts/141", @@ -57581,7 +61277,7 @@ ], [ "sentence", - "", + "proper", 11998199584890640594, "TEXT", "#/texts/141", @@ -57854,7 +61550,7 @@ ], [ "sentence", - "", + "proper", 11998199584890640594, "TEXT", "#/texts/141", @@ -58106,7 +61802,7 @@ ], [ "sentence", - "", + "proper", 11998199584890640594, "TEXT", "#/texts/141", @@ -58211,7 +61907,49 @@ ], [ "sentence", - "", + "improper", + 16446129547721407877, + "TEXT", + "#/texts/142", + 1.0, + 16842535493722576894, + 9070661535139415199, + null, + null, + 2, + 15, + 2, + 15, + 1, + 3, + true, + "| CONCLUSIONS", + "| CONCLUSIONS" + ], + [ + "term", + "single-term", + 16446129547721407877, + "TEXT", + "#/texts/142", + 1.0, + 4494148153097800926, + 5377935386843765038, + null, + null, + 4, + 15, + 4, + 15, + 2, + 3, + true, + "CONCLUSIONS", + "CONCLUSIONS" + ], + [ + "sentence", + "proper", 6720443978031524294, "TEXT", "#/texts/143", @@ -58421,7 +62159,7 @@ ], [ "sentence", - "", + "proper", 6720443978031524294, "TEXT", "#/texts/143", @@ -58629,6 +62367,27 @@ "scale analytics", "scale analytics" ], + [ + "sentence", + "improper", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 6626158461069440872, + 1924856031834324320, + null, + null, + 450, + 699, + 450, + 699, + 72, + 117, + true, + "This is accomplished through three fundamental design considerations: (1) We do not require manual data curation or annotation; (2) We built a scalable, efficient architecture to support the ingestion, processing and query workloads, all embedded in", + "This is accomplished through three fundamental design considerations: (1) We do not require manual data curation or annotation; (2) We built a scalable, efficient architecture to support the ingestion, processing and query workloads, all embedded in" + ], [ "term", "single-term", @@ -58923,6 +62682,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -58967,7 +62747,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/144", @@ -59217,6 +62997,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -59406,9 +63207,30 @@ "applicable Creative Commons License", "applicable Creative Commons License" ], + [ + "sentence", + "improper", + 2144926730621142072, + "TEXT", + "#/texts/145", + 1.0, + 16380805732317250115, + 5189702932560370903, + null, + null, + 0, + 6, + 0, + 6, + 0, + 3, + true, + "14of15", + "14of15" + ], [ "reference", - "citation-number", + "reference-number", 2144926730621142072, "TEXT", "#/texts/145", @@ -59427,6 +63249,27 @@ "14of15", "14of15" ], + [ + "sentence", + "improper", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 8274346334061681675, + 2314658471919352980, + null, + null, + 0, + 22, + 0, + 22, + 0, + 5, + true, + "a single platform; and", + "a single platform; and" + ], [ "term", "single-term", @@ -59492,7 +63335,7 @@ ], [ "sentence", - "", + "proper", 14222671032550229818, "TEXT", "#/texts/146", @@ -59576,7 +63419,7 @@ ], [ "sentence", - "", + "proper", 17486770941839589126, "TEXT", "#/texts/147", @@ -59786,7 +63629,7 @@ ], [ "sentence", - "", + "proper", 17486770941839589126, "TEXT", "#/texts/147", @@ -59975,7 +63818,7 @@ ], [ "sentence", - "", + "proper", 16574813224778118841, "TEXT", "#/texts/148", @@ -60206,7 +64049,49 @@ ], [ "sentence", - "", + "improper", + 3356142343274371864, + "TEXT", + "#/texts/149", + 1.0, + 17772737780533561635, + 1151622547388974028, + null, + null, + 0, + 27, + 0, + 27, + 0, + 3, + true, + "DATA AVAILABILITY STATEMENT", + "DATA AVAILABILITY STATEMENT" + ], + [ + "term", + "single-term", + 3356142343274371864, + "TEXT", + "#/texts/149", + 1.0, + 17772737780533561635, + 1151622547388974028, + null, + null, + 0, + 27, + 0, + 27, + 0, + 3, + true, + "DATA AVAILABILITY STATEMENT", + "DATA AVAILABILITY STATEMENT" + ], + [ + "sentence", + "proper", 4778022085288441371, "TEXT", "#/texts/150", @@ -60267,6 +64152,48 @@ "third party restrictions", "third party restrictions" ], + [ + "sentence", + "improper", + 4361549257598904601, + "TEXT", + "#/texts/151", + 1.0, + 329104162230294308, + 18235196107168082832, + null, + null, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "ORCID", + "ORCID" + ], + [ + "sentence", + "improper", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 9234676532203821814, + 9496298137639648491, + null, + null, + 0, + 122, + 0, + 122, + 0, + 40, + true, + "Peter W. J. Staar https://orcid.org/0000-0002-8088-0823 Michele Dolfi https://orcid.org/0000-0001-7216-8505 Christoph Auer", + "Peter W. J. Staar https://orcid.org/0000-0002-8088-0823 Michele Dolfi https://orcid.org/0000-0001-7216-8505 Christoph Auer" + ], [ "name", "person-name", @@ -60372,6 +64299,27 @@ "8088-0823", "8088-0823" ], + [ + "term", + "single-term", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 1571808557594152175, + 14010767871411326211, + null, + null, + 56, + 69, + 56, + 69, + 21, + 23, + true, + "Michele Dolfi", + "Michele Dolfi" + ], [ "link", "url", @@ -60456,6 +64404,27 @@ "7216-8505", "7216-8505" ], + [ + "term", + "single-term", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 9737597816447750448, + 4222775986855314534, + null, + null, + 108, + 122, + 108, + 122, + 38, + 40, + true, + "Christoph Auer", + "Christoph Auer" + ], [ "link", "url", @@ -60521,7 +64490,70 @@ ], [ "sentence", - "", + "improper", + 8500729849894221215, + "TEXT", + "#/texts/153", + 1.0, + 14650266124350583462, + 13656738482730710169, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "ENDNOTES", + "ENDNOTES" + ], + [ + "term", + "single-term", + 8500729849894221215, + "TEXT", + "#/texts/153", + 1.0, + 14650266124350583462, + 13656738482730710169, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "ENDNOTES", + "ENDNOTES" + ], + [ + "sentence", + "improper", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 17767354399704235138, + 12639988856153391105, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "*", + "*" + ], + [ + "sentence", + "proper", 7813503946963688644, "TEXT", "#/texts/154", @@ -60710,7 +64742,28 @@ ], [ "sentence", - "", + "improper", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 17767354399704341640, + 1655277645618781842, + null, + null, + 0, + 3, + 0, + 1, + 0, + 1, + true, + "\u2020", + "\u2020" + ], + [ + "sentence", + "proper", 9230987401345399746, "TEXT", "#/texts/155", @@ -60815,7 +64868,7 @@ ], [ "sentence", - "", + "proper", 9230987401345399746, "TEXT", "#/texts/155", @@ -60918,6 +64971,27 @@ "representations", "representations" ], + [ + "sentence", + "improper", + 1997735398126013155, + "TEXT", + "#/texts/156", + 1.0, + 17767354399704341641, + 15453018270956350746, + null, + null, + 0, + 3, + 0, + 1, + 0, + 1, + true, + "\u2021", + "\u2021" + ], [ "link", "url", @@ -60962,7 +65036,28 @@ ], [ "sentence", - "", + "improper", + 13566764974477978642, + "TEXT", + "#/texts/157", + 1.0, + 17767354399704232711, + 4203992233791646194, + null, + null, + 0, + 2, + 0, + 1, + 0, + 1, + true, + "\u00a7", + "\u00a7" + ], + [ + "sentence", + "proper", 13566764974477978642, "TEXT", "#/texts/157", @@ -61067,7 +65162,28 @@ ], [ "sentence", - "", + "improper", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 17767354399704232726, + 13902072770511598079, + null, + null, + 0, + 2, + 0, + 1, + 0, + 1, + true, + "\u00b6", + "\u00b6" + ], + [ + "sentence", + "proper", 4925537010788978399, "TEXT", "#/texts/158", @@ -61256,7 +65372,7 @@ ], [ "sentence", - "", + "proper", 4925537010788978399, "TEXT", "#/texts/158", @@ -61340,7 +65456,28 @@ ], [ "sentence", - "", + "improper", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 15441160910541482490, + 16703317440425394779, + null, + null, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "**", + "**" + ], + [ + "sentence", + "proper", 16552665876195410077, "TEXT", "#/texts/159", @@ -61529,7 +65666,28 @@ ], [ "sentence", - "", + "improper", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 15441160910540903299, + 12657466972806319238, + null, + null, + 0, + 6, + 0, + 2, + 0, + 1, + true, + "\u2020\u2020", + "\u2020\u2020" + ], + [ + "sentence", + "proper", 17579390613842440572, "TEXT", "#/texts/160", @@ -61821,6 +65979,27 @@ "4", "4" ], + [ + "sentence", + "improper", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 15713827668903361733, + 72772065845729394, + null, + null, + 0, + 156, + 0, + 152, + 0, + 40, + true, + "\u2021\u2021 We chose Neo4J as a reference since it is currently the most popular graph database solution, see https://db-engines.com/en/ranking_ trend/graph+dbms", + "\u2021\u2021 We chose Neo4J as a reference since it is currently the most popular graph database solution, see https://db-engines.com/en/ranking_ trend/graph+dbms" + ], [ "expression", "wtoken-concatenation", @@ -61842,6 +66021,27 @@ "Neo4J", "Neo4J" ], + [ + "term", + "single-term", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 12178341415896300292, + 17809976417017763541, + null, + null, + 16, + 19, + 12, + 15, + 3, + 4, + true, + "Neo", + "Neo" + ], [ "numval", "ival", @@ -61863,6 +66063,48 @@ "4", "4" ], + [ + "term", + "single-term", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 6165957175602596780, + 12883719775212934404, + null, + null, + 27, + 36, + 23, + 32, + 8, + 9, + true, + "reference", + "reference" + ], + [ + "term", + "single-term", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 17930183089767229669, + 15389002456544844346, + null, + null, + 68, + 99, + 64, + 95, + 15, + 19, + true, + "popular graph database solution", + "popular graph database solution" + ], [ "link", "url", @@ -61906,8 +66148,29 @@ "https://db-engines.com/en/ranking_" ], [ - "reference", - "url", + "term", + "single-term", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 2831507266554097914, + 1808153544976831155, + null, + null, + 146, + 156, + 142, + 152, + 37, + 40, + true, + "graph+dbms", + "graph+dbms" + ], + [ + "sentence", + "improper", 11085577343317113173, "TEXT", "#/texts/162", @@ -61926,9 +66189,114 @@ "\u00a7\u00a7 http://graph500.org/", "\u00a7\u00a7 http://graph500.org/" ], + [ + "reference", + "authors", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 15441160910541474145, + 13051332902755974487, + null, + null, + 0, + 4, + 0, + 2, + 0, + 1, + true, + "\u00a7\u00a7", + "\u00a7\u00a7" + ], [ "reference", "url", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 1244385257359010144, + 3127203609822040452, + null, + null, + 5, + 25, + 3, + 23, + 1, + 10, + true, + "http://graph500.org/", + "http://graph500.org/" + ], + [ + "term", + "single-term", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 389609625695173007, + 3836236615687027220, + null, + null, + 5, + 9, + 3, + 7, + 1, + 2, + true, + "http", + "http" + ], + [ + "term", + "single-term", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 329104159211247965, + 10630887676941884603, + null, + null, + 12, + 17, + 10, + 15, + 5, + 6, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 389609625618846162, + 3823428058951951811, + null, + null, + 21, + 25, + 19, + 23, + 8, + 10, + true, + "org/", + "org/" + ], + [ + "sentence", + "improper", 1792096630133661292, "TEXT", "#/texts/163", @@ -61947,9 +66315,72 @@ "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html" ], + [ + "reference", + "reference-number", + 1792096630133661292, + "TEXT", + "#/texts/163", + 1.0, + 15441160910541473069, + 11916476354364763757, + null, + null, + 0, + 4, + 0, + 2, + 0, + 1, + true, + "\u00b6\u00b6", + "\u00b6\u00b6" + ], + [ + "reference", + "url", + 1792096630133661292, + "TEXT", + "#/texts/163", + 1.0, + 773494417653844359, + 2919336056783602673, + null, + null, + 5, + 54, + 3, + 52, + 1, + 18, + true, + "https://snap.stanford.edu/data/higgs-twitter.html", + "https://snap.stanford.edu/data/higgs-twitter.html" + ], [ "sentence", - "", + "improper", + 11462638369524745676, + "TEXT", + "#/texts/164", + 1.0, + 12178341415896407674, + 16045680922123672072, + null, + null, + 0, + 3, + 0, + 3, + 0, + 1, + true, + "***", + "***" + ], + [ + "sentence", + "proper", 11462638369524745676, "TEXT", "#/texts/164", @@ -62011,8 +66442,8 @@ "float value" ], [ - "reference", - "url", + "sentence", + "improper", 16611805225457383637, "TEXT", "#/texts/165", @@ -62031,9 +66462,135 @@ "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/" ], + [ + "reference", + "reference-number", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 12178341417198250328, + 1575262081256116873, + null, + null, + 0, + 9, + 0, + 3, + 0, + 1, + true, + "\u2020\u2020\u2020", + "\u2020\u2020\u2020" + ], [ "reference", "url", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 14268042929131437441, + 234824939381677632, + null, + null, + 10, + 75, + 4, + 69, + 1, + 23, + true, + "https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", + "https://neo4j.com/developer/guide-sizing-and-hardware-calculator/" + ], + [ + "term", + "single-term", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 329104161533497127, + 16180224231918255666, + null, + null, + 10, + 15, + 4, + 9, + 1, + 2, + true, + "https", + "https" + ], + [ + "term", + "single-term", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 3943595989042214060, + 16915067796660432078, + null, + null, + 24, + 37, + 18, + 31, + 9, + 12, + true, + "com/developer", + "com/developer" + ], + [ + "term", + "single-term", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 14637910066475074126, + 6114961828553378919, + null, + null, + 55, + 63, + 49, + 57, + 19, + 20, + true, + "hardware", + "hardware" + ], + [ + "term", + "single-term", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 2879593163591796188, + 5335026245912853509, + null, + null, + 64, + 74, + 58, + 68, + 21, + 22, + true, + "calculator", + "calculator" + ], + [ + "sentence", + "improper", 1531505125666754945, "TEXT", "#/texts/166", @@ -62052,9 +66609,135 @@ "\u2021\u2021\u2021 https://www.naturalearthdata.com/", "\u2021\u2021\u2021 https://www.naturalearthdata.com/" ], + [ + "reference", + "reference-number", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 12178341417198254221, + 18213045800656724647, + null, + null, + 0, + 9, + 0, + 3, + 0, + 1, + true, + "\u2021\u2021\u2021", + "\u2021\u2021\u2021" + ], [ "reference", "url", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 10760936391898733584, + 8275004636990824295, + null, + null, + 10, + 43, + 4, + 37, + 1, + 11, + true, + "https://www.naturalearthdata.com/", + "https://www.naturalearthdata.com/" + ], + [ + "term", + "single-term", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 329104161533497127, + 17768638491100025109, + null, + null, + 10, + 15, + 4, + 9, + 1, + 2, + true, + "https", + "https" + ], + [ + "term", + "single-term", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 12178341415895524451, + 18051100498086497778, + null, + null, + 18, + 21, + 12, + 15, + 5, + 6, + true, + "www", + "www" + ], + [ + "term", + "single-term", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 2943004857435312037, + 9330444828971529586, + null, + null, + 22, + 38, + 16, + 32, + 7, + 8, + true, + "naturalearthdata", + "naturalearthdata" + ], + [ + "term", + "single-term", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 389609625695971718, + 4325380352130131677, + null, + null, + 39, + 43, + 33, + 37, + 9, + 11, + true, + "com/", + "com/" + ], + [ + "sentence", + "improper", 15684389308320953629, "TEXT", "#/texts/167", @@ -62075,7 +66758,133 @@ ], [ "reference", - "author", + "reference-number", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 12178341415889410024, + 11239483387003711537, + null, + null, + 0, + 6, + 0, + 3, + 0, + 1, + true, + "\u00a7\u00a7\u00a7", + "\u00a7\u00a7\u00a7" + ], + [ + "reference", + "url", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 449425663079441853, + 5532800466031663479, + null, + null, + 7, + 36, + 4, + 33, + 1, + 11, + true, + "https://www.ccreservoirs.com/", + "https://www.ccreservoirs.com/" + ], + [ + "term", + "single-term", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 329104161533497127, + 4064657654566889450, + null, + null, + 7, + 12, + 4, + 9, + 1, + 2, + true, + "https", + "https" + ], + [ + "term", + "single-term", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 12178341415895524451, + 11239821040174356665, + null, + null, + 15, + 18, + 12, + 15, + 5, + 6, + true, + "www", + "www" + ], + [ + "term", + "single-term", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 4898272711883537501, + 319067657400806549, + null, + null, + 19, + 31, + 16, + 28, + 7, + 8, + true, + "ccreservoirs", + "ccreservoirs" + ], + [ + "term", + "single-term", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 389609625695971718, + 13520393129950967142, + null, + null, + 32, + 36, + 29, + 33, + 9, + 11, + true, + "com/", + "com/" + ], + [ + "sentence", + "improper", 14590754343934702701, "TEXT", "#/texts/168", @@ -62096,28 +66905,91 @@ ], [ "reference", - "citation-number", + "authors", + 14590754343934702701, + "TEXT", + "#/texts/168", + 1.0, + 1858797456585454232, + 2809842144121602219, + null, + null, + 0, + 10, + 0, + 10, + 0, + 1, + true, + "REFERENCES", + "REFERENCES" + ], + [ + "term", + "single-term", + 14590754343934702701, + "TEXT", + "#/texts/168", + 1.0, + 1858797456585454232, + 2809842144121602219, + null, + null, + 0, + 10, + 0, + 10, + 0, + 1, + true, + "REFERENCES", + "REFERENCES" + ], + [ + "sentence", + "improper", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 15441160910541481980, + 8386387568042747678, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "1.", + "1." + ], + [ + "reference", + "reference-number", 10480452763767134455, "TEXT", "#/texts/169", 1.0, 17767354399704235161, - 16208788960124925205, + 16208788960124925204, null, null, 0, - 1, + 2, 0, - 1, + 2, 0, - 1, + 2, true, "1", - "1" + "1." ], [ "sentence", - "", + "proper", 10480452763767134455, "TEXT", "#/texts/169", @@ -62138,24 +67010,24 @@ ], [ "reference", - "author", + "authors", 10480452763767134455, "TEXT", "#/texts/169", 1.0, - 11879540473470058199, - 12427853451193245392, + 14045775576648193325, + 8244242289281145129, null, null, 3, - 17, + 52, 3, - 17, + 52, 2, - 5, + 15, true, - "Staar Peter WJ", - "Staar Peter WJ" + "Staar Peter WJ, Michele D, Christoph A, Costas B", + "Staar Peter WJ, Michele D, Christoph A, Costas B." ], [ "term", @@ -62178,27 +67050,6 @@ "Staar Peter WJ", "Staar Peter WJ" ], - [ - "reference", - "author", - 10480452763767134455, - "TEXT", - "#/texts/169", - 1.0, - 6613162031266505134, - 16138057201536909006, - null, - null, - 19, - 28, - 19, - 28, - 6, - 8, - true, - "Michele D", - "Michele D" - ], [ "term", "single-term", @@ -62220,27 +67071,6 @@ "Michele D", "Michele D" ], - [ - "reference", - "author", - 10480452763767134455, - "TEXT", - "#/texts/169", - 1.0, - 4457167794784606628, - 16487730286724222122, - null, - null, - 30, - 41, - 30, - 41, - 9, - 11, - true, - "Christoph A", - "Christoph A" - ], [ "term", "single-term", @@ -62283,27 +67113,6 @@ "Costas B Corpus", "Costas B. Corpus" ], - [ - "reference", - "author", - 10480452763767134455, - "TEXT", - "#/texts/169", - 1.0, - 6560601913145533820, - 12701816617387729389, - null, - null, - 43, - 52, - 43, - 52, - 12, - 15, - true, - "Costas B.", - "Costas B." - ], [ "reference", "title", @@ -62311,19 +67120,19 @@ "TEXT", "#/texts/169", 1.0, - 3346237141252876309, - 13011534883222988606, + 10495776784264289684, + 6718213806780973142, null, null, 53, - 136, + 146, 53, - 136, + 146, 15, - 28, + 33, true, - "Corpus conversion service: a machine learning platform to ingest documents at scale", - "Corpus conversion service: a machine learning platform to ingest documents at scale" + "Corpus conversion service: a machine learning platform to ingest documents at scale. KDD '18", + "Corpus conversion service: a machine learning platform to ingest documents at scale. KDD '18." ], [ "term", @@ -62432,7 +67241,7 @@ ], [ "sentence", - "", + "proper", 10480452763767134455, "TEXT", "#/texts/169", @@ -62451,27 +67260,6 @@ "KDD '18.", "KDD '18." ], - [ - "reference", - "container-title", - 10480452763767134455, - "TEXT", - "#/texts/169", - 1.0, - 8106351470704634736, - 17995829417296331915, - null, - null, - 138, - 145, - 138, - 145, - 29, - 32, - true, - "KDD '18", - "KDD '18" - ], [ "term", "single-term", @@ -62495,7 +67283,7 @@ ], [ "sentence", - "", + "proper", 10480452763767134455, "TEXT", "#/texts/169", @@ -62514,27 +67302,6 @@ "New York, NY: ACM; 2018:774-782.", "New York, NY: ACM; 2018:774-782." ], - [ - "reference", - "location", - 10480452763767134455, - "TEXT", - "#/texts/169", - 1.0, - 6517026456739326224, - 8283202906327186871, - null, - null, - 147, - 160, - 147, - 160, - 33, - 38, - true, - "New York, NY:", - "New York, NY:" - ], [ "term", "single-term", @@ -62605,44 +67372,65 @@ "TEXT", "#/texts/169", 1.0, - 8751415320993915403, - 4351521141262751348, + 15683444325968468739, + 16996762089080527682, null, null, - 164, - 178, - 164, - 178, - 39, - 45, + 166, + 179, + 166, + 179, + 40, + 46, + true, + "2018:774-782", + "2018:774-782." + ], + [ + "sentence", + "improper", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 15441160910541481780, + 9679667702159864047, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, true, - "; 2018:774-782", - "; 2018:774-782" + "2.", + "2." ], [ "reference", - "citation-number", + "reference-number", 11866471329779366855, "TEXT", "#/texts/170", 1.0, 17767354399704235162, - 7639029136784882071, + 7639029136784882064, null, null, 0, - 1, + 2, 0, - 1, + 2, 0, - 1, + 2, true, "2", - "2" + "2." ], [ "sentence", - "", + "proper", 11866471329779366855, "TEXT", "#/texts/170", @@ -62663,24 +67451,24 @@ ], [ "reference", - "author", + "authors", 11866471329779366855, "TEXT", "#/texts/170", 1.0, - 11879540473470058199, - 6818801233014041471, + 4357204087924678948, + 11503245573532489830, null, null, 3, - 17, + 42, 3, - 17, + 42, 2, - 5, + 15, true, - "Staar Peter WJ", - "Staar Peter WJ" + "Staar Peter WJ, Kl BP, Roxana I, et al", + "Staar Peter WJ, Kl BP, Roxana I, et al." ], [ "term", @@ -62703,27 +67491,6 @@ "Staar Peter WJ", "Staar Peter WJ" ], - [ - "reference", - "author", - 11866471329779366855, - "TEXT", - "#/texts/170", - 1.0, - 329104159232588720, - 1186563503698797045, - null, - null, - 19, - 24, - 19, - 24, - 6, - 8, - true, - "Kl BP", - "Kl BP" - ], [ "term", "single-term", @@ -62745,27 +67512,6 @@ "Kl BP", "Kl BP" ], - [ - "reference", - "author", - 11866471329779366855, - "TEXT", - "#/texts/170", - 1.0, - 14652187939873997159, - 718674333250886747, - null, - null, - 26, - 34, - 26, - 34, - 9, - 11, - true, - "Roxana I", - "Roxana I" - ], [ "term", "single-term", @@ -62816,18 +67562,18 @@ "#/texts/170", 1.0, 7105706713138331748, - 8882313339767931673, + 8882313339767931654, null, null, 43, - 129, + 130, 43, - 129, + 130, 15, - 29, + 30, true, "Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance", - "Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance" + "Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance." ], [ "term", @@ -62978,7 +67724,7 @@ ], [ "sentence", - "", + "proper", 11866471329779366855, "TEXT", "#/texts/170", @@ -62997,27 +67743,6 @@ "Chicago, IL: IEEE; 2016:812-821.", "Chicago, IL: IEEE; 2016:812-821." ], - [ - "reference", - "journal", - 11866471329779366855, - "TEXT", - "#/texts/170", - 1.0, - 8106350741667376964, - 2037770047407614341, - null, - null, - 131, - 138, - 131, - 138, - 30, - 31, - true, - "Chicago", - "Chicago" - ], [ "term", "single-term", @@ -63088,44 +67813,65 @@ "TEXT", "#/texts/170", 1.0, - 389609625548777056, - 17963509656509068572, + 325347433255123998, + 9431696322833619114, null, null, 150, - 154, + 163, 150, - 154, + 163, 36, - 37, + 42, true, - "2016", - "2016" + "2016:812-821", + "2016:812-821." + ], + [ + "sentence", + "improper", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 15441160910541481845, + 8041722171934135301, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "3.", + "3." ], [ "reference", - "citation-number", + "reference-number", 6016885898370676469, "TEXT", "#/texts/171", 1.0, 17767354399704235163, - 13510159049290326510, + 13510159049290326505, null, null, 0, - 1, + 2, 0, - 1, + 2, 0, - 1, + 2, true, "3", - "3" + "3." ], [ "sentence", - "", + "proper", 6016885898370676469, "TEXT", "#/texts/171", @@ -63146,24 +67892,24 @@ ], [ "reference", - "author", + "authors", 6016885898370676469, "TEXT", "#/texts/171", 1.0, - 14650311461945683358, - 1978144735469983705, + 9243870653745040564, + 2494378156442366016, null, null, 3, - 11, + 43, 3, - 11, + 43, 2, - 4, + 16, true, - "Matteo M", - "Matteo M" + "Matteo M, Christoph A, Val'ery W, et al", + "Matteo M, Christoph A, Val'ery W, et al." ], [ "term", @@ -63186,27 +67932,6 @@ "Matteo M", "Matteo M" ], - [ - "reference", - "author", - 6016885898370676469, - "TEXT", - "#/texts/171", - 1.0, - 4457167794784606628, - 3737697229009384388, - null, - null, - 13, - 24, - 13, - 24, - 5, - 7, - true, - "Christoph A", - "Christoph A" - ], [ "term", "single-term", @@ -63228,27 +67953,6 @@ "Christoph A", "Christoph A" ], - [ - "reference", - "author", - 6016885898370676469, - "TEXT", - "#/texts/171", - 1.0, - 6183363009296336817, - 2886377010043332845, - null, - null, - 26, - 35, - 26, - 35, - 8, - 12, - true, - "Val'ery W", - "Val'ery W" - ], [ "expression", "wtoken-concatenation", @@ -63341,18 +68045,18 @@ "#/texts/171", 1.0, 14518759528420507379, - 35296972575901155, + 35296972575901154, null, null, 44, - 139, + 140, 44, - 139, + 140, 16, - 27, + 28, true, "An information extraction and knowledge graph platform for accelerating biochemical discoveries", - "An information extraction and knowledge graph platform for accelerating biochemical discoveries" + "An information extraction and knowledge graph platform for accelerating biochemical discoveries." ], [ "term", @@ -63440,7 +68144,7 @@ ], [ "sentence", - "", + "proper", 6016885898370676469, "TEXT", "#/texts/171", @@ -63543,6 +68247,27 @@ "abs", "abs" ], + [ + "reference", + "volume", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 948495657295850540, + 12052824091433651138, + null, + null, + 151, + 161, + 151, + 161, + 32, + 35, + true, + "1907.08400", + "1907.08400" + ], [ "reference", "date", @@ -63551,43 +68276,64 @@ "#/texts/171", 1.0, 16381206542172555288, - 10693536807570486686, + 10693536807570486685, null, null, 161, - 167, + 168, 161, - 167, + 168, 35, - 37, + 38, true, "; 2019", - "; 2019" + "; 2019." + ], + [ + "sentence", + "improper", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 15441160910541486262, + 13393766537274350374, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "4.", + "4." ], [ "reference", - "citation-number", + "reference-number", 13946275785662847920, "TEXT", "#/texts/172", 1.0, 17767354399704235156, - 2787669627718018145, + 2787669627718018158, null, null, 0, - 1, + 2, 0, - 1, + 2, 0, - 1, + 2, true, "4", - "4" + "4." ], [ "sentence", - "", + "proper", 13946275785662847920, "TEXT", "#/texts/172", @@ -63608,24 +68354,24 @@ ], [ "reference", - "author", + "authors", 13946275785662847920, "TEXT", "#/texts/172", 1.0, - 8106352039693059414, - 189526913306248274, + 8145380721974590875, + 8036423230253362696, null, null, 3, - 10, + 51, 3, - 10, + 51, 2, - 4, + 17, true, - "Paolo R", - "Paolo R" + "Paolo R, Marco P, Floriana B, Peter S, Costas B", + "Paolo R, Marco P, Floriana B, Peter S, Costas B." ], [ "term", @@ -63648,27 +68394,6 @@ "Paolo R", "Paolo R" ], - [ - "reference", - "author", - 13946275785662847920, - "TEXT", - "#/texts/172", - 1.0, - 8106471247241844081, - 12829126084417792103, - null, - null, - 12, - 19, - 12, - 19, - 5, - 7, - true, - "Marco P", - "Marco P" - ], [ "term", "single-term", @@ -63690,27 +68415,6 @@ "Marco P", "Marco P" ], - [ - "reference", - "author", - 13946275785662847920, - "TEXT", - "#/texts/172", - 1.0, - 15356089124994678984, - 18000216761919637454, - null, - null, - 21, - 31, - 21, - 31, - 8, - 10, - true, - "Floriana B", - "Floriana B" - ], [ "term", "single-term", @@ -63732,27 +68436,6 @@ "Floriana B", "Floriana B" ], - [ - "reference", - "author", - 13946275785662847920, - "TEXT", - "#/texts/172", - 1.0, - 8106352035144611657, - 2775049790770760163, - null, - null, - 33, - 40, - 33, - 40, - 11, - 13, - true, - "Peter S", - "Peter S" - ], [ "term", "single-term", @@ -63795,27 +68478,6 @@ "Costas B Application", "Costas B. Application" ], - [ - "reference", - "author", - 13946275785662847920, - "TEXT", - "#/texts/172", - 1.0, - 6560601913145533820, - 12130024709208567744, - null, - null, - 42, - 51, - 42, - 51, - 14, - 17, - true, - "Costas B.", - "Costas B." - ], [ "reference", "title", @@ -63823,19 +68485,19 @@ "TEXT", "#/texts/172", 1.0, - 14371818679908732529, - 10294554605073457499, + 89727683796184421, + 11248393883266780918, null, null, 52, - 174, + 223, 52, - 174, + 223, 17, - 36, + 44, true, - "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019", - "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019" + "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019). Abu Dhabi International Petroleum Exhibition &", + "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019). Abu Dhabi International Petroleum Exhibition &" ], [ "term", @@ -63986,7 +68648,7 @@ ], [ "sentence", - "", + "proper", 13946275785662847920, "TEXT", "#/texts/172", @@ -64005,27 +68667,6 @@ "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10.", "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10." ], - [ - "reference", - "container-title", - 13946275785662847920, - "TEXT", - "#/texts/172", - 1.0, - 4292761212337338605, - 773134743697376497, - null, - null, - 177, - 245, - 177, - 245, - 38, - 48, - true, - "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi", - "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi" - ], [ "term", "enum-term-mark-4", @@ -64068,6 +68709,27 @@ "Abu Dhabi International Petroleum Exhibition", "Abu Dhabi International Petroleum Exhibition" ], + [ + "reference", + "conference", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 3847339587475413410, + 2047970562154800974, + null, + null, + 224, + 251, + 224, + 251, + 44, + 51, + true, + "Conference, Abu Dhabi, UAE", + "Conference, Abu Dhabi, UAE," + ], [ "term", "single-term", @@ -64110,27 +68772,6 @@ "Abu Dhabi", "Abu Dhabi" ], - [ - "reference", - "location", - 13946275785662847920, - "TEXT", - "#/texts/172", - 1.0, - 16381206478137548706, - 9744551904329916157, - null, - null, - 247, - 253, - 247, - 253, - 49, - 52, - true, - "UAE, :", - "UAE, :" - ], [ "term", "single-term", @@ -64152,6 +68793,27 @@ "UAE", "UAE" ], + [ + "reference", + "pages", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 12178341415896216312, + 16626963629120408485, + null, + null, + 252, + 256, + 252, + 256, + 51, + 54, + true, + ":10", + ":10." + ], [ "expression", "wtoken-concatenation", @@ -64174,50 +68836,50 @@ ":10" ], [ - "sentence", - "", + "reference", + "doi", 13946275785662847920, "TEXT", "#/texts/172", 1.0, - 5857244370669890274, - 17990747492643866277, + 11673547348366864691, + 4147257630836829657, null, null, 257, - 269, + 292, 257, - 269, + 292, 54, - 60, + 70, true, - "https://doi.", - "https://doi." + "https://doi.org/10.2118/197610-MS", + "https://doi. org/10.2118/197610-MS." ], [ - "expression", - "wtoken-concatenation", + "sentence", + "proper", 13946275785662847920, "TEXT", "#/texts/172", 1.0, - 7742135058095281026, - 17571544217117981683, + 5857244370669890274, + 17990747492643866277, null, null, 257, - 268, + 269, 257, - 268, + 269, 54, - 59, + 60, true, - "https://doi", - "https://doi" + "https://doi.", + "https://doi." ], [ - "reference", - "url", + "expression", + "wtoken-concatenation", 13946275785662847920, "TEXT", "#/texts/172", @@ -64279,25 +68941,25 @@ "doi" ], [ - "reference", - "url", + "sentence", + "improper", 13946275785662847920, "TEXT", "#/texts/172", 1.0, - 16381206566166927037, - 12728529714635134334, + 15557566671061207768, + 230972920426777869, null, null, 270, - 276, + 289, 270, - 276, + 289, 60, - 63, + 68, true, - "org/10", - "org/10" + "org/10.2118/197610-", + "org/10.2118/197610-" ], [ "term", @@ -64322,7 +68984,7 @@ ], [ "sentence", - "", + "proper", 13946275785662847920, "TEXT", "#/texts/172", @@ -64362,30 +69024,51 @@ "MS", "MS" ], + [ + "sentence", + "improper", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 15441160910541486327, + 5428431164759035833, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "5.", + "5." + ], [ "reference", - "citation-number", + "reference-number", 7693798302433367973, "TEXT", "#/texts/173", 1.0, 17767354399704235157, - 9080683344301571175, + 9080683344301571168, null, null, 0, - 1, + 2, 0, - 1, + 2, 0, - 1, + 2, true, "5", - "5" + "5." ], [ "sentence", - "", + "proper", 7693798302433367973, "TEXT", "#/texts/173", @@ -64406,24 +69089,24 @@ ], [ "reference", - "author", + "authors", 7693798302433367973, "TEXT", "#/texts/173", 1.0, - 3027248490321213074, - 16283814403211008850, + 4212509100547346489, + 17078400926856851527, null, null, 3, - 14, + 55, 3, - 14, + 55, 2, - 4, + 17, true, - "Guillaume L", - "Guillaume L" + "Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D", + "Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D." ], [ "term", @@ -64446,27 +69129,6 @@ "Guillaume L", "Guillaume L" ], - [ - "reference", - "author", - 7693798302433367973, - "TEXT", - "#/texts/173", - 1.0, - 14650310996645589292, - 14357325801323977565, - null, - null, - 16, - 24, - 16, - 24, - 5, - 7, - true, - "Miguel B", - "Miguel B" - ], [ "term", "single-term", @@ -64488,27 +69150,6 @@ "Miguel B", "Miguel B" ], - [ - "reference", - "author", - 7693798302433367973, - "TEXT", - "#/texts/173", - 1.0, - 6049415556904669075, - 4491667145265607561, - null, - null, - 26, - 35, - 26, - 35, - 8, - 10, - true, - "Sandeep S", - "Sandeep S" - ], [ "term", "single-term", @@ -64530,27 +69171,6 @@ "Sandeep S", "Sandeep S" ], - [ - "reference", - "author", - 7693798302433367973, - "TEXT", - "#/texts/173", - 1.0, - 14650438760956024332, - 12941354247565292233, - null, - null, - 37, - 45, - 37, - 45, - 11, - 13, - true, - "Kazuya K", - "Kazuya K" - ], [ "term", "single-term", @@ -64595,24 +69215,24 @@ ], [ "reference", - "author", + "title", 7693798302433367973, "TEXT", "#/texts/173", 1.0, - 14650449385951782031, - 12018837533588020118, + 16200640505386782750, + 16685934982266491450, null, null, - 47, - 55, - 47, - 55, - 14, + 56, + 165, + 56, + 165, 17, + 32, true, - "Chris D.", - "Chris D." + "Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics;", + "Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics;" ], [ "term", @@ -64721,28 +69341,70 @@ ], [ "reference", - "citation-number", + "date", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 389609625548777056, + 1791736220903901961, + null, + null, + 166, + 171, + 166, + 171, + 32, + 34, + true, + "2016", + "2016." + ], + [ + "sentence", + "improper", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 15441160910541481013, + 5942897417109902577, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "6.", + "6." + ], + [ + "reference", + "reference-number", 3109792572574236398, "TEXT", "#/texts/174", 1.0, 17767354399704235158, - 2935027410945303089, + 2935027410945303088, null, null, 0, - 1, + 2, 0, - 1, + 2, 0, - 1, + 2, true, "6", - "6" + "6." ], [ "sentence", - "", + "proper", 3109792572574236398, "TEXT", "#/texts/174", @@ -64763,24 +69425,24 @@ ], [ "reference", - "author", + "authors", 3109792572574236398, "TEXT", "#/texts/174", 1.0, - 12139207556299923335, - 12395232115938598978, + 3481924782295664840, + 935617422453535067, null, null, 3, - 16, + 25, 3, - 16, + 25, 2, - 5, + 9, true, - "Chiu Jason PC", - "Chiu Jason PC" + "Chiu Jason PC, Eric N", + "Chiu Jason PC, Eric N." ], [ "term", @@ -64824,27 +69486,6 @@ "Eric N Named", "Eric N. Named" ], - [ - "reference", - "author", - 3109792572574236398, - "TEXT", - "#/texts/174", - 1.0, - 8106350848262626922, - 5052428205716655678, - null, - null, - 18, - 25, - 18, - 25, - 6, - 9, - true, - "Eric N.", - "Eric N." - ], [ "reference", "title", @@ -64853,18 +69494,18 @@ "#/texts/174", 1.0, 16636370883913883252, - 5810162511985509685, + 5810162511985509682, null, null, 26, - 79, + 80, 26, - 79, + 80, 9, - 17, + 18, true, "Named entity recognition with bidirectional LSTM-CNNs", - "Named entity recognition with bidirectional LSTM-CNNs" + "Named entity recognition with bidirectional LSTM-CNNs." ], [ "term", @@ -64952,7 +69593,7 @@ ], [ "sentence", - "", + "proper", 3109792572574236398, "TEXT", "#/texts/174", @@ -64979,18 +69620,18 @@ "#/texts/174", 1.0, 389609625541773713, - 1712767977156820574, + 1712767977156820575, null, null, 81, - 85, + 86, 81, - 85, + 86, 18, - 19, + 20, true, "TACL", - "TACL" + "TACL." ], [ "term", @@ -65013,6 +69654,27 @@ "TACL", "TACL" ], + [ + "sentence", + "improper", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 900810462997696699, + 9296645351405634953, + null, + null, + 87, + 102, + 87, + 102, + 20, + 28, + true, + "2016;4:357-370.", + "2016;4:357-370." + ], [ "reference", "date", @@ -65020,19 +69682,19 @@ "TEXT", "#/texts/174", 1.0, - 389609625548777056, - 1668465275038003542, + 329104147695661831, + 15059294784117209596, null, null, 87, - 91, + 92, 87, - 91, + 92, 20, - 21, + 22, true, - "2016", - "2016" + "2016;", + "2016;" ], [ "reference", @@ -65041,44 +69703,65 @@ "TEXT", "#/texts/174", 1.0, - 9584872678510603869, - 10893893406063870923, + 6498928726029246334, + 10334044460289682205, null, null, - 91, - 101, - 91, - 101, - 21, - 27, + 92, + 102, + 92, + 102, + 22, + 28, + true, + "4:357-370", + "4:357-370." + ], + [ + "sentence", + "improper", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 15441160910541481076, + 14099067875649218598, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, true, - ";4:357-370", - ";4:357-370" + "7.", + "7." ], [ "reference", - "citation-number", + "reference-number", 8111170387462350170, "TEXT", "#/texts/175", 1.0, 17767354399704235159, - 17892509173094146701, + 17892509173094146700, null, null, 0, - 1, + 2, 0, - 1, + 2, 0, - 1, + 2, true, "7", - "7" + "7." ], [ "sentence", - "", + "proper", 8111170387462350170, "TEXT", "#/texts/175", @@ -65099,24 +69782,24 @@ ], [ "reference", - "author", + "authors", 8111170387462350170, "TEXT", "#/texts/175", 1.0, - 6611312511369759405, - 3019524304480366334, + 7850636613883620371, + 5463699822054275279, null, null, 3, - 12, + 21, 3, - 12, + 21, 2, - 4, + 8, true, - "Matthew H", - "Matthew H" + "Matthew H, Ines M", + "Matthew H, Ines M." ], [ "term", @@ -65139,27 +69822,6 @@ "Matthew H", "Matthew H" ], - [ - "reference", - "author", - 8111170387462350170, - "TEXT", - "#/texts/175", - 1.0, - 8106350362383531053, - 10877267985434630613, - null, - null, - 14, - 21, - 14, - 21, - 5, - 8, - true, - "Ines M.", - "Ines M." - ], [ "term", "single-term", @@ -65188,19 +69850,19 @@ "TEXT", "#/texts/175", 1.0, - 8673657110667713983, - 2132423457048291450, + 8498785529703184960, + 117824391260336618, null, null, 22, - 138, + 150, 22, - 138, + 150, 8, - 24, + 28, true, - "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing", - "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing" + "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear", + "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear." ], [ "term", @@ -65225,7 +69887,28 @@ ], [ "sentence", - "", + "improper", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 12178341415895638165, + 5486425860276194881, + null, + null, + 22, + 25, + 22, + 25, + 8, + 8, + false, + "spa", + "spa" + ], + [ + "sentence", + "proper", 8111170387462350170, "TEXT", "#/texts/175", @@ -65330,7 +70013,7 @@ ], [ "sentence", - "", + "proper", 8111170387462350170, "TEXT", "#/texts/175", @@ -65349,6 +70032,27 @@ "To appear.", "To appear." ], + [ + "sentence", + "improper", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 329104147695661623, + 6425989175071208113, + null, + null, + 151, + 156, + 151, + 156, + 28, + 30, + true, + "2017.", + "2017." + ], [ "reference", "date", @@ -65357,43 +70061,64 @@ "#/texts/175", 1.0, 389609625548777057, - 14192492111179186414, + 14192492111179186413, null, null, 151, - 155, + 156, 151, - 155, + 156, 28, - 29, + 30, true, "2017", - "2017" + "2017." + ], + [ + "sentence", + "improper", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 15441160910541481399, + 8301553353386600029, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "8.", + "8." ], [ "reference", - "citation-number", + "reference-number", 14682702346227170925, "TEXT", "#/texts/176", 1.0, 17767354399704235152, - 15651484829649486928, + 15651484829649486931, null, null, 0, - 1, + 2, 0, - 1, + 2, 0, - 1, + 2, true, "8", - "8" + "8." ], [ "sentence", - "", + "proper", 14682702346227170925, "TEXT", "#/texts/176", @@ -65414,24 +70139,24 @@ ], [ "reference", - "author", + "authors", 14682702346227170925, "TEXT", "#/texts/176", 1.0, - 6627095272342846459, - 8960025720845820047, + 5848297652864005955, + 13778801729762411640, null, null, 3, - 12, + 54, 3, - 12, + 54, 2, - 4, + 18, true, - "Magoon LB", - "Magoon LB" + "Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!", + "Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!)," ], [ "term", @@ -65454,27 +70179,6 @@ "Magoon LB", "Magoon LB" ], - [ - "reference", - "author", - 14682702346227170925, - "TEXT", - "#/texts/176", - 1.0, - 6563582333827106756, - 4026322596752919867, - null, - null, - 14, - 23, - 14, - 23, - 5, - 7, - true, - "Hudson TL", - "Hudson TL" - ], [ "term", "single-term", @@ -65496,27 +70200,6 @@ "Hudson TL", "Hudson TL" ], - [ - "reference", - "author", - 14682702346227170925, - "TEXT", - "#/texts/176", - 1.0, - 1612814864176813785, - 12195293078214673428, - null, - null, - 25, - 35, - 25, - 35, - 8, - 11, - true, - "Peters KE.", - "Peters KE." - ], [ "term", "single-term", @@ -65580,27 +70263,6 @@ "Egret-Hibernia(!)", "Egret-Hibernia(!)" ], - [ - "reference", - "title", - 14682702346227170925, - "TEXT", - "#/texts/176", - 1.0, - 10827383077041810226, - 7289787549141850214, - null, - null, - 36, - 52, - 36, - 52, - 11, - 16, - true, - "Egret-Hibernia(!", - "Egret-Hibernia(!" - ], [ "term", "single-term", @@ -65651,18 +70313,18 @@ "#/texts/176", 1.0, 8991166294068381652, - 13146587142049422219, + 13146587142049422196, null, null, 55, - 137, + 138, 55, - 137, + 138, 18, - 31, + 32, true, "a significant petroleum system, northern Grand Banks area, offshore eastern Canada", - "a significant petroleum system, northern Grand Banks area, offshore eastern Canada" + "a significant petroleum system, northern Grand Banks area, offshore eastern Canada." ], [ "term", @@ -65729,7 +70391,7 @@ ], [ "sentence", - "", + "proper", 14682702346227170925, "TEXT", "#/texts/176", @@ -65756,18 +70418,18 @@ "#/texts/176", 1.0, 14445748745948696227, - 6494504935180328364, + 6494504935180328365, null, null, 139, - 161, + 162, 139, - 161, + 162, 32, - 37, + 38, true, "Am Assoc Pet Geol Bull", - "Am Assoc Pet Geol Bull" + "Am Assoc Pet Geol Bull." ], [ "term", @@ -65790,6 +70452,27 @@ "Am Assoc Pet Geol Bull", "Am Assoc Pet Geol Bull" ], + [ + "sentence", + "improper", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 2669509315780110933, + 17452564295061618645, + null, + null, + 163, + 184, + 163, + 184, + 38, + 49, + true, + "2005;89(9):1203-1237.", + "2005;89(9):1203-1237." + ], [ "reference", "date", @@ -65797,19 +70480,19 @@ "TEXT", "#/texts/176", 1.0, - 329104147695665975, - 7749771140976442, + 12010959389695517137, + 15013988311129840871, null, null, 163, - 168, + 184, 163, - 168, + 184, 38, - 40, + 49, true, - "2005;", - "2005;" + "2005;89(9):1203-1237", + "2005;89(9):1203-1237." ], [ "parenthesis", @@ -65853,6 +70536,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -65897,7 +70601,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/177", @@ -66147,6 +70851,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", @@ -66336,30 +71061,51 @@ "applicable Creative Commons License", "applicable Creative Commons License" ], + [ + "sentence", + "improper", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 15441160910541481462, + 3095595477306612046, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "9.", + "9." + ], [ "reference", - "citation-number", + "reference-number", 11430385775112165283, "TEXT", "#/texts/178", 1.0, 17767354399704235153, - 10433678415276841389, + 10433678415276841390, null, null, 0, - 1, + 2, 0, - 1, + 2, 0, - 1, + 2, true, "9", - "9" + "9." ], [ "sentence", - "", + "proper", 11430385775112165283, "TEXT", "#/texts/178", @@ -66401,13 +71147,13 @@ ], [ "reference", - "author", + "authors", 11430385775112165283, "TEXT", "#/texts/178", 1.0, - 7087532328962869115, - 5488976721015347116, + 6557810835592781181, + 17946965941344362242, null, null, 3, @@ -66417,7 +71163,7 @@ 2, 5, true, - "Estrada E.", + "Estrada E", "Estrada E." ], [ @@ -66428,18 +71174,18 @@ "#/texts/178", 1.0, 10002059539925749429, - 4038144589619849267, + 4038144589619849266, null, null, 14, - 53, + 54, 14, - 53, + 54, 5, - 10, + 11, true, "Subgraph centrality in complex networks", - "Subgraph centrality in complex networks" + "Subgraph centrality in complex networks." ], [ "term", @@ -66485,7 +71231,7 @@ ], [ "sentence", - "", + "proper", 11430385775112165283, "TEXT", "#/texts/178", @@ -66512,18 +71258,18 @@ "#/texts/178", 1.0, 1821145667706451373, - 6349148037602643636, + 6349148037602643639, null, null, 55, - 65, + 66, 55, - 65, + 66, 11, - 14, + 15, true, "Phys Rev E", - "Phys Rev E" + "Phys Rev E." ], [ "term", @@ -66546,6 +71292,27 @@ "Phys Rev E", "Phys Rev E" ], + [ + "sentence", + "improper", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 13573598089500757206, + 6692458992266800770, + null, + null, + 67, + 85, + 67, + 85, + 15, + 24, + true, + "2005;71(5):056103.", + "2005;71(5):056103." + ], [ "reference", "date", @@ -66553,19 +71320,40 @@ "TEXT", "#/texts/178", 1.0, - 8104407400303630267, - 3516783299715161152, + 329104147695665975, + 9845650019423915667, null, null, 67, - 74, + 72, 67, - 74, + 72, 15, - 18, + 17, true, - "2005;71", - "2005;71" + "2005;", + "2005;" + ], + [ + "reference", + "volume", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 16380810009856206301, + 11521716509534155114, + null, + null, + 72, + 78, + 72, + 78, + 17, + 22, + true, + "71(5):", + "71(5):" ], [ "parenthesis", @@ -66590,28 +71378,70 @@ ], [ "reference", - "citation-number", + "pages", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 16380805714058077749, + 2458865183124865563, + null, + null, + 78, + 85, + 78, + 85, + 22, + 24, + true, + "056103", + "056103." + ], + [ + "sentence", + "improper", 5825495964576843004, "TEXT", "#/texts/179", 1.0, - 15441160910541481982, - 2952327273286615865, + 12178341415896426716, + 2496381961233018859, null, null, 0, - 2, + 3, + 0, + 3, 0, 2, + true, + "10.", + "10." + ], + [ + "reference", + "reference-number", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 15441160910541481982, + 2952327273286615866, + null, + null, 0, - 1, + 3, + 0, + 3, + 0, + 2, true, "10", - "10" + "10." ], [ "sentence", - "", + "proper", 5825495964576843004, "TEXT", "#/texts/179", @@ -66632,24 +71462,24 @@ ], [ "reference", - "author", + "authors", 5825495964576843004, "TEXT", "#/texts/179", 1.0, - 2628812302410383486, - 8225541491002394036, + 15943206817210566989, + 16687717442496902915, null, null, 4, - 19, + 38, 4, - 19, + 38, 2, - 4, + 9, true, - "Estrada Ernesto", - "Estrada Ernesto" + "Estrada Ernesto, Higham Desmond J", + "Estrada Ernesto, Higham Desmond J." ], [ "term", @@ -66672,27 +71502,6 @@ "Estrada Ernesto", "Estrada Ernesto" ], - [ - "reference", - "author", - 5825495964576843004, - "TEXT", - "#/texts/179", - 1.0, - 17728567422753594500, - 4401840231895103727, - null, - null, - 21, - 38, - 21, - 38, - 5, - 9, - true, - "Higham Desmond J.", - "Higham Desmond J." - ], [ "term", "single-term", @@ -66714,6 +71523,27 @@ "Higham Desmond J", "Higham Desmond J" ], + [ + "reference", + "date", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 389609625548777062, + 8937154938925174773, + null, + null, + 39, + 46, + 39, + 46, + 9, + 13, + true, + "2010", + "(2010)." + ], [ "parenthesis", "reference", @@ -66736,56 +71566,56 @@ "(2010)" ], [ - "reference", - "date", + "sentence", + "improper", 5825495964576843004, "TEXT", "#/texts/179", 1.0, - 389609625548777062, - 8937154938925173833, + 17767354399704235166, + 8049906976560456930, null, null, - 40, - 44, - 40, - 44, - 10, - 11, + 45, + 46, + 45, + 46, + 12, + 13, true, - "2010", - "2010" + ".", + "." ], [ - "reference", - "journal", + "sentence", + "proper", 5825495964576843004, "TEXT", "#/texts/179", 1.0, - 745633759305567859, - 2105664067016610109, + 13313338743045791386, + 13496281760238992122, null, null, 47, - 112, + 100, 47, - 112, + 100, 13, - 22, + 20, true, - "Network Properties Revealed through Matrix Functions. SIAM Review", - "Network Properties Revealed through Matrix Functions. SIAM Review" + "Network Properties Revealed through Matrix Functions.", + "Network Properties Revealed through Matrix Functions." ], [ - "sentence", - "", + "reference", + "title", 5825495964576843004, "TEXT", "#/texts/179", 1.0, - 13313338743045791386, - 13496281760238992122, + 10343442203235089501, + 11301938714393369373, null, null, 47, @@ -66795,7 +71625,7 @@ 13, 20, true, - "Network Properties Revealed through Matrix Functions.", + "Network Properties Revealed through Matrix Functions", "Network Properties Revealed through Matrix Functions." ], [ @@ -66842,7 +71672,7 @@ ], [ "sentence", - "", + "proper", 5825495964576843004, "TEXT", "#/texts/179", @@ -66861,6 +71691,27 @@ "SIAM Review, 52, (4), 696-714.", "SIAM Review, 52, (4), 696-714." ], + [ + "reference", + "journal", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 2746419737099405232, + 18061106767070096393, + null, + null, + 101, + 113, + 101, + 113, + 20, + 23, + true, + "SIAM Review", + "SIAM Review," + ], [ "term", "single-term", @@ -66882,6 +71733,27 @@ "SIAM Review", "SIAM Review" ], + [ + "reference", + "volume", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 16380809986266457236, + 161102776712015127, + null, + null, + 114, + 122, + 114, + 122, + 23, + 29, + true, + "52, (4", + "52, (4)," + ], [ "parenthesis", "reference", @@ -66903,6 +71775,27 @@ "(4)", "(4)" ], + [ + "reference", + "pages", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 8104408773920978895, + 9147525378271823462, + null, + null, + 123, + 131, + 123, + 131, + 29, + 33, + true, + "696-714", + "696-714." + ], [ "expression", "wtoken-concatenation", @@ -66924,26 +71817,47 @@ "696-714", "696-714" ], + [ + "sentence", + "improper", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 10188858309180365192, + 12377323489588219996, + null, + null, + 132, + 168, + 132, + 168, + 33, + 49, + true, + "http://dx.doi.org/10.1137/090761070.", + "http://dx.doi.org/10.1137/090761070." + ], [ "reference", - "url", + "doi", 5825495964576843004, "TEXT", "#/texts/179", 1.0, 16159594323378820687, - 15692242274322104012, + 15692242274322104013, null, null, 132, - 167, + 168, 132, - 167, + 168, 33, - 48, + 49, true, "http://dx.doi.org/10.1137/090761070", - "http://dx.doi.org/10.1137/090761070" + "http://dx.doi.org/10.1137/090761070." ], [ "term", @@ -67009,50 +71923,50 @@ "org" ], [ - "reference", - "citation-number", + "sentence", + "improper", 5698421097735371040, "TEXT", "#/texts/180", 1.0, - 15441160910541481983, - 11293846485728944316, + 12178341415896426655, + 7596226664406524957, null, null, 0, - 2, + 3, 0, - 2, + 3, 0, - 1, + 2, true, - "11", - "11" + "11.", + "11." ], [ "reference", - "author", + "reference-number", 5698421097735371040, "TEXT", "#/texts/180", 1.0, - 12825927039497398082, - 7276111248299729235, + 15441160910541481983, + 11293846485728944319, null, null, - 4, - 39, - 4, - 39, + 0, + 3, + 0, + 3, + 0, 2, - 7, true, - "Labs Redis. Benchmarking RedisGraph", - "Labs Redis. Benchmarking RedisGraph" + "11", + "11." ], [ "sentence", - "", + "proper", 5698421097735371040, "TEXT", "#/texts/180", @@ -67071,6 +71985,27 @@ "Labs Redis.", "Labs Redis." ], + [ + "reference", + "authors", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 1413805758909278007, + 12182268615745487815, + null, + null, + 4, + 15, + 4, + 15, + 2, + 5, + true, + "Labs Redis", + "Labs Redis." + ], [ "term", "single-term", @@ -67094,7 +72029,7 @@ ], [ "sentence", - "", + "proper", 5698421097735371040, "TEXT", "#/texts/180", @@ -67113,6 +72048,27 @@ "Benchmarking RedisGraph 1.0.", "Benchmarking RedisGraph 1.0." ], + [ + "reference", + "title", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 17216005724109731720, + 11993954726519740908, + null, + null, + 16, + 44, + 16, + 44, + 5, + 11, + true, + "Benchmarking RedisGraph 1.0", + "Benchmarking RedisGraph 1.0." + ], [ "term", "single-term", @@ -67156,92 +72112,92 @@ "1.0" ], [ - "reference", - "date", + "sentence", + "improper", 5698421097735371040, "TEXT", "#/texts/180", 1.0, - 17767354399704235161, - 12147516458969154680, + 329104147695662014, + 11537339699383207639, null, null, - 40, - 41, - 40, - 41, - 7, - 8, + 45, + 50, + 45, + 50, + 11, + 13, true, - "1", - "1" + "2019.", + "2019." ], [ "reference", - "title", + "date", 5698421097735371040, "TEXT", "#/texts/180", 1.0, - 17767354399704235160, - 12147516442504229262, + 389609625548777055, + 1517668227262464255, null, null, - 42, - 43, - 42, - 43, - 9, - 10, + 45, + 50, + 45, + 50, + 11, + 13, true, - "0", - "0" + "2019", + "2019." ], [ - "reference", - "date", - 5698421097735371040, + "sentence", + "improper", + 5870535063942256428, "TEXT", - "#/texts/180", + "#/texts/181", 1.0, - 389609625548777055, - 1517668227262464254, + 12178341415896426590, + 4180477249261114913, null, null, - 45, - 49, - 45, - 49, - 11, - 12, + 0, + 3, + 0, + 3, + 0, + 2, true, - "2019", - "2019" + "12.", + "12." ], [ "reference", - "citation-number", + "reference-number", 5870535063942256428, "TEXT", "#/texts/181", 1.0, 15441160910541481976, - 12703724519968684238, + 12703724519968684239, null, null, 0, - 2, + 3, 0, - 2, + 3, 0, - 1, + 2, true, "12", - "12" + "12." ], [ "sentence", - "", + "proper", 5870535063942256428, "TEXT", "#/texts/181", @@ -67262,13 +72218,13 @@ ], [ "reference", - "author", + "authors", 5870535063942256428, "TEXT", "#/texts/181", 1.0, - 15754713894443025139, - 17869835566751337591, + 15861880261780248619, + 9206162103335947230, null, null, 4, @@ -67278,7 +72234,7 @@ 2, 4, true, - "TigerGraph.", + "TigerGraph", "TigerGraph." ], [ @@ -67304,7 +72260,7 @@ ], [ "sentence", - "", + "proper", 5870535063942256428, "TEXT", "#/texts/181", @@ -67331,18 +72287,18 @@ "#/texts/181", 1.0, 17475892521501552303, - 8529795867214537154, + 8529795867214537155, null, null, 16, - 45, + 46, 16, - 45, + 46, 4, - 10, + 11, true, "Real-Time Deep Link Analytics", - "Real-Time Deep Link Analytics" + "Real-Time Deep Link Analytics." ], [ "name", @@ -67386,6 +72342,27 @@ "Time Deep Link Analytics", "Time Deep Link Analytics" ], + [ + "sentence", + "improper", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 329104147695661814, + 7543078487534121494, + null, + null, + 47, + 52, + 47, + 52, + 11, + 13, + true, + "2018.", + "2018." + ], [ "reference", "date", @@ -67394,43 +72371,64 @@ "#/texts/181", 1.0, 389609625548777054, - 3194806985827377522, + 3194806985827377521, null, null, 47, - 51, + 52, 47, - 51, + 52, 11, - 12, + 13, true, "2018", - "2018" + "2018." + ], + [ + "sentence", + "improper", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 12178341415896424072, + 14083466083102208723, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "13.", + "13." ], [ "reference", - "citation-number", + "reference-number", 18196767266655606709, "TEXT", "#/texts/182", 1.0, 15441160910541481977, - 12462842527617278799, + 12462842527617278832, null, null, 0, - 2, + 3, 0, - 2, + 3, 0, - 1, + 2, true, "13", - "13" + "13." ], [ "sentence", - "", + "proper", 18196767266655606709, "TEXT", "#/texts/182", @@ -67451,24 +72449,24 @@ ], [ "reference", - "author", + "authors", 18196767266655606709, "TEXT", "#/texts/182", 1.0, - 14652280730090715542, - 9368048166047908224, + 4413158441497355977, + 888725642167870501, null, null, 4, - 12, + 21, 4, - 12, + 21, 2, - 4, + 8, true, - "Jeremy K", - "Jeremy K" + "Jeremy K, John G", + "Jeremy K, John G." ], [ "term", @@ -67512,27 +72510,6 @@ "John G Graph", "John G. Graph" ], - [ - "reference", - "author", - 18196767266655606709, - "TEXT", - "#/texts/182", - 1.0, - 8106396242733918714, - 2646308426186848374, - null, - null, - 14, - 21, - 14, - 21, - 5, - 8, - true, - "John G.", - "John G." - ], [ "reference", "title", @@ -67541,18 +72518,18 @@ "#/texts/182", 1.0, 11539515714196318944, - 4409464707523225606, + 4409464707523225605, null, null, 22, - 72, + 73, 22, - 72, + 73, 8, - 16, + 17, true, "Graph Algorithms in the Language of Linear Algebra", - "Graph Algorithms in the Language of Linear Algebra" + "Graph Algorithms in the Language of Linear Algebra." ], [ "term", @@ -67619,7 +72596,7 @@ ], [ "sentence", - "", + "proper", 18196767266655606709, "TEXT", "#/texts/182", @@ -67638,27 +72615,6 @@ "Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011.", "Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011." ], - [ - "reference", - "journal", - 18196767266655606709, - "TEXT", - "#/texts/182", - 1.0, - 1813266722082342225, - 593931840598100395, - null, - null, - 74, - 86, - 74, - 86, - 17, - 18, - true, - "Philadelphia", - "Philadelphia" - ], [ "term", "single-term", @@ -67792,44 +72748,65 @@ "TEXT", "#/texts/182", 1.0, - 16381206542172555296, - 17521384641614480308, + 389609625548777063, + 12403401240882116541, null, null, - 138, - 144, - 138, - 144, - 27, - 29, + 140, + 145, + 140, + 145, + 28, + 30, + true, + "2011", + "2011." + ], + [ + "sentence", + "improper", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 12178341415896424137, + 2021336641528383539, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, true, - "; 2011", - "; 2011" + "14.", + "14." ], [ "reference", - "citation-number", + "reference-number", 3623403683642367845, "TEXT", "#/texts/183", 1.0, 15441160910541481978, - 9067685736347109846, + 9067685736347109847, null, null, 0, - 2, + 3, 0, - 2, + 3, 0, - 1, + 2, true, "14", - "14" + "14." ], [ "sentence", - "", + "proper", 3623403683642367845, "TEXT", "#/texts/183", @@ -67850,24 +72827,24 @@ ], [ "reference", - "author", + "authors", 3623403683642367845, "TEXT", "#/texts/183", 1.0, - 3893756947393595038, - 15910484170600691612, + 11143603644967201081, + 10268584537510827373, null, null, 4, - 17, + 98, 4, - 17, + 96, 2, - 4, + 21, true, - "Kepner Jeremy", - "Kepner Jeremy" + "Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning", + "Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning" ], [ "term", @@ -67890,27 +72867,6 @@ "Kepner Jeremy", "Kepner Jeremy" ], - [ - "reference", - "author", - 3623403683642367845, - "TEXT", - "#/texts/183", - 1.0, - 4638041857648041651, - 2139644705806385528, - null, - null, - 19, - 30, - 19, - 30, - 5, - 7, - true, - "Bader David", - "Bader David" - ], [ "term", "single-term", @@ -67932,27 +72888,6 @@ "Bader David", "Bader David" ], - [ - "reference", - "author", - 3623403683642367845, - "TEXT", - "#/texts/183", - 1.0, - 9621725435760800320, - 4639858687526125642, - null, - null, - 32, - 47, - 32, - 45, - 8, - 12, - true, - "Bulu\u00e7 Ayd \u0131 n", - "Bulu\u00e7 Ayd \u0131 n" - ], [ "term", "single-term", @@ -67974,27 +72909,6 @@ "Bulu\u00e7 Ayd", "Bulu\u00e7 Ayd" ], - [ - "reference", - "author", - 3623403683642367845, - "TEXT", - "#/texts/183", - 1.0, - 978039607314331382, - 9008054255178396141, - null, - null, - 49, - 61, - 47, - 59, - 13, - 15, - true, - "Gilbert John", - "Gilbert John" - ], [ "term", "single-term", @@ -68016,27 +72930,6 @@ "Gilbert John", "Gilbert John" ], - [ - "reference", - "author", - 3623403683642367845, - "TEXT", - "#/texts/183", - 1.0, - 10968707392751490476, - 11627993516556341660, - null, - null, - 63, - 78, - 61, - 76, - 16, - 18, - true, - "Mattson Timothy", - "Mattson Timothy" - ], [ "term", "single-term", @@ -68059,8 +72952,8 @@ "Mattson Timothy" ], [ - "reference", - "author", + "term", + "single-term", 3623403683642367845, "TEXT", "#/texts/183", @@ -68080,25 +72973,25 @@ "Meyerhenke Henning" ], [ - "term", - "single-term", + "reference", + "date", 3623403683642367845, "TEXT", "#/texts/183", 1.0, - 3010219124533777340, - 3552467627404320563, + 389609625548777059, + 3330964369910710952, null, null, - 80, - 98, - 78, - 96, - 19, + 99, + 106, + 97, + 104, 21, + 25, true, - "Meyerhenke Henning", - "Meyerhenke Henning" + "2015", + "(2015)." ], [ "parenthesis", @@ -68121,30 +73014,9 @@ "(2015)", "(2015)" ], - [ - "reference", - "date", - 3623403683642367845, - "TEXT", - "#/texts/183", - 1.0, - 389609625548777059, - 3330964369910711146, - null, - null, - 100, - 104, - 98, - 102, - 22, - 23, - true, - "2015", - "2015" - ], [ "sentence", - "", + "proper", 3623403683642367845, "TEXT", "#/texts/183", @@ -68165,24 +73037,24 @@ ], [ "reference", - "location", + "title", 3623403683642367845, "TEXT", "#/texts/183", 1.0, - 9440834537675533739, - 6746478687441634720, + 4447441827419394948, + 14102975208778644634, null, null, 107, - 143, + 163, 105, - 141, + 161, 25, - 33, + 37, true, - "Graphs, Matrices, and the GraphBLAS:", - "Graphs, Matrices, and the GraphBLAS:" + "Graphs, Matrices, and the GraphBLAS: Seven Good Reasons", + "Graphs, Matrices, and the GraphBLAS: Seven Good Reasons." ], [ "term", @@ -68270,7 +73142,7 @@ ], [ "sentence", - "", + "proper", 3623403683642367845, "TEXT", "#/texts/183", @@ -68289,6 +73161,27 @@ "Procedia Computer Science, 51, 2453-2462.", "Procedia Computer Science, 51, 2453-2462." ], + [ + "reference", + "journal", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 11311803343161413167, + 2833609951174621744, + null, + null, + 164, + 190, + 162, + 188, + 37, + 41, + true, + "Procedia Computer Science", + "Procedia Computer Science," + ], [ "term", "single-term", @@ -68310,6 +73203,48 @@ "Procedia Computer Science", "Procedia Computer Science" ], + [ + "reference", + "volume", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 15441160910541486330, + 9067694506000682764, + null, + null, + 191, + 194, + 189, + 192, + 41, + 43, + true, + "51", + "51," + ], + [ + "reference", + "pages", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 6573068860818606718, + 4687668980596472571, + null, + null, + 195, + 205, + 193, + 203, + 43, + 47, + true, + "2453-2462", + "2453-2462." + ], [ "expression", "wtoken-concatenation", @@ -68331,26 +73266,47 @@ "2453-2462", "2453-2462" ], + [ + "sentence", + "improper", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 17722292403768798252, + 7166915790163671949, + null, + null, + 206, + 252, + 204, + 250, + 47, + 71, + true, + "http://dx.doi.org/10.1016/j.procs.2015.05.353.", + "http://dx.doi.org/10.1016/j.procs.2015.05.353." + ], [ "reference", - "url", + "doi", 3623403683642367845, "TEXT", "#/texts/183", 1.0, - 16959048237954323084, - 10596594611762835857, + 13624625778145690696, + 15445141723877014785, null, null, 206, - 239, + 252, 204, - 237, + 250, 47, - 64, + 71, true, - "http://dx.doi.org/10.1016/j.procs", - "http://dx.doi.org/10.1016/j.procs" + "http://dx.doi.org/10.1016/j.procs.2015.05.353", + "http://dx.doi.org/10.1016/j.procs.2015.05.353." ], [ "term", @@ -68458,50 +73414,50 @@ "procs" ], [ - "reference", - "date", - 3623403683642367845, + "sentence", + "improper", + 13936866850854297069, "TEXT", - "#/texts/183", + "#/texts/184", 1.0, - 389609625548777059, - 3330964369910703397, + 12178341415896420618, + 3824456860028023899, null, null, - 240, - 244, - 238, - 242, - 65, - 66, + 0, + 3, + 0, + 3, + 0, + 2, true, - "2015", - "2015" + "15.", + "15." ], [ "reference", - "citation-number", + "reference-number", 13936866850854297069, "TEXT", "#/texts/184", 1.0, 15441160910541481979, - 10213682970367471311, + 10213682970367471344, null, null, 0, - 2, + 3, 0, - 2, + 3, 0, - 1, + 2, true, "15", - "15" + "15." ], [ "sentence", - "", + "proper", 13936866850854297069, "TEXT", "#/texts/184", @@ -68522,24 +73478,24 @@ ], [ "reference", - "author", + "authors", 13936866850854297069, "TEXT", "#/texts/184", 1.0, - 8106396252822508385, - 7971302054101082514, + 15404759540282474341, + 7980371121466471931, null, null, 4, - 11, + 28, 4, - 11, + 28, 2, - 4, + 9, true, - "Aydin B", - "Aydin B" + "Aydin B, Gilbert John R", + "Aydin B, Gilbert John R." ], [ "term", @@ -68583,27 +73539,6 @@ "Gilbert John R The", "Gilbert John R. The" ], - [ - "reference", - "author", - 13936866850854297069, - "TEXT", - "#/texts/184", - 1.0, - 3367556578117774584, - 5704823584998723957, - null, - null, - 13, - 28, - 13, - 28, - 5, - 9, - true, - "Gilbert John R.", - "Gilbert John R." - ], [ "reference", "title", @@ -68612,18 +73547,18 @@ "#/texts/184", 1.0, 6150328359964540652, - 10199114762007747151, + 10199114762007747144, null, null, 29, - 93, + 94, 29, - 93, + 94, 9, - 19, + 20, true, "The combinatorial BLAS: design, implementation, and applications", - "The combinatorial BLAS: design, implementation, and applications" + "The combinatorial BLAS: design, implementation, and applications." ], [ "term", @@ -68711,7 +73646,7 @@ ], [ "sentence", - "", + "proper", 13936866850854297069, "TEXT", "#/texts/184", @@ -68738,18 +73673,18 @@ "#/texts/184", 1.0, 15067288891537767501, - 3357793480659482128, + 3357793480659482143, null, null, 95, - 125, + 126, 95, - 125, + 126, 20, - 26, + 27, true, "Int J High Perform Comput Appl", - "Int J High Perform Comput Appl" + "Int J High Perform Comput Appl." ], [ "term", @@ -68774,7 +73709,7 @@ ], [ "sentence", - "", + "proper", 13936866850854297069, "TEXT", "#/texts/184", @@ -68794,46 +73729,46 @@ "2011;25 (4):496-509." ], [ - "expression", - "wtoken-concatenation", + "reference", + "date", 13936866850854297069, "TEXT", "#/texts/184", 1.0, - 8104407400321262254, - 3429534335477953780, + 11473506778099773410, + 15021630246282813280, null, null, 127, - 134, + 147, 127, - 134, + 147, 27, - 30, + 38, true, - "2011;25", - "2011;25" + "2011;25 (4):496-509", + "2011;25 (4):496-509." ], [ - "reference", - "date", + "expression", + "wtoken-concatenation", 13936866850854297069, "TEXT", "#/texts/184", 1.0, - 329104147695662665, - 13454856964816440075, + 8104407400321262254, + 3429534335477953780, null, null, 127, - 132, + 134, 127, - 132, + 134, 27, - 29, + 30, true, - "2011;", - "2011;" + "2011;25", + "2011;25" ], [ "parenthesis", @@ -68856,30 +73791,51 @@ "(4)", "(4)" ], + [ + "sentence", + "improper", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 12178341415896420683, + 15900700274059095170, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "16.", + "16." + ], [ "reference", - "citation-number", + "reference-number", 8497015665124263236, "TEXT", "#/texts/185", 1.0, 15441160910541481860, - 13099555958800192769, + 13099555958800192774, null, null, 0, - 2, + 3, 0, - 2, + 3, 0, - 1, + 2, true, "16", - "16" + "16." ], [ "sentence", - "", + "proper", 8497015665124263236, "TEXT", "#/texts/185", @@ -68900,24 +73856,24 @@ ], [ "reference", - "author", + "authors", 8497015665124263236, "TEXT", "#/texts/185", 1.0, - 14652280730090715542, - 12791881049692147803, + 17859568381876102831, + 13827450673521059842, null, null, 4, - 12, + 44, 4, - 12, + 44, 2, - 4, + 15, true, - "Jeremy K", - "Jeremy K" + "Jeremy K, Peter A, Bader David A, et al", + "Jeremy K, Peter A, Bader David A, et al." ], [ "term", @@ -68940,27 +73896,6 @@ "Jeremy K", "Jeremy K" ], - [ - "reference", - "author", - 8497015665124263236, - "TEXT", - "#/texts/185", - 1.0, - 8106352035144611671, - 4513564816050590788, - null, - null, - 14, - 21, - 14, - 21, - 5, - 7, - true, - "Peter A", - "Peter A" - ], [ "term", "single-term", @@ -68982,27 +73917,6 @@ "Peter A", "Peter A" ], - [ - "reference", - "author", - 8497015665124263236, - "TEXT", - "#/texts/185", - 1.0, - 11373457542276896833, - 10633744312666392907, - null, - null, - 23, - 36, - 23, - 36, - 8, - 11, - true, - "Bader David A", - "Bader David A" - ], [ "term", "single-term", @@ -69052,19 +73966,19 @@ "TEXT", "#/texts/185", 1.0, - 16641826418709048621, - 2282440200854755549, + 11793767366674291400, + 16322176465659145653, null, null, 45, - 86, + 103, 45, - 86, + 103, 15, - 20, + 25, true, - "Mathematical foundations of the GraphBLAS", - "Mathematical foundations of the GraphBLAS" + "Mathematical foundations of the GraphBLAS. 2016 IEEE HPEC", + "Mathematical foundations of the GraphBLAS. 2016 IEEE HPEC." ], [ "term", @@ -69109,29 +74023,29 @@ "GraphBLAS" ], [ - "reference", - "container-title", + "sentence", + "improper", 8497015665124263236, "TEXT", "#/texts/185", 1.0, - 10709633855219206820, - 961925091352749103, + 389609625548777056, + 8567475520614412130, null, null, 88, - 102, + 92, 88, - 102, + 92, 21, - 24, + 22, true, - "2016 IEEE HPEC", - "2016 IEEE HPEC" + "2016", + "2016" ], [ "sentence", - "", + "proper", 8497015665124263236, "TEXT", "#/texts/185", @@ -69171,6 +74085,27 @@ "IEEE HPEC", "IEEE HPEC" ], + [ + "sentence", + "improper", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 12668400427997832797, + 10477465110317917500, + null, + null, + 104, + 114, + 104, + 114, + 25, + 31, + true, + "2016; 1-9.", + "2016; 1-9." + ], [ "reference", "date", @@ -69178,44 +74113,86 @@ "TEXT", "#/texts/185", 1.0, - 6573474049096193902, - 2260581871937703980, + 329104147695661831, + 5482404028284083905, null, null, 104, - 113, + 109, 104, - 113, + 109, 25, - 30, + 27, true, - "2016; 1-9", - "2016; 1-9" + "2016;", + "2016;" ], [ "reference", - "citation-number", + "pages", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 12178341415896427413, + 15900868514674279592, + null, + null, + 110, + 114, + 110, + 114, + 27, + 31, + true, + "1-9", + "1-9." + ], + [ + "sentence", + "improper", 15947529491299956047, "TEXT", "#/texts/186", 1.0, - 15441160910541481861, - 5749903657566610070, + 12178341415896424331, + 1785950286755592566, null, null, 0, - 2, + 3, + 0, + 3, 0, 2, + true, + "17.", + "17." + ], + [ + "reference", + "reference-number", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 15441160910541481861, + 5749903657566610071, + null, + null, 0, - 1, + 3, + 0, + 3, + 0, + 2, true, "17", - "17" + "17." ], [ "sentence", - "", + "proper", 15947529491299956047, "TEXT", "#/texts/186", @@ -69236,24 +74213,24 @@ ], [ "reference", - "author", + "authors", 15947529491299956047, "TEXT", "#/texts/186", 1.0, - 14650296444613217893, - 2015187192231796797, + 10218631809067229551, + 3045627595466121013, null, null, 4, - 12, + 46, 4, - 12, + 46, 2, - 4, + 15, true, - "Ariful A", - "Ariful A" + "Ariful A, Mathias J, Aydin B, Ng Esmond G", + "Ariful A, Mathias J, Aydin B, Ng Esmond G." ], [ "term", @@ -69276,27 +74253,6 @@ "Ariful A", "Ariful A" ], - [ - "reference", - "author", - 15947529491299956047, - "TEXT", - "#/texts/186", - 1.0, - 6611311853662317003, - 219996680584521934, - null, - null, - 14, - 23, - 14, - 23, - 5, - 7, - true, - "Mathias J", - "Mathias J" - ], [ "term", "single-term", @@ -69318,27 +74274,6 @@ "Mathias J", "Mathias J" ], - [ - "reference", - "author", - 15947529491299956047, - "TEXT", - "#/texts/186", - 1.0, - 8106396252822508385, - 5214697480984905265, - null, - null, - 25, - 32, - 25, - 32, - 8, - 10, - true, - "Aydin B", - "Aydin B" - ], [ "term", "single-term", @@ -69360,27 +74295,6 @@ "Aydin B", "Aydin B" ], - [ - "reference", - "author", - 15947529491299956047, - "TEXT", - "#/texts/186", - 1.0, - 1138450846564361539, - 13516232875802125645, - null, - null, - 34, - 46, - 34, - 46, - 11, - 15, - true, - "Ng Esmond G.", - "Ng Esmond G." - ], [ "term", "single-term", @@ -69404,7 +74318,7 @@ ], [ "sentence", - "", + "proper", 15947529491299956047, "TEXT", "#/texts/186", @@ -69431,18 +74345,18 @@ "#/texts/186", 1.0, 18143113072209505450, - 5317689214231344382, + 5317689214231344369, null, null, 47, - 104, + 105, 47, - 104, + 105, 15, - 25, + 26, true, "The reverse Cuthill-McKee algorithm in distributed-memory", - "The reverse Cuthill-McKee algorithm in distributed-memory" + "The reverse Cuthill-McKee algorithm in distributed-memory." ], [ "term", @@ -69572,28 +74486,49 @@ ], [ "reference", - "container-title", + "conference", 15947529491299956047, "TEXT", "#/texts/186", 1.0, - 10701056912570859123, - 6872071652706022831, + 5977126754161531620, + 8369992873906444297, null, null, 106, - 175, + 197, 106, - 175, + 197, 26, - 34, + 44, true, - "2017 IEEE International Parallel and Distributed Processing Symposium", - "2017 IEEE International Parallel and Distributed Processing Symposium" + "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2017: 22-31", + "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2017: 22-31." ], [ "sentence", - "", + "improper", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 389609625548777057, + 8314107736373646335, + null, + null, + 106, + 110, + 106, + 110, + 26, + 27, + true, + "2017", + "2017" + ], + [ + "sentence", + "proper", 15947529491299956047, "TEXT", "#/texts/186", @@ -69697,8 +74632,8 @@ "(IPDPS)" ], [ - "reference", - "container-title", + "term", + "single-term", 15947529491299956047, "TEXT", "#/texts/186", @@ -69718,71 +74653,71 @@ "IPDPS" ], [ - "term", - "single-term", + "sentence", + "improper", 15947529491299956047, "TEXT", "#/texts/186", 1.0, - 329104161866629985, - 4498077561104002021, + 15668671505312224859, + 7267236904131898531, null, null, - 177, - 182, - 177, - 182, - 35, - 36, + 185, + 197, + 185, + 197, + 38, + 44, true, - "IPDPS", - "IPDPS" + "2017: 22-31.", + "2017: 22-31." ], [ - "reference", - "title", - 15947529491299956047, + "sentence", + "improper", + 14843401725435831033, "TEXT", - "#/texts/186", + "#/texts/187", 1.0, - 7366731910384143591, - 4074534479596534226, + 12178341415896424394, + 9464187724344101613, null, null, - 185, - 196, - 185, - 196, - 38, - 43, + 0, + 3, + 0, + 3, + 0, + 2, true, - "2017: 22-31", - "2017: 22-31" + "18.", + "18." ], [ "reference", - "citation-number", + "reference-number", 14843401725435831033, "TEXT", "#/texts/187", 1.0, 15441160910541481862, - 17618650105274567067, + 17618650105274567066, null, null, 0, - 2, + 3, 0, - 2, + 3, 0, - 1, + 2, true, "18", - "18" + "18." ], [ "sentence", - "", + "proper", 14843401725435831033, "TEXT", "#/texts/187", @@ -69803,24 +74738,24 @@ ], [ "reference", - "author", + "authors", 14843401725435831033, "TEXT", "#/texts/187", 1.0, - 9277063416399937233, - 9921862040524615824, + 9985406748938595316, + 16734523975477127612, null, null, 4, - 14, + 37, 4, - 14, + 37, 2, - 4, + 11, true, - "Rukhsana S", - "Rukhsana S" + "Rukhsana S, Anila U, Chughtai IR", + "Rukhsana S, Anila U, Chughtai IR." ], [ "term", @@ -69843,27 +74778,6 @@ "Rukhsana S", "Rukhsana S" ], - [ - "reference", - "author", - 14843401725435831033, - "TEXT", - "#/texts/187", - 1.0, - 8106479273814684994, - 12770854321018137055, - null, - null, - 16, - 23, - 16, - 23, - 5, - 7, - true, - "Anila U", - "Anila U" - ], [ "term", "single-term", @@ -69885,27 +74799,6 @@ "Anila U", "Anila U" ], - [ - "reference", - "author", - 14843401725435831033, - "TEXT", - "#/texts/187", - 1.0, - 16985962715048067011, - 772749724699858811, - null, - null, - 25, - 37, - 25, - 37, - 8, - 11, - true, - "Chughtai IR.", - "Chughtai IR." - ], [ "term", "single-term", @@ -69955,19 +74848,19 @@ "TEXT", "#/texts/187", 1.0, - 12931819230736677229, - 14856363282836835505, + 5583013427504923325, + 8891716095058217669, null, null, 38, - 86, + 120, 38, - 86, + 120, 11, - 18, + 23, true, - "Review of storage techniques for sparse matrices", - "Review of storage techniques for sparse matrices" + "Review of storage techniques for sparse matrices. 2005 Pakistan Section Multitopic", + "Review of storage techniques for sparse matrices. 2005 Pakistan Section Multitopic" ], [ "term", @@ -70012,8 +74905,8 @@ "sparse matrices" ], [ - "reference", - "date", + "sentence", + "improper", 14843401725435831033, "TEXT", "#/texts/187", @@ -70034,7 +74927,7 @@ ], [ "sentence", - "", + "proper", 14843401725435831033, "TEXT", "#/texts/187", @@ -70054,8 +74947,8 @@ "Pakistan Section Multitopic Conference." ], [ - "reference", - "title", + "term", + "single-term", 14843401725435831033, "TEXT", "#/texts/187", @@ -70075,71 +74968,113 @@ "Pakistan Section Multitopic Conference" ], [ - "term", - "single-term", + "reference", + "conference", 14843401725435831033, "TEXT", "#/texts/187", 1.0, - 1320248361117940781, - 5199561905441189481, + 4373101011741787076, + 6434853878367657275, null, null, - 93, - 131, - 93, - 131, - 20, - 24, + 121, + 137, + 121, + 137, + 23, + 26, true, - "Pakistan Section Multitopic Conference", - "Pakistan Section Multitopic Conference" + "Conference. 2005", + "Conference. 2005" ], [ - "reference", - "date", + "sentence", + "improper", 14843401725435831033, "TEXT", "#/texts/187", 1.0, - 14654380575675005536, - 9801102795206480618, + 6573469177968412116, + 6998677959073478193, null, null, 133, - 141, + 142, 133, - 141, + 142, 25, - 29, + 30, + true, + "2005 1-7.", + "2005 1-7." + ], + [ + "reference", + "pages", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 12178341415896427411, + 9464229838695116070, + null, + null, + 138, + 142, + 138, + 142, + 26, + 30, true, - "2005 1-7", - "2005 1-7" + "1-7", + "1-7." + ], + [ + "sentence", + "improper", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 12178341415896423945, + 1346293265340748508, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "19.", + "19." ], [ "reference", - "citation-number", + "reference-number", 16676439669743530711, "TEXT", "#/texts/188", 1.0, 15441160910541481863, - 8099163979199984832, + 8099163979199984839, null, null, 0, - 2, + 3, 0, - 2, + 3, 0, - 1, + 2, true, "19", - "19" + "19." ], [ "sentence", - "", + "proper", 16676439669743530711, "TEXT", "#/texts/188", @@ -70160,24 +75095,24 @@ ], [ "reference", - "author", + "authors", 16676439669743530711, "TEXT", "#/texts/188", 1.0, - 14638563242508500832, - 2752940376292253295, + 4102400299870176607, + 16168638938102127468, null, null, 4, - 12, + 56, 4, - 12, + 56, 2, - 4, + 14, true, - "Welte DH", - "Welte DH" + "Welte DH, Horsfield B, Baker DR. Petroleum and Basin", + "Welte DH, Horsfield B, Baker DR. Petroleum and Basin" ], [ "term", @@ -70200,27 +75135,6 @@ "Welte DH", "Welte DH" ], - [ - "reference", - "author", - 16676439669743530711, - "TEXT", - "#/texts/188", - 1.0, - 1317380608127935415, - 8792991722627090893, - null, - null, - 14, - 25, - 14, - 25, - 5, - 7, - true, - "Horsfield B", - "Horsfield B" - ], [ "term", "single-term", @@ -70242,27 +75156,6 @@ "Horsfield B", "Horsfield B" ], - [ - "reference", - "author", - 16676439669743530711, - "TEXT", - "#/texts/188", - 1.0, - 4172892994592792372, - 2160694788416159558, - null, - null, - 27, - 46, - 27, - 46, - 8, - 12, - true, - "Baker DR. Petroleum", - "Baker DR. Petroleum" - ], [ "term", "single-term", @@ -70326,27 +75219,6 @@ "Petroleum and Basin Evolution", "Petroleum and Basin Evolution" ], - [ - "reference", - "author", - 16676439669743530711, - "TEXT", - "#/texts/188", - 1.0, - 5561358046097680519, - 15395766198352277458, - null, - null, - 51, - 67, - 51, - 67, - 13, - 16, - true, - "Basin Evolution:", - "Basin Evolution:" - ], [ "term", "single-term", @@ -70375,19 +75247,19 @@ "TEXT", "#/texts/188", 1.0, - 1197865287651023688, - 134234943361095181, + 4264009440714515689, + 4786526718705436187, null, null, - 68, - 104, - 68, - 104, - 16, - 20, + 57, + 134, + 57, + 134, + 14, + 27, true, - "Insights from Petroleum Geochemistry", - "Insights from Petroleum Geochemistry" + "Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling", + "Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling," ], [ "term", @@ -70452,27 +75324,6 @@ "Petroleum Geochemistry", "Petroleum Geochemistry" ], - [ - "reference", - "journal", - 16676439669743530711, - "TEXT", - "#/texts/188", - 1.0, - 2422127895824933260, - 7556925222758925531, - null, - null, - 106, - 133, - 106, - 133, - 21, - 26, - true, - "Geology, and Basin Modeling", - "Geology, and Basin Modeling" - ], [ "term", "single-term", @@ -70515,27 +75366,6 @@ "Basin Modeling", "Basin Modeling" ], - [ - "reference", - "location", - 16676439669743530711, - "TEXT", - "#/texts/188", - 1.0, - 11741555610443867475, - 15927342063432766432, - null, - null, - 135, - 153, - 135, - 153, - 27, - 30, - true, - "Berlin Heidelberg:", - "Berlin Heidelberg:" - ], [ "term", "single-term", @@ -70585,23 +75415,23 @@ "TEXT", "#/texts/188", 1.0, - 16381206542172924133, - 9981189962990674937, + 389609625536085743, + 8456122008713527720, null, null, - 169, - 175, - 169, - 175, - 33, - 35, + 171, + 176, + 171, + 176, + 34, + 36, true, - "; 1997", - "; 1997" + "1997", + "1997." ], [ "sentence", - "", + "proper", 2986547206451163051, "TEXT", "#/texts/189", @@ -70627,19 +75457,19 @@ "TEXT", "#/texts/189", 1.0, - 912378836411683307, - 17710224191321636054, + 18273937239822213328, + 9383076520753321936, null, null, 0, - 35, + 152, 0, - 35, + 152, 0, - 8, + 31, true, - "How to cite this article: Staar PWJ", - "How to cite this article: Staar PWJ" + "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", + "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora." ], [ "term", @@ -70683,27 +75513,6 @@ "Staar PWJ", "Staar PWJ" ], - [ - "reference", - "author", - 2986547206451163051, - "TEXT", - "#/texts/189", - 1.0, - 8106351306870445011, - 7231860053894851093, - null, - null, - 37, - 44, - 37, - 44, - 9, - 11, - true, - "Dolfi M", - "Dolfi M" - ], [ "term", "single-term", @@ -70746,48 +75555,6 @@ "Auer C Corpus", "Auer C. Corpus" ], - [ - "reference", - "author", - 2986547206451163051, - "TEXT", - "#/texts/189", - 1.0, - 8106479197488776816, - 6022123083747398357, - null, - null, - 46, - 53, - 46, - 53, - 12, - 15, - true, - "Auer C.", - "Auer C." - ], - [ - "reference", - "title", - 2986547206451163051, - "TEXT", - "#/texts/189", - 1.0, - 4375081646508065875, - 5872894694925809811, - null, - null, - 54, - 151, - 54, - 151, - 15, - 30, - true, - "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", - "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora" - ], [ "term", "single-term", @@ -70874,7 +75641,7 @@ ], [ "sentence", - "", + "proper", 2986547206451163051, "TEXT", "#/texts/189", @@ -70901,18 +75668,18 @@ "#/texts/189", 1.0, 10525943314116263182, - 11312474291607917611, + 11312474291607917610, null, null, 153, - 171, + 172, 153, - 171, + 172, 31, - 34, + 35, true, "Applied AI Letters", - "Applied AI Letters" + "Applied AI Letters." ], [ "term", @@ -70937,7 +75704,7 @@ ], [ "sentence", - "", + "proper", 2986547206451163051, "TEXT", "#/texts/189", @@ -70957,29 +75724,29 @@ "2020;1:e20." ], [ - "expression", - "wtoken-concatenation", + "reference", + "date", 2986547206451163051, "TEXT", "#/texts/189", 1.0, 12668563530344603848, - 14820206483220239473, + 14820206483220239470, null, null, 173, - 183, + 184, 173, - 183, + 184, 35, - 41, + 42, true, "2020;1:e20", - "2020;1:e20" + "2020;1:e20." ], [ - "reference", - "date", + "expression", + "wtoken-concatenation", 2986547206451163051, "TEXT", "#/texts/189", @@ -70998,6 +75765,27 @@ "2020;1:e20", "2020;1:e20" ], + [ + "sentence", + "improper", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 751450063096904044, + 2161551171101074414, + null, + null, + 185, + 216, + 185, + 216, + 42, + 58, + true, + "https://doi.org/10.1002/ail2.20", + "https://doi.org/10.1002/ail2.20" + ], [ "reference", "url", @@ -71005,19 +75793,19 @@ "TEXT", "#/texts/189", 1.0, - 3534146179424153776, - 941092659905843871, + 751450063096904044, + 2161551171101074414, null, null, 185, - 203, + 216, 185, - 203, + 216, 42, - 51, + 58, true, - "https://doi.org/10", - "https://doi.org/10" + "https://doi.org/10.1002/ail2.20", + "https://doi.org/10.1002/ail2.20" ], [ "term", @@ -71124,6 +75912,27 @@ "26895595", "26895595" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], [ "numval", "year", @@ -71168,7 +75977,7 @@ ], [ "sentence", - "", + "proper", 18391264192891079539, "TEXT", "#/texts/190", @@ -71418,6 +76227,27 @@ "2023", "2023" ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], [ "term", "single-term", diff --git a/tests/data/glm/test_01A/glm_ref/topology.json b/tests/data/glm/test_01A/glm_ref/topology.json index 08bf9ea3..c5a11c47 100644 --- a/tests/data/glm/test_01A/glm_ref/topology.json +++ b/tests/data/glm/test_01A/glm_ref/topology.json @@ -29,12 +29,12 @@ [ -1, "prev", - 9616 + 9638 ], [ 1, "next", - 9682 + 9706 ], [ 2, @@ -64,12 +64,12 @@ [ 32, "tax-dn", - 1092 + 1111 ], [ 33, "tax-up", - 1684 + 1721 ], [ 64, @@ -84,72 +84,72 @@ [ 66, "to-singular", - 312 + 314 ], [ 67, "to-plural", - 312 + 314 ], [ 96, "to-token", - 1046 + 1073 ], [ 97, "from-token", - 1046 + 1073 ], [ 98, "to-pos", - 1895 + 1882 ], [ 99, "from-pos", - 1895 + 1882 ], [ 100, "to-label", - 1451 + 1572 ], [ 101, "from-label", - 1451 + 1572 ], [ 102, "to-root", - 1107 + 1127 ], [ 103, "from-root", - 1075 + 1094 ], [ 128, "to-sent", - 2216 + 2409 ], [ 129, "from-sent", - 2216 + 2409 ], [ 130, "to-text", - 1460 + 1502 ], [ 131, "from-text", - 1460 + 1502 ], [ 132, @@ -164,12 +164,12 @@ [ 134, "to-doc", - 913 + 937 ], [ 135, "from-doc", - 913 + 937 ], [ 256, @@ -860,61 +860,61 @@ -1, "prev", 1, - 7045 + 7159 ], [ -1, "prev", 2, - 1423 + 1387 ], [ -1, "prev", 4, - 537 + 532 ], [ -1, "prev", 8, - 272 + 243 ], [ -1, "prev", 16, - 165 + 149 ], [ -1, "prev", 32, - 100 + 97 ], [ -1, "prev", 64, - 42 + 40 ], [ -1, "prev", 128, - 22 + 23 ], [ -1, "prev", 256, - 6 + 5 ], [ -1, "prev", 512, - 4 + 3 ], [ -1, @@ -992,61 +992,61 @@ 1, "next", 1, - 7082 + 7199 ], [ 1, "next", 2, - 1433 + 1395 ], [ 1, "next", 4, - 544 + 540 ], [ 1, "next", 8, - 277 + 248 ], [ 1, "next", 16, - 170 + 153 ], [ 1, "next", 32, - 102 + 100 ], [ 1, "next", 64, - 42 + 40 ], [ 1, "next", 128, - 22 + 23 ], [ 1, "next", 256, - 6 + 5 ], [ 1, "next", 512, - 4 + 3 ], [ 1, @@ -1778,25 +1778,25 @@ 32, "tax-dn", 0, - 170 + 172 ], [ 32, "tax-dn", 1, - 263 + 269 ], [ 32, "tax-dn", 2, - 550 + 559 ], [ 32, "tax-dn", 4, - 73 + 75 ], [ 32, @@ -1910,43 +1910,43 @@ 33, "tax-up", 0, - 170 + 172 ], [ 33, "tax-up", 1, - 604 + 622 ], [ 33, "tax-up", 2, - 652 + 663 ], [ 33, "tax-up", 4, - 135 + 139 ], [ 33, "tax-up", 8, - 75 + 76 ], [ 33, "tax-up", 16, - 37 + 36 ], [ 33, "tax-up", 32, - 8 + 10 ], [ 33, @@ -2312,7 +2312,7 @@ 66, "to-singular", 1, - 312 + 314 ], [ 66, @@ -2444,7 +2444,7 @@ 67, "to-plural", 1, - 312 + 314 ], [ 67, @@ -2576,31 +2576,31 @@ 96, "to-token", 1, - 785 + 807 ], [ 96, "to-token", 2, - 106 + 110 ], [ 96, "to-token", 4, - 85 + 86 ], [ 96, "to-token", 8, - 41 + 40 ], [ 96, "to-token", 16, - 22 + 23 ], [ 96, @@ -2708,31 +2708,31 @@ 97, "from-token", 1, - 785 + 807 ], [ 97, "from-token", 2, - 106 + 110 ], [ 97, "from-token", 4, - 85 + 86 ], [ 97, "from-token", 8, - 41 + 40 ], [ 97, "from-token", 16, - 22 + 23 ], [ 97, @@ -2840,55 +2840,55 @@ 98, "to-pos", 1, - 1101 + 1090 ], [ 98, "to-pos", 2, - 291 + 300 ], [ 98, "to-pos", 4, - 226 + 229 ], [ 98, "to-pos", 8, - 127 + 120 ], [ 98, "to-pos", 16, - 74 + 73 ], [ 98, "to-pos", 32, - 43 + 40 ], [ 98, "to-pos", 64, - 19 + 18 ], [ 98, "to-pos", 128, - 7 + 3 ], [ 98, "to-pos", 256, - 4 + 6 ], [ 98, @@ -2972,55 +2972,55 @@ 99, "from-pos", 1, - 1101 + 1090 ], [ 99, "from-pos", 2, - 291 + 300 ], [ 99, "from-pos", 4, - 226 + 229 ], [ 99, "from-pos", 8, - 127 + 120 ], [ 99, "from-pos", 16, - 74 + 73 ], [ 99, "from-pos", 32, - 43 + 40 ], [ 99, "from-pos", 64, - 19 + 18 ], [ 99, "from-pos", 128, - 7 + 3 ], [ 99, "from-pos", 256, - 4 + 6 ], [ 99, @@ -3104,37 +3104,37 @@ 100, "to-label", 1, - 866 + 964 ], [ 100, "to-label", 2, - 232 + 248 ], [ 100, "to-label", 4, - 164 + 167 ], [ 100, "to-label", 8, - 104 + 105 ], [ 100, "to-label", 16, - 61 + 60 ], [ 100, "to-label", 32, - 15 + 19 ], [ 100, @@ -3236,37 +3236,37 @@ 101, "from-label", 1, - 866 + 964 ], [ 101, "from-label", 2, - 232 + 248 ], [ 101, "from-label", 4, - 164 + 167 ], [ 101, "from-label", 8, - 104 + 105 ], [ 101, "from-label", 16, - 61 + 60 ], [ 101, "from-label", 32, - 15 + 19 ], [ 101, @@ -3362,19 +3362,19 @@ 102, "to-root", 0, - 173 + 175 ], [ 102, "to-root", 1, - 793 + 809 ], [ 102, "to-root", 2, - 102 + 104 ], [ 102, @@ -3494,19 +3494,19 @@ 103, "from-root", 0, - 105 + 106 ], [ 103, "from-root", 1, - 511 + 517 ], [ 103, "from-root", 2, - 334 + 346 ], [ 103, @@ -3518,13 +3518,13 @@ 103, "from-root", 8, - 27 + 26 ], [ 103, "from-root", 16, - 11 + 12 ], [ 103, @@ -3632,19 +3632,19 @@ 128, "to-sent", 1, - 2166 + 2350 ], [ 128, "to-sent", 2, - 47 + 55 ], [ 128, "to-sent", 4, - 3 + 4 ], [ 128, @@ -3764,19 +3764,19 @@ 129, "from-sent", 1, - 2166 + 2350 ], [ 129, "from-sent", 2, - 47 + 55 ], [ 129, "from-sent", 4, - 3 + 4 ], [ 129, @@ -3896,13 +3896,13 @@ 130, "to-text", 1, - 1325 + 1365 ], [ 130, "to-text", 2, - 89 + 91 ], [ 130, @@ -4028,13 +4028,13 @@ 131, "from-text", 1, - 1325 + 1365 ], [ 131, "from-text", 2, - 89 + 91 ], [ 131, @@ -4424,13 +4424,13 @@ 134, "to-doc", 1, - 691 + 710 ], [ 134, "to-doc", 2, - 95 + 100 ], [ 134, @@ -4442,13 +4442,13 @@ 134, "to-doc", 8, - 34 + 33 ], [ 134, "to-doc", 16, - 18 + 19 ], [ 134, @@ -4556,13 +4556,13 @@ 135, "from-doc", 1, - 691 + 710 ], [ 135, "from-doc", 2, - 95 + 100 ], [ 135, @@ -4574,13 +4574,13 @@ 135, "from-doc", 8, - 34 + 33 ], [ 135, "from-doc", 16, - 18 + 19 ], [ 135, @@ -5107,7 +5107,7 @@ [ 0, "token", - 1625 + 1671 ], [ 1, @@ -5127,27 +5127,27 @@ [ 9, "conn", - 185 + 187 ], [ 10, "term", - 1020 + 1045 ], [ 11, "verb", - 470 + 479 ], [ 16, "sentence", - 300 + 361 ], [ 32, "text", - 178 + 188 ], [ 48, @@ -5251,49 +5251,49 @@ 0, "token", 0, - 15 + 17 ], [ 0, "token", 1, - 882 + 925 ], [ 0, "token", 2, - 265 + 267 ], [ 0, "token", 4, - 196 + 202 ], [ 0, "token", 8, - 121 + 122 ], [ 0, "token", 16, - 77 + 74 ], [ 0, "token", 32, - 38 + 36 ], [ 0, "token", 64, - 19 + 16 ], [ 0, @@ -5425,7 +5425,7 @@ 1, "syntax", 64, - 5 + 4 ], [ 1, @@ -5437,7 +5437,7 @@ 1, "syntax", 256, - 10 + 11 ], [ 1, @@ -5779,43 +5779,43 @@ 9, "conn", 0, - 7 + 0 ], [ 9, "conn", 1, - 103 + 106 ], [ 9, "conn", 2, - 19 + 24 ], [ 9, "conn", 4, - 21 + 20 ], [ 9, "conn", 8, - 17 + 16 ], [ 9, "conn", 16, - 8 + 10 ], [ 9, "conn", 32, - 5 + 6 ], [ 9, @@ -5911,49 +5911,49 @@ 10, "term", 0, - 153 + 108 ], [ 10, "term", 1, - 660 + 716 ], [ 10, "term", 2, - 100 + 99 ], [ 10, "term", 4, - 53 + 66 ], [ 10, "term", 8, - 36 + 35 ], [ 10, "term", 16, - 15 + 18 ], [ 10, "term", 32, - 2 + 1 ], [ 10, "term", 64, - 1 + 2 ], [ 10, @@ -6043,25 +6043,25 @@ 11, "verb", 0, - 21 + 0 ], [ 11, "verb", 1, - 352 + 376 ], [ 11, "verb", 2, - 54 + 56 ], [ 11, "verb", 4, - 27 + 31 ], [ 11, @@ -6181,13 +6181,13 @@ 16, "sentence", 1, - 298 + 357 ], [ 16, "sentence", 2, - 2 + 3 ], [ 16, @@ -6205,7 +6205,7 @@ 16, "sentence", 16, - 0 + 1 ], [ 16, @@ -6313,7 +6313,7 @@ 32, "text", 1, - 67 + 77 ], [ 32, @@ -7121,31 +7121,31 @@ 0, "token", 2, - 915 + 961 ], [ 0, "token", 4, - 380 + 385 ], [ 0, "token", 8, - 178 + 180 ], [ 0, "token", 16, - 91 + 89 ], [ 0, "token", 32, - 37 + 32 ], [ 0, @@ -7283,13 +7283,13 @@ 1, "syntax", 64, - 12 + 11 ], [ 1, "syntax", 128, - 11 + 12 ], [ 1, @@ -7649,13 +7649,13 @@ 9, "conn", 2, - 106 + 107 ], [ 9, "conn", 4, - 35 + 36 ], [ 9, @@ -7769,7 +7769,7 @@ 10, "term", 0, - 107 + 108 ], [ 10, @@ -7781,25 +7781,25 @@ 10, "term", 2, - 721 + 740 ], [ 10, "term", 4, - 130 + 131 ], [ 10, "term", 8, - 44 + 47 ], [ 10, "term", 16, - 16 + 17 ], [ 10, @@ -7913,13 +7913,13 @@ 11, "verb", 2, - 375 + 381 ], [ 11, "verb", 4, - 73 + 76 ], [ 11, @@ -8045,13 +8045,13 @@ 16, "sentence", 2, - 298 + 357 ], [ 16, "sentence", 4, - 2 + 3 ], [ 16, @@ -8069,7 +8069,7 @@ 16, "sentence", 32, - 0 + 1 ], [ 16, @@ -8177,7 +8177,7 @@ 32, "text", 2, - 67 + 77 ], [ 32, @@ -8973,55 +8973,55 @@ 0, "token", 1, - 873 + 915 ], [ 0, "token", 2, - 260 + 265 ], [ 0, "token", 4, - 210 + 216 ], [ 0, "token", 8, - 120 + 121 ], [ 0, "token", 16, - 76 + 77 ], [ 0, "token", 32, - 45 + 42 ], [ 0, "token", 64, - 21 + 18 ], [ 0, "token", 128, - 8 + 4 ], [ 0, "token", 256, - 5 + 6 ], [ 0, @@ -9141,31 +9141,31 @@ 1, "syntax", 64, - 3 + 2 ], [ 1, "syntax", 128, - 1 + 2 ], [ 1, "syntax", 256, - 9 + 11 ], [ 1, "syntax", 512, - 6 + 5 ], [ 1, "syntax", 1024, - 4 + 3 ], [ 1, @@ -9501,25 +9501,25 @@ 9, "conn", 1, - 105 + 106 ], [ 9, "conn", 2, - 23 + 24 ], [ 9, "conn", 4, - 21 + 20 ], [ 9, "conn", 8, - 14 + 15 ], [ 9, @@ -9627,19 +9627,19 @@ 10, "term", 0, - 107 + 108 ], [ 10, "term", 1, - 691 + 710 ], [ 10, "term", 2, - 95 + 100 ], [ 10, @@ -9651,13 +9651,13 @@ 10, "term", 8, - 34 + 33 ], [ 10, "term", 16, - 18 + 19 ], [ 10, @@ -9765,19 +9765,19 @@ 11, "verb", 1, - 369 + 376 ], [ 11, "verb", 2, - 54 + 55 ], [ 11, "verb", 4, - 30 + 31 ], [ 11, @@ -9897,13 +9897,13 @@ 16, "sentence", 1, - 298 + 357 ], [ 16, "sentence", 2, - 2 + 3 ], [ 16, @@ -9921,7 +9921,7 @@ 16, "sentence", 16, - 0 + 1 ], [ 16, @@ -10029,7 +10029,7 @@ 32, "text", 1, - 67 + 77 ], [ 32, diff --git a/tests/data/texts/references.nlp.jsonl b/tests/data/texts/references.nlp.jsonl index 18a2bdc4..811ecf4d 100644 --- a/tests/data/texts/references.nlp.jsonl +++ b/tests/data/texts/references.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, null, null, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, null, null, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, null, null, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, null, null, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, null, null, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, null, null, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, null, null, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, null, null, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, null, null, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14523797031010145779, "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text_hash": 18067349248114064711, "type": "text"} -{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, null, null, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, null, null, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, null, null, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, null, null, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, null, null, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, null, null, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, null, null, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, null, null, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, null, null, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, null, null, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, null, null, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, null, null, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, null, null, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, null, null, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4183773491823524238, "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text_hash": 7798907214565353722, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "reference-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576118, null, null, 0, 3, 0, 3, 0, 3, true, "1", "[1]"], ["reference", "authors", 14523797031010145779, "TEXT", "#", 1.0, 13167782075772401771, 9399779311929866553, null, null, 4, 73, 4, 73, 3, 24, true, "J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu", "J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu,"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164566, null, null, 74, 121, 74, 121, 24, 32, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride,"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, null, null, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "volume", 14523797031010145779, "TEXT", "#", 1.0, 8104407864682872540, 17344131718252767312, null, null, 129, 136, 129, 136, 33, 36, true, "410, 63", "410, 63"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310666, null, null, 137, 144, 137, 144, 36, 40, true, "2001", "(2001)."]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14523797031010145779, "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text_hash": 18067349248114064711, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "reference-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961154, null, null, 0, 3, 0, 3, 0, 3, true, "9", "[9]"], ["reference", "authors", 4183773491823524238, "TEXT", "#", 1.0, 18273863669034285590, 7628635609201023314, null, null, 4, 123, 4, 122, 3, 46, true, "E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl", "E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl,"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518781, null, null, 124, 205, 123, 204, 46, 58, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si,"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912423, null, null, 206, 222, 205, 221, 58, 64, true, "Phys. Rev. Lett", "Phys. Rev. Lett."], ["reference", "volume", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610503, null, null, 223, 226, 222, 225, 64, 66, true, "92", "92,"], ["reference", "pages", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, null, null, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477263, null, null, 234, 241, 233, 240, 67, 71, true, "2004", "(2004)."]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4183773491823524238, "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text_hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/terms.nlp.jsonl b/tests/data/texts/terms.nlp.jsonl index 88bdba21..6975eda1 100644 --- a/tests/data/texts/terms.nlp.jsonl +++ b/tests/data/texts/terms.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, null, null, 0, 177, 0, 164, 0, 37, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, null, null, 7, 31, 7, 26, 1, 9, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, null, null, 16, 26, 16, 23, 4, 7, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206575305750373, 3269040892355287555, null, null, 16, 25, 16, 22, 4, 6, true, "[f\u0281\u0251\u0303s", "[f\u0281\u0251\u0303s"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, null, null, 27, 30, 24, 25, 7, 8, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, null, null, 48, 63, 43, 58, 12, 14, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, null, null, 64, 122, 59, 109, 14, 24, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, null, null, 73, 95, 68, 88, 17, 19, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, null, null, 96, 106, 89, 98, 19, 21, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, null, null, 107, 121, 99, 108, 21, 23, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, null, null, 123, 127, 110, 114, 25, 28, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, null, null, 124, 126, 111, 113, 26, 27, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, null, null, 128, 130, 115, 117, 28, 29, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, null, null, 133, 140, 120, 127, 30, 31, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, null, null, 141, 158, 128, 145, 31, 33, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, null, null, 159, 161, 146, 148, 33, 34, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, null, null, 162, 176, 149, 163, 34, 36, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, null, null, 170, 176, 157, 163, 35, 36, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, null, null, 178, 375, 165, 362, 37, 73, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, null, null, 186, 194, 173, 181, 39, 40, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, null, null, 195, 211, 182, 198, 40, 42, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, null, null, 204, 227, 191, 214, 41, 44, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, null, null, 216, 227, 203, 214, 43, 44, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, null, null, 228, 234, 215, 221, 44, 46, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, null, null, 235, 243, 222, 230, 46, 47, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 820203855428083856, 16279894764651307170, null, null, 252, 280, 239, 267, 49, 54, true, "Atlantic, Pacific and Indian", "Atlantic, Pacific and Indian"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, null, null, 252, 260, 239, 247, 49, 50, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, null, null, 262, 269, 249, 256, 51, 52, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 4553045173532721202, 17291436396596241777, null, null, 274, 287, 261, 274, 53, 55, true, "Indian oceans", "Indian oceans"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, null, null, 281, 293, 268, 280, 54, 59, true, "oceans,[XII]", "oceans,[XII]"], ["parenthesis", "square brackets", 9818235231875948258, "TEXT", "#", 1.0, 329104147687597164, 12284735790511259080, null, null, 288, 293, 275, 280, 56, 59, true, "[XII]", "[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895542235, 10796895691287030884, null, null, 289, 292, 276, 279, 57, 58, true, "XII", "XII"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 14650940714797320124, 6236592394333508229, null, null, 292, 300, 279, 287, 58, 60, true, "] giving", "] giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, null, null, 308, 314, 295, 301, 62, 64, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, null, null, 315, 361, 302, 348, 64, 69, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, null, null, 362, 368, 349, 355, 69, 71, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, null, null, 369, 374, 356, 361, 71, 72, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, null, null, 376, 637, 363, 624, 73, 124, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, null, null, 376, 410, 363, 397, 73, 77, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, null, null, 389, 395, 376, 382, 74, 75, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, null, null, 411, 415, 398, 402, 77, 78, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, null, null, 416, 438, 403, 425, 78, 81, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, null, null, 439, 445, 426, 432, 81, 83, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, null, null, 446, 451, 433, 438, 83, 84, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, null, null, 461, 467, 448, 454, 86, 88, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, null, null, 492, 498, 479, 485, 92, 94, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, null, null, 505, 521, 492, 508, 96, 99, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, null, null, 515, 521, 502, 508, 98, 99, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, null, null, 522, 528, 509, 515, 99, 101, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, null, null, 541, 558, 528, 545, 104, 107, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, null, null, 559, 565, 546, 552, 107, 109, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, null, null, 566, 571, 553, 558, 109, 110, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, null, null, 579, 594, 566, 581, 113, 115, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, null, null, 595, 603, 582, 590, 115, 117, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, null, null, 619, 625, 606, 612, 119, 121, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, null, null, 626, 636, 613, 623, 121, 123, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, null, null, 638, 961, 625, 948, 124, 182, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, null, null, 642, 659, 629, 646, 125, 127, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, null, null, 660, 667, 647, 654, 127, 128, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, null, null, 668, 676, 655, 663, 128, 130, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, null, null, 677, 682, 664, 669, 130, 131, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, null, null, 683, 689, 670, 676, 131, 133, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, null, null, 709, 717, 696, 704, 136, 138, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, null, null, 736, 742, 723, 729, 140, 142, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, null, null, 778, 798, 765, 785, 149, 151, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, null, null, 799, 806, 786, 793, 151, 152, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, null, null, 807, 820, 794, 807, 152, 154, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, null, null, 821, 823, 808, 810, 154, 155, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, null, null, 824, 864, 811, 851, 155, 162, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, null, null, 839, 851, 826, 838, 158, 160, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, null, null, 856, 864, 843, 851, 161, 162, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, null, null, 865, 871, 852, 858, 162, 164, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, null, null, 872, 886, 859, 873, 164, 166, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, null, null, 892, 910, 879, 897, 168, 171, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, null, null, 916, 928, 903, 915, 173, 175, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, null, null, 929, 931, 916, 918, 175, 176, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, null, null, 962, 1384, 949, 1371, 182, 276, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, null, null, 966, 991, 953, 978, 183, 186, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, null, null, 992, 1020, 979, 1007, 186, 193, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, null, null, 998, 1000, 985, 987, 188, 189, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, null, null, 1007, 1019, 994, 1006, 190, 192, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, null, null, 1021, 1025, 1008, 1012, 193, 194, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, null, null, 1028, 1036, 1015, 1023, 195, 196, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, null, null, 1037, 1041, 1024, 1028, 196, 197, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, null, null, 1042, 1044, 1029, 1031, 197, 198, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, null, null, 1045, 1052, 1032, 1039, 198, 201, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, null, null, 1053, 1056, 1040, 1043, 201, 203, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486414, 16516410147586311652, null, null, 1053, 1055, 1040, 1042, 201, 202, true, "km", "km"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235162, 2654033242220620585, null, null, 1055, 1056, 1042, 1043, 202, 203, true, "2", "2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, null, null, 1057, 1072, 1044, 1059, 203, 210, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, null, null, 1058, 1065, 1045, 1052, 204, 207, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, null, null, 1066, 1071, 1053, 1058, 207, 209, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, null, null, 1077, 1081, 1064, 1068, 211, 212, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, null, null, 1084, 1100, 1071, 1087, 213, 215, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, null, null, 1101, 1103, 1088, 1090, 215, 216, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, null, null, 1104, 1108, 1091, 1095, 216, 217, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, null, null, 1109, 1111, 1096, 1098, 217, 218, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, null, null, 1112, 1119, 1099, 1106, 218, 219, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, null, null, 1120, 1122, 1107, 1109, 219, 220, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, null, null, 1123, 1125, 1110, 1112, 220, 221, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, null, null, 1126, 1133, 1113, 1120, 221, 222, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, null, null, 1134, 1145, 1121, 1132, 222, 230, true, "2023.[5][8]", "2023.[5][8]"], ["numval", "year", 9818235231875948258, "TEXT", "#", 1.0, 389609625548777251, 4871157181485963100, null, null, 1134, 1138, 1121, 1125, 222, 223, true, "2023", "2023"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577901, 10796892691399633238, null, null, 1139, 1142, 1126, 1129, 224, 227, true, "[5]", "[5]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235157, 2654033131002543179, null, null, 1140, 1141, 1127, 1128, 225, 226, true, "5", "5"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577838, 10796892702691935623, null, null, 1142, 1145, 1129, 1132, 227, 230, true, "[8]", "[8]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235152, 2654033132467492508, null, null, 1143, 1144, 1130, 1131, 228, 229, true, "8", "8"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, null, null, 1146, 1152, 1133, 1139, 230, 231, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, null, null, 1153, 1155, 1140, 1142, 231, 232, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14782540711164886662, 14111360077134393327, null, null, 1158, 1170, 1145, 1157, 233, 235, true, "unitary semi", "unitary semi"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, null, null, 1166, 1183, 1153, 1170, 234, 237, true, "semi-presidential", "semi-presidential"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9493572096187311884, 17586523526652496832, null, null, 1171, 1192, 1158, 1179, 236, 238, true, "presidential republic", "presidential republic"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, null, null, 1193, 1197, 1180, 1184, 238, 239, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, null, null, 1202, 1209, 1189, 1196, 240, 241, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, null, null, 1210, 1212, 1197, 1199, 241, 242, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, null, null, 1213, 1218, 1200, 1205, 242, 243, true, "Paris", "Paris"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, null, null, 1224, 1233, 1211, 1220, 245, 248, true, "countrys", "country's"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263873511, null, null, 1224, 1231, 1211, 1218, 245, 246, true, "country", "country"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13491731564569135959, 5310634626438687925, null, null, 1232, 1246, 1219, 1233, 247, 250, true, "s largest city", "s largest city"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, null, null, 1251, 1286, 1238, 1273, 251, 256, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, null, null, 1269, 1286, 1256, 1273, 254, 256, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, null, null, 1288, 1311, 1275, 1298, 257, 261, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, null, null, 1312, 1319, 1299, 1306, 261, 262, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, null, null, 1320, 1383, 1307, 1370, 262, 275, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, null, null, 1320, 1329, 1307, 1316, 262, 263, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, null, null, 1331, 1335, 1318, 1322, 264, 265, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, null, null, 1337, 1345, 1324, 1332, 266, 267, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, null, null, 1347, 1352, 1334, 1339, 268, 269, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, null, null, 1354, 1362, 1341, 1349, 270, 271, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, null, null, 1364, 1374, 1351, 1361, 272, 273, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, null, null, 1379, 1383, 1366, 1370, 274, 275, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.82]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 9818235231875948258, "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text_hash": 13399504000106611798, "type": "text"} -{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, null, null, 0, 188, 0, 188, 0, 40, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, null, null, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, null, null, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, null, null, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301128091, null, null, 24, 33, 24, 33, 5, 6, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587921185, null, null, 34, 41, 34, 41, 6, 7, true, "pairing", "pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, null, null, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, null, null, 45, 53, 45, 53, 8, 11, true, "two-band", "two-band"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206570221100871, 2911818818181444888, null, null, 49, 55, 49, 55, 10, 12, true, "band s", "band s"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, null, null, 54, 60, 54, 60, 11, 14, true, "s-wave", "s-wave"], ["term", "enum-term-mark-2", 4522339299074192207, "TEXT", "#", 1.0, 8560127426779937860, 4026994879422986240, null, null, 56, 66, 56, 66, 13, 16, true, "wave and d", "wave and d"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625633602560, 14144633872330801396, null, null, 56, 60, 56, 60, 13, 14, true, "wave", "wave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, null, null, 65, 71, 65, 71, 15, 18, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5267005535915851615, 13852357345485708038, null, null, 67, 87, 67, 87, 17, 19, true, "wave superconductors", "wave superconductors"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, null, null, 88, 92, 88, 92, 19, 20, true, "with", "with"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, null, null, 93, 96, 93, 96, 20, 23, true, "D4h", "D4h"], ["numval", "ival", 4522339299074192207, "TEXT", "#", 1.0, 17767354399704235156, 8513040951015345484, null, null, 94, 95, 94, 95, 21, 22, true, "4", "4"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 2516792725790519961, 10765065347046652233, null, null, 95, 105, 95, 105, 22, 24, true, "h symmetry", "h symmetry"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, null, null, 106, 113, 106, 113, 24, 26, true, "in both", "in both"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, null, null, 114, 127, 114, 127, 26, 29, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183561878, null, null, 114, 118, 114, 118, 26, 27, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1366921581602115232, 15058186165846257397, null, null, 119, 137, 119, 137, 28, 30, true, "reversal invariant", "reversal invariant"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, null, null, 146, 148, 146, 148, 32, 33, true, "as", "as"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, null, null, 149, 162, 149, 162, 33, 36, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183567675, null, null, 149, 153, 149, 153, 33, 34, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16155708024079339904, 14846007814114510811, null, null, 154, 171, 154, 171, 35, 37, true, "reversal symmetry", "reversal symmetry"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, null, null, 172, 180, 172, 180, 37, 38, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, null, null, 181, 187, 181, 187, 38, 39, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, null, null, 189, 384, 189, 384, 40, 75, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, null, null, 193, 201, 193, 201, 41, 42, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, null, null, 202, 204, 202, 204, 42, 43, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, null, null, 205, 214, 205, 214, 43, 44, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, null, null, 215, 244, 215, 244, 44, 47, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, null, null, 249, 264, 249, 264, 48, 50, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, null, null, 265, 271, 265, 271, 50, 52, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, null, null, 272, 286, 272, 286, 52, 53, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, null, null, 288, 293, 288, 293, 54, 55, true, "nodes", "nodes"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, null, null, 298, 309, 298, 309, 56, 60, true, "(dis)appear", "(dis)appear"], ["parenthesis", "round brackets", 4522339299074192207, "TEXT", "#", 1.0, 329104053577713079, 7302082272979819201, null, null, 298, 303, 298, 303, 56, 59, true, "(dis)", "(dis)"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 12178341415895452094, 8713100074317547395, null, null, 299, 302, 299, 302, 57, 58, true, "dis", "dis"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 16381206574684919940, 8627590102959499799, null, null, 303, 309, 303, 309, 59, 60, true, "appear", "appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, null, null, 311, 316, 311, 316, 61, 62, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, null, null, 322, 327, 322, 327, 64, 65, true, "leave", "leave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, null, null, 328, 341, 328, 341, 65, 68, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4859188827321755536, 9887725278734779219, null, null, 333, 351, 333, 351, 67, 69, true, "symmetry locations", "symmetry locations"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, null, null, 357, 374, 357, 374, 70, 72, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, null, null, 375, 383, 375, 383, 72, 74, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, null, null, 385, 594, 385, 594, 75, 114, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, null, null, 398, 404, 398, 404, 77, 79, true, "in the", "in the"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, null, null, 405, 411, 405, 411, 79, 82, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 3545604367994270661, 11829255560935036292, null, null, 407, 416, 407, 416, 81, 83, true, "wave case", "wave case"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, null, null, 421, 425, 421, 425, 85, 86, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, null, null, 426, 430, 426, 430, 86, 87, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, null, null, 440, 454, 440, 454, 89, 91, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, null, null, 455, 475, 455, 475, 91, 93, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, null, null, 481, 490, 481, 490, 94, 95, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, null, null, 491, 498, 491, 498, 95, 96, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, null, null, 499, 508, 499, 508, 96, 97, true, "increases", "increases"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, null, null, 515, 526, 515, 526, 99, 102, true, "zero-energy", "zero-energy"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1885602650026083434, 12476719833465444023, null, null, 520, 534, 520, 534, 101, 103, true, "energy Andreev", "energy Andreev"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104159325585799, 66191664906118763, null, null, 535, 540, 535, 540, 103, 104, true, "bound", "bound"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433796974, null, null, 541, 547, 541, 547, 104, 105, true, "states", "states"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, null, null, 548, 555, 548, 555, 105, 107, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, null, null, 560, 570, 560, 570, 108, 109, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, null, null, 571, 573, 571, 573, 109, 110, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, null, null, 574, 593, 574, 593, 110, 113, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.87], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.97]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4522339299074192207, "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text_hash": 7455828584320671675, "type": "text"} +{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "proper", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, null, null, 0, 177, 0, 164, 0, 37, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, null, null, 7, 31, 7, 26, 1, 9, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, null, null, 16, 26, 16, 23, 4, 7, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206575305750373, 3269040892355287555, null, null, 16, 25, 16, 22, 4, 6, true, "[f\u0281\u0251\u0303s", "[f\u0281\u0251\u0303s"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, null, null, 27, 30, 24, 25, 7, 8, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, null, null, 48, 63, 43, 58, 12, 14, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, null, null, 64, 122, 59, 109, 14, 24, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, null, null, 73, 95, 68, 88, 17, 19, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, null, null, 96, 106, 89, 98, 19, 21, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, null, null, 107, 121, 99, 108, 21, 23, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, null, null, 123, 127, 110, 114, 25, 28, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, null, null, 124, 126, 111, 113, 26, 27, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, null, null, 128, 130, 115, 117, 28, 29, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, null, null, 133, 140, 120, 127, 30, 31, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, null, null, 141, 158, 128, 145, 31, 33, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, null, null, 159, 161, 146, 148, 33, 34, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, null, null, 162, 176, 149, 163, 34, 36, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, null, null, 170, 176, 157, 163, 35, 36, true, "Europe", "Europe"], ["sentence", "proper", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, null, null, 178, 375, 165, 362, 37, 73, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, null, null, 186, 194, 173, 181, 39, 40, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, null, null, 195, 211, 182, 198, 40, 42, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, null, null, 204, 227, 191, 214, 41, 44, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, null, null, 216, 227, 203, 214, 43, 44, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, null, null, 228, 234, 215, 221, 44, 46, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, null, null, 235, 243, 222, 230, 46, 47, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 820203855428083856, 16279894764651307170, null, null, 252, 280, 239, 267, 49, 54, true, "Atlantic, Pacific and Indian", "Atlantic, Pacific and Indian"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, null, null, 252, 260, 239, 247, 49, 50, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, null, null, 262, 269, 249, 256, 51, 52, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 4553045173532721202, 17291436396596241777, null, null, 274, 287, 261, 274, 53, 55, true, "Indian oceans", "Indian oceans"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, null, null, 281, 293, 268, 280, 54, 59, true, "oceans,[XII]", "oceans,[XII]"], ["parenthesis", "square brackets", 9818235231875948258, "TEXT", "#", 1.0, 329104147687597164, 12284735790511259080, null, null, 288, 293, 275, 280, 56, 59, true, "[XII]", "[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895542235, 10796895691287030884, null, null, 289, 292, 276, 279, 57, 58, true, "XII", "XII"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 14650940714797320124, 6236592394333508229, null, null, 292, 300, 279, 287, 58, 60, true, "] giving", "] giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, null, null, 308, 314, 295, 301, 62, 64, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, null, null, 315, 361, 302, 348, 64, 69, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, null, null, 362, 368, 349, 355, 69, 71, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, null, null, 369, 374, 356, 361, 71, 72, true, "world", "world"], ["sentence", "proper", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, null, null, 376, 637, 363, 624, 73, 124, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, null, null, 376, 410, 363, 397, 73, 77, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, null, null, 389, 395, 376, 382, 74, 75, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, null, null, 411, 415, 398, 402, 77, 78, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, null, null, 416, 438, 403, 425, 78, 81, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, null, null, 439, 445, 426, 432, 81, 83, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, null, null, 446, 451, 433, 438, 83, 84, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, null, null, 461, 467, 448, 454, 86, 88, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, null, null, 492, 498, 479, 485, 92, 94, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, null, null, 505, 521, 492, 508, 96, 99, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, null, null, 515, 521, 502, 508, 98, 99, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, null, null, 522, 528, 509, 515, 99, 101, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, null, null, 541, 558, 528, 545, 104, 107, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, null, null, 559, 565, 546, 552, 107, 109, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, null, null, 566, 571, 553, 558, 109, 110, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, null, null, 579, 594, 566, 581, 113, 115, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, null, null, 595, 603, 582, 590, 115, 117, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, null, null, 619, 625, 606, 612, 119, 121, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, null, null, 626, 636, 613, 623, 121, 123, true, "north west", "north west"], ["sentence", "proper", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, null, null, 638, 961, 625, 948, 124, 182, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, null, null, 642, 659, 629, 646, 125, 127, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, null, null, 660, 667, 647, 654, 127, 128, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, null, null, 668, 676, 655, 663, 128, 130, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, null, null, 677, 682, 664, 669, 130, 131, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, null, null, 683, 689, 670, 676, 131, 133, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, null, null, 709, 717, 696, 704, 136, 138, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, null, null, 736, 742, 723, 729, 140, 142, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, null, null, 778, 798, 765, 785, 149, 151, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, null, null, 799, 806, 786, 793, 151, 152, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, null, null, 807, 820, 794, 807, 152, 154, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, null, null, 821, 823, 808, 810, 154, 155, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, null, null, 824, 864, 811, 851, 155, 162, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, null, null, 839, 851, 826, 838, 158, 160, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, null, null, 856, 864, 843, 851, 161, 162, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, null, null, 865, 871, 852, 858, 162, 164, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, null, null, 872, 886, 859, 873, 164, 166, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, null, null, 892, 910, 879, 897, 168, 171, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, null, null, 916, 928, 903, 915, 173, 175, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, null, null, 929, 931, 916, 918, 175, 176, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["sentence", "proper", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, null, null, 962, 1384, 949, 1371, 182, 276, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, null, null, 966, 991, 953, 978, 183, 186, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, null, null, 992, 1020, 979, 1007, 186, 193, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, null, null, 998, 1000, 985, 987, 188, 189, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, null, null, 1007, 1019, 994, 1006, 190, 192, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, null, null, 1021, 1025, 1008, 1012, 193, 194, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, null, null, 1028, 1036, 1015, 1023, 195, 196, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, null, null, 1037, 1041, 1024, 1028, 196, 197, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, null, null, 1042, 1044, 1029, 1031, 197, 198, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, null, null, 1045, 1052, 1032, 1039, 198, 201, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, null, null, 1053, 1056, 1040, 1043, 201, 203, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486414, 16516410147586311652, null, null, 1053, 1055, 1040, 1042, 201, 202, true, "km", "km"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235162, 2654033242220620585, null, null, 1055, 1056, 1042, 1043, 202, 203, true, "2", "2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, null, null, 1057, 1072, 1044, 1059, 203, 210, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, null, null, 1058, 1065, 1045, 1052, 204, 207, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, null, null, 1066, 1071, 1053, 1058, 207, 209, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, null, null, 1077, 1081, 1064, 1068, 211, 212, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, null, null, 1084, 1100, 1071, 1087, 213, 215, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, null, null, 1101, 1103, 1088, 1090, 215, 216, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, null, null, 1104, 1108, 1091, 1095, 216, 217, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, null, null, 1109, 1111, 1096, 1098, 217, 218, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, null, null, 1112, 1119, 1099, 1106, 218, 219, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, null, null, 1120, 1122, 1107, 1109, 219, 220, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, null, null, 1123, 1125, 1110, 1112, 220, 221, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, null, null, 1126, 1133, 1113, 1120, 221, 222, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, null, null, 1134, 1145, 1121, 1132, 222, 230, true, "2023.[5][8]", "2023.[5][8]"], ["numval", "year", 9818235231875948258, "TEXT", "#", 1.0, 389609625548777251, 4871157181485963100, null, null, 1134, 1138, 1121, 1125, 222, 223, true, "2023", "2023"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577901, 10796892691399633238, null, null, 1139, 1142, 1126, 1129, 224, 227, true, "[5]", "[5]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235157, 2654033131002543179, null, null, 1140, 1141, 1127, 1128, 225, 226, true, "5", "5"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577838, 10796892702691935623, null, null, 1142, 1145, 1129, 1132, 227, 230, true, "[8]", "[8]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235152, 2654033132467492508, null, null, 1143, 1144, 1130, 1131, 228, 229, true, "8", "8"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, null, null, 1146, 1152, 1133, 1139, 230, 231, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, null, null, 1153, 1155, 1140, 1142, 231, 232, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14782540711164886662, 14111360077134393327, null, null, 1158, 1170, 1145, 1157, 233, 235, true, "unitary semi", "unitary semi"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, null, null, 1166, 1183, 1153, 1170, 234, 237, true, "semi-presidential", "semi-presidential"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9493572096187311884, 17586523526652496832, null, null, 1171, 1192, 1158, 1179, 236, 238, true, "presidential republic", "presidential republic"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, null, null, 1193, 1197, 1180, 1184, 238, 239, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, null, null, 1202, 1209, 1189, 1196, 240, 241, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, null, null, 1210, 1212, 1197, 1199, 241, 242, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, null, null, 1213, 1218, 1200, 1205, 242, 243, true, "Paris", "Paris"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, null, null, 1224, 1233, 1211, 1220, 245, 248, true, "countrys", "country's"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263873511, null, null, 1224, 1231, 1211, 1218, 245, 246, true, "country", "country"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13491731564569135959, 5310634626438687925, null, null, 1232, 1246, 1219, 1233, 247, 250, true, "s largest city", "s largest city"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, null, null, 1251, 1286, 1238, 1273, 251, 256, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, null, null, 1269, 1286, 1256, 1273, 254, 256, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, null, null, 1288, 1311, 1275, 1298, 257, 261, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, null, null, 1312, 1319, 1299, 1306, 261, 262, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, null, null, 1320, 1383, 1307, 1370, 262, 275, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, null, null, 1320, 1329, 1307, 1316, 262, 263, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, null, null, 1331, 1335, 1318, 1322, 264, 265, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, null, null, 1337, 1345, 1324, 1332, 266, 267, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, null, null, 1347, 1352, 1334, 1339, 268, 269, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, null, null, 1354, 1362, 1341, 1349, 270, 271, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, null, null, 1364, 1374, 1351, 1361, 272, 273, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, null, null, 1379, 1383, 1366, 1370, 274, 275, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.82]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 9818235231875948258, "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text_hash": 13399504000106611798, "type": "text"} +{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "proper", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, null, null, 0, 188, 0, 188, 0, 40, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, null, null, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, null, null, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, null, null, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301128091, null, null, 24, 33, 24, 33, 5, 6, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587921185, null, null, 34, 41, 34, 41, 6, 7, true, "pairing", "pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, null, null, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, null, null, 45, 53, 45, 53, 8, 11, true, "two-band", "two-band"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206570221100871, 2911818818181444888, null, null, 49, 55, 49, 55, 10, 12, true, "band s", "band s"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, null, null, 54, 60, 54, 60, 11, 14, true, "s-wave", "s-wave"], ["term", "enum-term-mark-2", 4522339299074192207, "TEXT", "#", 1.0, 8560127426779937860, 4026994879422986240, null, null, 56, 66, 56, 66, 13, 16, true, "wave and d", "wave and d"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625633602560, 14144633872330801396, null, null, 56, 60, 56, 60, 13, 14, true, "wave", "wave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, null, null, 65, 71, 65, 71, 15, 18, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5267005535915851615, 13852357345485708038, null, null, 67, 87, 67, 87, 17, 19, true, "wave superconductors", "wave superconductors"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, null, null, 88, 92, 88, 92, 19, 20, true, "with", "with"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, null, null, 93, 96, 93, 96, 20, 23, true, "D4h", "D4h"], ["numval", "ival", 4522339299074192207, "TEXT", "#", 1.0, 17767354399704235156, 8513040951015345484, null, null, 94, 95, 94, 95, 21, 22, true, "4", "4"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 2516792725790519961, 10765065347046652233, null, null, 95, 105, 95, 105, 22, 24, true, "h symmetry", "h symmetry"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, null, null, 106, 113, 106, 113, 24, 26, true, "in both", "in both"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, null, null, 114, 127, 114, 127, 26, 29, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183561878, null, null, 114, 118, 114, 118, 26, 27, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1366921581602115232, 15058186165846257397, null, null, 119, 137, 119, 137, 28, 30, true, "reversal invariant", "reversal invariant"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, null, null, 146, 148, 146, 148, 32, 33, true, "as", "as"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, null, null, 149, 162, 149, 162, 33, 36, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183567675, null, null, 149, 153, 149, 153, 33, 34, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16155708024079339904, 14846007814114510811, null, null, 154, 171, 154, 171, 35, 37, true, "reversal symmetry", "reversal symmetry"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, null, null, 172, 180, 172, 180, 37, 38, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, null, null, 181, 187, 181, 187, 38, 39, true, "states", "states"], ["sentence", "proper", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, null, null, 189, 384, 189, 384, 40, 75, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, null, null, 193, 201, 193, 201, 41, 42, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, null, null, 202, 204, 202, 204, 42, 43, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, null, null, 205, 214, 205, 214, 43, 44, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, null, null, 215, 244, 215, 244, 44, 47, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, null, null, 249, 264, 249, 264, 48, 50, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, null, null, 265, 271, 265, 271, 50, 52, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, null, null, 272, 286, 272, 286, 52, 53, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, null, null, 288, 293, 288, 293, 54, 55, true, "nodes", "nodes"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, null, null, 298, 309, 298, 309, 56, 60, true, "(dis)appear", "(dis)appear"], ["parenthesis", "round brackets", 4522339299074192207, "TEXT", "#", 1.0, 329104053577713079, 7302082272979819201, null, null, 298, 303, 298, 303, 56, 59, true, "(dis)", "(dis)"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 12178341415895452094, 8713100074317547395, null, null, 299, 302, 299, 302, 57, 58, true, "dis", "dis"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 16381206574684919940, 8627590102959499799, null, null, 303, 309, 303, 309, 59, 60, true, "appear", "appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, null, null, 311, 316, 311, 316, 61, 62, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, null, null, 322, 327, 322, 327, 64, 65, true, "leave", "leave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, null, null, 328, 341, 328, 341, 65, 68, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4859188827321755536, 9887725278734779219, null, null, 333, 351, 333, 351, 67, 69, true, "symmetry locations", "symmetry locations"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, null, null, 357, 374, 357, 374, 70, 72, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, null, null, 375, 383, 375, 383, 72, 74, true, "is tuned", "is tuned"], ["sentence", "proper", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, null, null, 385, 594, 385, 594, 75, 114, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, null, null, 398, 404, 398, 404, 77, 79, true, "in the", "in the"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, null, null, 405, 411, 405, 411, 79, 82, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 3545604367994270661, 11829255560935036292, null, null, 407, 416, 407, 416, 81, 83, true, "wave case", "wave case"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, null, null, 421, 425, 421, 425, 85, 86, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, null, null, 426, 430, 426, 430, 86, 87, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, null, null, 440, 454, 440, 454, 89, 91, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, null, null, 455, 475, 455, 475, 91, 93, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, null, null, 481, 490, 481, 490, 94, 95, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, null, null, 491, 498, 491, 498, 95, 96, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, null, null, 499, 508, 499, 508, 96, 97, true, "increases", "increases"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, null, null, 515, 526, 515, 526, 99, 102, true, "zero-energy", "zero-energy"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1885602650026083434, 12476719833465444023, null, null, 520, 534, 520, 534, 101, 103, true, "energy Andreev", "energy Andreev"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104159325585799, 66191664906118763, null, null, 535, 540, 535, 540, 103, 104, true, "bound", "bound"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433796974, null, null, 541, 547, 541, 547, 104, 105, true, "states", "states"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, null, null, 548, 555, 548, 555, 105, 107, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, null, null, 560, 570, 560, 570, 108, 109, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, null, null, 571, 573, 571, 573, 109, 110, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, null, null, 574, 593, 574, 593, 110, 113, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.87], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.97]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4522339299074192207, "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text_hash": 7455828584320671675, "type": "text"} diff --git a/tests/test_nlp.py b/tests/test_nlp.py index a0a18873..00accc53 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -GENERATE=True +GENERATE=False import os import json @@ -438,7 +438,7 @@ def test_05A(): target_leg = "./tests/data/docs/doc_01.leg.json" target_nlp = "./tests/data/docs/doc_01.nlp.json" - print(f"reading {source} ... ", end="") + #print(f"reading {source} ... ", end="") with open(source, "r") as fr: doc_i = json.load(fr) From f30bbe19c101fad9cb2d9ac48869fb4ef8f128bc Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Sun, 3 Dec 2023 06:39:19 +0100 Subject: [PATCH 22/22] updated the models (2) Signed-off-by: Peter Staar --- deepsearch_glm/glm_utils.py | 6 ++---- src/andromeda/glm/model_cli/create/model_creator.h | 2 +- tests/test_glm.py | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/deepsearch_glm/glm_utils.py b/deepsearch_glm/glm_utils.py index 1af1f8d9..e071ee29 100644 --- a/deepsearch_glm/glm_utils.py +++ b/deepsearch_glm/glm_utils.py @@ -43,8 +43,7 @@ def load_glm_config(idir:str): def load_glm(idir:str): config = load_glm_config(idir) - - #glm = andromeda_glm.glm_model() + glm = glm_model() glm.load(config) @@ -60,7 +59,7 @@ def create_glm_config_from_docs(odir:str, json_files:list[str], }, "save": { "root": odir, - "write-CSV": True, + "write-CSV": False, "write-JSON": False, "write-path-text": False } @@ -129,7 +128,6 @@ def create_glm_from_docs(odir:str, json_files:list[str], config = create_glm_config_from_docs(odir, json_files, nlp_models) - #glm = andromeda_glm.glm_model() glm = glm_model() glm.create(config) diff --git a/src/andromeda/glm/model_cli/create/model_creator.h b/src/andromeda/glm/model_cli/create/model_creator.h index 25c6c9c5..13f3f621 100644 --- a/src/andromeda/glm/model_cli/create/model_creator.h +++ b/src/andromeda/glm/model_cli/create/model_creator.h @@ -221,7 +221,7 @@ namespace andromeda text_node = nodes.insert(text_node, false); text_hash = text_node.get_hash(); - LOG_S(INFO) << "inserted node: " << doc_path; + //LOG_S(INFO) << "inserted node: " << doc_path; } std::vector& tokens = subj.get_word_tokens(); diff --git a/tests/test_glm.py b/tests/test_glm.py index 60c223a1..3dbbbfb6 100644 --- a/tests/test_glm.py +++ b/tests/test_glm.py @@ -31,7 +31,7 @@ def test_02A_create_glm_from_doc(): else: rdir = os.path.join(sdir, "glm_ref") odir = os.path.join(sdir, "glm_out") - + model_names = "semantic;name;verb;term;abbreviation" json_files = glob.glob(os.path.join(sdir, "docs/*.json"))